Data Science
Data Science
ipynb - Colaboratory
1 import pandas as pd
2 import matplotlib.pyplot as plt
3 import sklearn
4 import numpy as np
5
# Load the dataset from the CSV file into a DataFrame.
data = pd.read_csv(filepath_or_buffer="Data211.csv")
# Preview the first five rows of the frame (five is also head()'s default).
data.head(n=5)
X1 X2 X3 X4 X5 X6 X7 X8 Y1 Y2
# Feature matrix: predictor columns only. The last two columns (Y1 and Y2)
# are the response variables, so both are excluded; slicing with `:-1` kept
# Y1 among the features and leaked a response value into the model inputs.
X = data.iloc[:, :-2]
# Target: the last column of the frame.
# NOTE(review): the scatter plot further down labels y_train as
# "heating load" -- confirm the last column is the intended target.
y = data.iloc[:, -1]
X1 X2 X3 X4 X5 X6 X7 X8 Y1
54 0.90 563.5 318.5 122.50 7.0 4 0.10 1 29.14
30 0.71 710.5 269.5 220.50 3.5 4 0.00 0 6.37
424 0.64 784.0 343.0 220.50 3.5 2 0.25 3 16.83
682 0.86 588.0 294.0 147.00 7.0 4 0.40 4 32.15
100 0.90 563.5 318.5 122.50 7.0 2 0.10 2 28.88
.. ... ... ... ... ... .. ... .. ...
763 0.64 784.0 343.0 220.50 3.5 5 0.40 5 17.88
192 0.98 514.5 294.0 110.25 7.0 2 0.10 4 24.38
629 0.90 563.5 318.5 122.50 7.0 3 0.40 3 34.95
559 0.71 710.5 269.5 220.50 3.5 5 0.40 1 14.58
684 0.82 612.5 318.5 147.00 7.0 2 0.40 4 28.93
X1 X2 X3 X4 X5 X6 X7 X8 Y1 Y2
661 0.66 759.5 318.5 220.50 3.5 3.0 0.4 3.0 15.18 NaN
122 0.74 686.0 245.0 220.50 3.5 4.0 0.1 2.0 10.32 NaN
113 0.79 637.0 343.0 147.00 7.0 3.0 0.1 2.0 37.26 NaN
14 0.82 612.5 318.5 147.00 7.0 4.0 0.0 0.0 16.95 NaN
529 0.98 514.5 294.0 110.25 7.0 3.0 0.4 1.0 32.26 NaN
# Summary statistics of the training frame. A notebook cell renders only its
# final expression, so the summary is printed explicitly instead of being
# computed and discarded.
print(Train_data.describe())
# Pairwise Pearson correlation between every pair of columns.
corr_mat = Train_data.corr(method="pearson")
# Last expression of the cell: rendered as the cell's output.
corr_mat
X1 X2 X3 X4 X5 X6 X7 X8 Y1 Y2
X1 1.000000 -0.991563 -0.200837 -0.870450 0.819848 -0.014718 -0.036640 -0.037592 0.594804 NaN
X2 -0.991563 1.000000 0.190741 0.883489 -0.853371 0.009437 0.037866 0.031705 -0.635506 NaN
X3 -0.200837 0.190741 1.000000 -0.291333 0.290551 -0.016808 -0.010377 0.030027 0.465589 NaN
X4 -0.870450 0.883489 -0.291333 1.000000 -0.970276 0.017218 0.041853 0.016568 -0.841494 NaN
X5 0.819848 -0.853371 0.290551 -0.970276 1.000000 -0.008448 -0.044356 -0.004967 0.877522 NaN
X6 -0.014718 0.009437 -0.016808 0.017218 -0.008448 1.000000 -0.099348 -0.013862 -0.025379 NaN
X7 -0.036640 0.037866 -0.010377 0.041853 -0.044356 -0.099348 1.000000 0.195659 0.245926 NaN
X8 -0.037592 0.031705 0.030027 0.016568 -0.004967 -0.013862 0.195659 1.000000 0.096695 NaN
Y1 0.594804 -0.635506 0.465589 -0.841494 0.877522 -0.025379 0.245926 0.096695 1.000000 NaN
Y2 NaN NaN NaN NaN NaN NaN NaN NaN NaN 1.0
# Render the correlation matrix as a colour-coded grid.
plt.matshow(corr_mat)
# One tick per column on both axes, labelled with the column name;
# x labels are rotated so they do not overlap.
column_labels = corr_mat.columns
tick_positions = range(len(column_labels))
plt.xticks(tick_positions, column_labels, rotation=90)
plt.yticks(tick_positions, column_labels)
# Colour scale legend, then draw the figure.
plt.colorbar()
plt.show()
# Scatter plot of roof area against the training target.
# NOTE(review): assumes the UCI Energy Efficiency column layout, in which X4
# is the roof area and X1 (plotted here before) is the relative compactness
# ratio -- confirm against the CSV.
# NOTE(review): y_train is defined outside this cell; confirm it holds the
# heating-load column that the labels below claim.
plt.scatter(X_train["X4"], y_train)
plt.title("Roof area vs heating load")
plt.xlabel("roof area")
plt.ylabel("heating load")
# show() renders the figure; a bare plot() call adds nothing to the axes and
# only returns an empty list.
plt.show()
[]
▾ KNeighborsRegressor
KNeighborsRegressor(n_neighbors=2)
# Predict the target for the held-out feature rows with the regressor,
# then print the predictions.
y_pred = Reg.predict(X=X_test)
print(y_pred)
3
4
https://colab.research.google.com/drive/1EEP1v692iITCRu4S7WZXI9moiEtSgFqt#scrollTo=RSZldCtVaRho&printMode=true 1/1