第1关:初窥数据
import pandas as pd
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
#********* Begin *********#
# Read ./train_data.csv, plot the distribution of the label column and
# save the resulting figure to ./step1/dump/result.jpg.
df = pd.read_csv('./train_data.csv')  # load the training data
cover_type_labels = df['Cover_Type']  # label column: tree cover type
cover_type_labels.hist(bins=10)  # histogram of the labels, 10 bins
plt.savefig('./step1/dump/result.jpg')  # write the current figure to disk
# NOTE(review): the Agg backend selected at import time is non-interactive,
# so this call presumably displays nothing -- confirm it is still needed.
plt.show()
#********* End *********#
第2关:特征选择
import pandas as pd
# Load the training data, remove the 'Hillshade_3pm' feature column and
# print the remaining table.
df = pd.read_csv('./train_data.csv')
r = df.drop(columns=['Hillshade_3pm'])  # returns a new frame; df is unchanged
print(r)
第3关:树木类型识别
def predict_cover_type(train_feature, label, test_feature):
    """Fit a random forest on the training data and predict the test set.

    Args:
        train_feature: Training features, passed as-is to
            ``RandomForestClassifier.fit``.
        label: Training labels, passed as-is to
            ``RandomForestClassifier.fit``.
        test_feature: Features to classify, passed as-is to
            ``RandomForestClassifier.predict``.

    Returns:
        The result of ``RandomForestClassifier.predict(test_feature)``.
    """
    # Imported locally so the function is self-contained.
    from sklearn.ensemble import RandomForestClassifier

    # No random_state is set, so results may differ between runs.
    rfc = RandomForestClassifier(n_estimators=10)
    rfc.fit(train_feature, label)
    return rfc.predict(test_feature)
# NOTE(review): this redefines the predict_cover_type function declared just
# above with the same logic; being later in the file, this definition is the
# one the name ends up bound to.
def predict_cover_type(train_feature, label, test_feature):
    """Train a 10-tree random forest and return predictions for the test set.

    Args:
        train_feature: Training features handed to the classifier's ``fit``.
        label: Training labels handed to the classifier's ``fit``.
        test_feature: Features handed to the classifier's ``predict``.

    Returns:
        The value returned by the classifier's ``predict`` call on
        ``test_feature``.
    """
    # Function-scope import keeps the dependency local to this block.
    from sklearn.ensemble import RandomForestClassifier

    rfc = RandomForestClassifier(n_estimators=10)
    rfc.fit(train_feature, label)
    return rfc.predict(test_feature)