python决策树预测模型_python_机器学习_监督学习模型_决策树
# -*- coding:utf-8 -*- from sklearn.feature_extraction import DictVectorizer; import csv; from sklearn import preprocessing; from sklearn import tree  # 要求是数值型的值; from sklearn.externals.six import StringIO; """注意: 决策树..
#-*- coding:utf-8 -*-
"""Train a decision tree on a small categorical data set and predict one row.

Decision trees in scikit-learn require numeric inputs; string values such
as ``no`` / ``yes`` are not accepted, so the categorical table has to be
converted into a numeric matrix first.

For example, a table like this::

    ====================================
    age          income   student
    youth        high     no
    youth        high     no
    middle_aged  high     no
    senior       medium   no
    senior       low      yes
    ====================================

is expanded into one indicator column per (feature, value) pair::

    youth  middle_aged  senior  high  medium  low  ......
    1      0            0       1     0       0
    1      0            0       1     0       0
    .....
"""
import csv
# ``sklearn.externals.six`` was removed from scikit-learn; on Python 3 its
# ``StringIO`` was simply ``io.StringIO``, so the name is kept via the stdlib.
from io import StringIO  # noqa: F401  (kept for backward compatibility)

from sklearn import preprocessing
from sklearn import tree
from sklearn.feature_extraction import DictVectorizer

# NOTE(review): the path ends in ".xlsx" but the file is parsed with
# ``csv.reader``, so it must actually contain comma-separated text -- confirm.
DATA_PATH = r"C:\Users\Administrator\Desktop\data.xlsx"
DOT_PATH = r"C:\Users\Administrator\Desktop\code\mechine_learning\allElectronicInformationGainOri.dot"


def _feature_names(vec):
    """Return the vectorizer's column names as a list.

    Uses ``get_feature_names_out`` when the installed scikit-learn provides
    it and falls back to the older ``get_feature_names`` otherwise.
    """
    getter = getattr(vec, "get_feature_names_out", None)
    if getter is None:
        getter = vec.get_feature_names
    return list(getter())


def _read_dataset(path):
    """Read the CSV at *path* into per-row feature dicts and a label list.

    The first column (the row id, e.g. "RID") is skipped, the last column is
    taken as the class label, and every column in between becomes a
    ``{header: value}`` entry of that row's feature dict.

    Returns:
        A ``(feature_list, label_list)`` tuple.

    Raises:
        ValueError: if the file has no header row.
    """
    feature_list = []
    label_list = []
    # ``newline=""`` is what the csv module documents for files it reads.
    with open(path, "r", newline="") as data_file:
        reader = csv.reader(data_file)
        print(reader)
        headers = next(reader, None)
        if headers is None:
            raise ValueError("no header row found in %r" % (path,))
        print(headers)  # ["RID", 'age', .....]
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; they carry no label.
                continue
            label_list.append(row[-1])
            # e.g. {"credit_rating": "fair", "age": "youth"} -- dicts are the
            # input format DictVectorizer turns into a matrix.
            feature_list.append(dict(zip(headers[1:-1], row[1:-1])))
    return feature_list, label_list


def main(data_path=DATA_PATH, dot_path=DOT_PATH):
    """Fit the tree, export it as Graphviz dot, and predict a modified row.

    Args:
        data_path: CSV file to train on.
        dot_path: where the Graphviz description of the fitted tree is written.
    """
    feature_list, label_list = _read_dataset(data_path)
    print(feature_list)

    vec = DictVectorizer()
    dummy_x = vec.fit_transform(feature_list).toarray()
    print("dummyX:" + str(dummy_x))  # the data as a 2-D numeric matrix
    feature_names = _feature_names(vec)
    print(feature_names)
    print("labelList:" + str(label_list))

    lb = preprocessing.LabelBinarizer()
    dummy_y = lb.fit_transform(label_list)
    print("dummyY:" + str(dummy_y))

    clf = tree.DecisionTreeClassifier(criterion="entropy")
    clf = clf.fit(dummy_x, dummy_y)
    print("clf:", str(clf))

    # Store the decision tree. Graphviz command to convert the dot file to
    # PDF: dot -T pdf iris.dot -o output.pdf -- the PDF shows the tree shape.
    with open(dot_path, "w") as dot_file:
        tree.export_graphviz(clf, feature_names=feature_names, out_file=dot_file)

    # Prediction: take an already-encoded row, copy it, and tweak the copy.
    one_row_x = dummy_x[0, :]
    print("oneRowX:" + str(one_row_x))
    # Copy so that editing the new row does not write through to dummy_x.
    new_row_x = one_row_x.copy()
    new_row_x[0] = 1
    new_row_x[2] = 0
    print("newRowX:", str(new_row_x))
    # predict() expects a 2-D array: one row per sample.
    predicted_y = clf.predict(new_row_x.reshape(1, -1))
    print("predictedY:" + str(predicted_y))


if __name__ == "__main__":
    main()
魔乐社区(Modelers.cn) 是一个中立、公益的人工智能社区,提供人工智能工具、模型、数据的托管、展示与应用协同服务,为人工智能开发及爱好者搭建开放的学习交流平台。社区通过理事会方式运作,由全产业链共同建设、共同运营、共同享有,推动国产AI生态繁荣发展。
更多推荐



所有评论(0)