日韩性视频-久久久蜜桃-www中文字幕-在线中文字幕av-亚洲欧美一区二区三区四区-撸久久-香蕉视频一区-久久无码精品丰满人妻-国产高潮av-激情福利社-日韩av网址大全-国产精品久久999-日本五十路在线-性欧美在线-久久99精品波多结衣一区-男女午夜免费视频-黑人极品ⅴideos精品欧美棵-人人妻人人澡人人爽精品欧美一区-日韩一区在线看-欧美a级在线免费观看

歡迎訪問 生活随笔!

生活随笔

當(dāng)前位置: 首頁(yè) > 编程语言 > python >内容正文

python

python 决策树 math库 c45算法

發布時間:2023/12/20 python 27 豆豆
生活随笔 收集整理的這篇文章主要介紹了 python 决策树 math库 c45算法 小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

2019獨角獸企業重金招聘Python工程師標準>>>

每周一搏,提升自我。

這段時(shí)間對(duì)python的應(yīng)用,對(duì)python的理解越來(lái)越深。摸索中修改網(wǎng)上實(shí)例代碼,有了自己的理解。

c45是ID3算法的升級(jí)版,比ID3高級(jí)。個(gè)人建議,用CART算法,感覺(jué)比C45好。

下面是c45代碼,其中顯示決策樹結構的代碼,下篇博文發布。

#!/usr/bin/python
# coding:utf-8
"""C4.5 decision-tree builder.

Builds a decision tree (as nested dicts) by choosing, at each node, the
feature with the highest information-gain *ratio* — the key difference
between C4.5 and ID3, which uses raw information gain.

Ported to Python 3 and fixed relative to the original blog version:
  * majorityCnt() now returns the *most frequent* class instead of
    max(dict), which returned the alphabetically-largest label.
  * pickle files are opened in binary mode; files closed via `with`.
  * classify() returns None for unseen feature values instead of
    raising UnboundLocalError.
"""

import pickle
import sys
from math import log


def createDataSet(trainDataFile):
    """Load a training set from a comma-separated file.

    Each line is ``label,feat1,...,feat10``; the class label (column 0)
    is moved to the end of each row, so ``row[-1]`` is always the label.

    NOTE(review): this loader is shadowed by the no-argument
    createDataSet() defined below (same as in the original script); it
    is kept for callers that import it explicitly.

    Returns:
        (dataSet, labels): list of rows, and the 10 feature names.
    """
    print(trainDataFile)
    dataSet = []
    try:
        with open(trainDataFile) as fin:
            for line in fin:
                cols = line.strip('\n').split(',')
                # Feature columns 1..10 first, class label (column 0) last.
                row = cols[1:11] + [cols[0]]
                dataSet.append(row)
    except (OSError, IndexError):
        # File missing/unreadable or a malformed short line.
        print('Usage xxx.py trainDataFilePath')
        sys.exit()
    labels = ['cip1', 'cip2', 'cip3', 'cip4', 'sip1', 'sip2', 'sip3',
              'sip4', 'sport', 'domain']
    print('dataSetlen', len(dataSet))
    return dataSet, labels


def calcShannonEntOfFeature(dataSet, feat):
    """Shannon entropy (base 2) of column ``feat``.

    Pass ``feat=-1`` for the entropy of the class label itself.
    """
    numEntries = len(dataSet)
    valueCounts = {}
    for featVec in dataSet:
        value = featVec[feat]
        valueCounts[value] = valueCounts.get(value, 0) + 1
    shannonEnt = 0.0
    for count in valueCounts.values():
        prob = count / float(numEntries)
        shannonEnt -= prob * log(prob, 2)
    return shannonEnt


def splitDataSet(dataSet, axis, value):
    """Return the rows where column ``axis`` equals ``value``,
    with that column removed from each returned row."""
    retDataSet = []
    for featVec in dataSet:
        if featVec[axis] == value:
            reducedFeatVec = featVec[:axis] + featVec[axis + 1:]
            retDataSet.append(reducedFeatVec)
    return retDataSet


def chooseBestFeatureToSplit(dataSet):
    """Pick the feature index with the highest gain ratio (C4.5).

    Returns -1 when no feature yields a positive gain ratio (e.g. all
    rows have identical features but different classes).
    """
    numFeatures = len(dataSet[0]) - 1  # last column is the class label
    baseEntropy = calcShannonEntOfFeature(dataSet, -1)
    bestInfoGainRate = 0.0
    bestFeature = -1
    for i in range(numFeatures):
        uniqueVals = set(example[i] for example in dataSet)
        newEntropy = 0.0
        for value in uniqueVals:
            subDataSet = splitDataSet(dataSet, i, value)
            prob = len(subDataSet) / float(len(dataSet))
            newEntropy += prob * calcShannonEntOfFeature(subDataSet, -1)
        infoGain = baseEntropy - newEntropy
        # Split information (intrinsic value); gain ratio = gain / IV.
        iv = calcShannonEntOfFeature(dataSet, i)
        if iv == 0:
            # Single-valued feature: splitting on it is useless.
            continue
        infoGainRate = infoGain / iv
        if infoGainRate > bestInfoGainRate:
            bestInfoGainRate = infoGainRate
            bestFeature = i
    return bestFeature


def majorityCnt(classList):
    """Return the most frequent class label in ``classList``.

    BUGFIX: the original returned ``max(classCount)`` — the
    alphabetically-largest *key* — not the majority class.
    """
    classCount = {}
    for vote in classList:
        classCount[vote] = classCount.get(vote, 0) + 1
    return max(classCount, key=classCount.get)


def createTree(dataSet, labels):
    """Recursively build a C4.5 tree as nested dicts.

    NOTE: mutates ``labels`` (removes the chosen feature name at each
    split); pass a copy if the caller still needs the full list.
    """
    classList = [example[-1] for example in dataSet]
    # All samples share one class: make a leaf.
    if classList.count(classList[0]) == len(classList):
        return classList[0]
    # No features left: majority vote.
    if len(dataSet[0]) == 1:
        return majorityCnt(classList)
    bestFeat = chooseBestFeatureToSplit(dataSet)
    if bestFeat == -1:
        # Features identical but classes differ — classes are unrelated
        # to the features; arbitrarily use the first class as the leaf.
        return classList[0]
    bestFeatLabel = labels[bestFeat]
    myTree = {bestFeatLabel: {}}
    del labels[bestFeat]
    featValues = set(example[bestFeat] for example in dataSet)
    for value in featValues:
        subLabels = labels[:]  # fresh copy per branch
        myTree[bestFeatLabel][value] = createTree(
            splitDataSet(dataSet, bestFeat, value), subLabels)
    return myTree


def createDataSet():
    """Tiny demo data set.

    Features: weapon (0 rifle / 1 machine-gun), bullet (0 few / 1 many),
    blood (0 low / 1 high); class: 'fight' or 'run'.
    """
    dataSet = [[1, 1, 0, 'fight'],
               [1, 0, 1, 'fight'],
               [1, 0, 1, 'fight'],
               [1, 0, 1, 'fight'],
               [0, 0, 1, 'run'],
               [0, 1, 0, 'fight'],
               [0, 1, 1, 'run']]
    labels = ['weapon', 'bullet', 'blood']
    return dataSet, labels


def printData(myData):
    """Print the data set one row per line."""
    for item in myData:
        print('%s' % (item,))


def classify(inputTree, featLabels, testVec):
    """Classify ``testVec`` by walking the nested-dict tree.

    Returns the leaf label, or None if a feature value in ``testVec``
    never appeared in the training data for that branch.
    """
    firstStr = next(iter(inputTree))
    secondDict = inputTree[firstStr]
    featIndex = featLabels.index(firstStr)
    classLabel = None
    for key in secondDict:
        if testVec[featIndex] == key:
            if isinstance(secondDict[key], dict):
                classLabel = classify(secondDict[key], featLabels, testVec)
            else:
                classLabel = secondDict[key]
    return classLabel


def storeTree(inputTree, filename):
    """Pickle the tree to ``filename`` (binary mode — required for pickle)."""
    with open(filename, 'wb') as fw:
        pickle.dump(inputTree, fw)


def grabTree(filename):
    """Load a previously stored tree from ``filename``."""
    with open(filename, 'rb') as fr:
        return pickle.load(fr)


def main():
    data, label = createDataSet()
    myTree = createTree(data, label)
    print(myTree)
    # Plotting lives in a companion module; skip gracefully if absent.
    try:
        import showTree as show
    except ImportError:
        print('showTree.py not available; skipping plot')
        return
    show.createPlot(myTree)


if __name__ == '__main__':
    main()

調(diào)用的showTree.py,內(nèi)容如下:

#!/usr/bin/python
# coding:utf-8
"""Render a nested-dict decision tree with matplotlib annotations.

Call createPlot(tree) with a tree of the form produced by createTree():
{feature: {value: subtree_or_leaf, ...}}.

Ported to Python 3 (``dict.keys()[0]`` no longer works); matplotlib is
imported lazily inside createPlot() so the pure-dict helpers
(getNumLeafs, getTreeDepth) work without matplotlib installed.
"""

# Node / arrow styling shared by all plot calls.
decisionNode = dict(boxstyle="sawtooth", fc="0.8")
leafNode = dict(boxstyle="round4", fc="0.8")
arrow_args = dict(arrowstyle="<-")


def createPlot(inTree):
    """Main entry point: lay out and display the whole tree."""
    import matplotlib.pyplot as plt  # lazy: keep module importable without it
    fig = plt.figure(1, facecolor='white')
    fig.clf()
    axprops = dict(xticks=[], yticks=[])
    createPlot.ax1 = plt.subplot(111, frameon=False, **axprops)  # no ticks
    # Global layout state stored as function attributes (original design).
    plotTree.totalW = float(getNumLeafs(inTree))
    plotTree.totalD = float(getTreeDepth(inTree))
    plotTree.xOff = -0.5 / plotTree.totalW
    plotTree.yOff = 1.0
    plotTree(inTree, (0.5, 1.0), '')
    plt.show()


def plotNode(nodeTxt, centerPt, parentPt, nodeType):
    """Draw one node box with an arrow from its parent."""
    createPlot.ax1.annotate(nodeTxt, xy=parentPt, xycoords='axes fraction',
                            xytext=centerPt, textcoords='axes fraction',
                            va="center", ha="center", bbox=nodeType,
                            arrowprops=arrow_args)


def plotMidText(cntrPt, parentPt, txtString):
    """Label the edge midway between a parent and child node."""
    xMid = (parentPt[0] - cntrPt[0]) / 2.0 + cntrPt[0]
    yMid = (parentPt[1] - cntrPt[1]) / 2.0 + cntrPt[1]
    createPlot.ax1.text(xMid, yMid, txtString,
                        va="center", ha="center", rotation=30)


def getNumLeafs(myTree):
    """Count the leaf nodes of a nested-dict tree."""
    numLeafs = 0
    firstStr = next(iter(myTree))  # py3: dict keys are not indexable
    secondDict = myTree[firstStr]
    for key in secondDict:
        if isinstance(secondDict[key], dict):
            # Internal node: recurse.
            numLeafs += getNumLeafs(secondDict[key])
        else:
            numLeafs += 1
    return numLeafs


def getTreeDepth(myTree):
    """Return the depth of a nested-dict tree (1 for a single split)."""
    maxDepth = 0  # fixed typo: was 'maxDepthh'
    firstStr = next(iter(myTree))
    secondDict = myTree[firstStr]
    for key in secondDict:
        if isinstance(secondDict[key], dict):
            thisDepth = 1 + getTreeDepth(secondDict[key])
        else:
            thisDepth = 1
        if thisDepth > maxDepth:
            maxDepth = thisDepth
    return maxDepth


def plotTree(myTree, parentPt, nodeTxt):
    """Recursively plot a subtree rooted at ``parentPt``."""
    numLeafs = getNumLeafs(myTree)
    firstStr = next(iter(myTree))
    # Center this node above its span of leaves.
    cntrPt = (plotTree.xOff + (1.0 + float(numLeafs)) / 2.0 / plotTree.totalW,
              plotTree.yOff)
    plotMidText(cntrPt, parentPt, nodeTxt)
    plotNode(firstStr, cntrPt, parentPt, decisionNode)
    secondDict = myTree[firstStr]
    plotTree.yOff = plotTree.yOff - 1.0 / plotTree.totalD  # descend a level
    for key in secondDict:
        if isinstance(secondDict[key], dict):
            plotTree(secondDict[key], cntrPt, str(key))
        else:
            plotTree.xOff = plotTree.xOff + 1.0 / plotTree.totalW
            plotNode(secondDict[key], (plotTree.xOff, plotTree.yOff),
                     cntrPt, leafNode)
            plotMidText((plotTree.xOff, plotTree.yOff), cntrPt, str(key))
    plotTree.yOff = plotTree.yOff + 1.0 / plotTree.totalD  # back up a level

?

轉(zhuǎn)載于:https://my.oschina.net/wangzonghui/blog/1617580

總結(jié)

以上是生活随笔為你收集整理的python 决策树 math库 c45算法的全部內容,希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。