日韩性视频-久久久蜜桃-www中文字幕-在线中文字幕av-亚洲欧美一区二区三区四区-撸久久-香蕉视频一区-久久无码精品丰满人妻-国产高潮av-激情福利社-日韩av网址大全-国产精品久久999-日本五十路在线-性欧美在线-久久99精品波多结衣一区-男女午夜免费视频-黑人极品ⅴideos精品欧美棵-人人妻人人澡人人爽精品欧美一区-日韩一区在线看-欧美a级在线免费观看

歡迎訪問 生活随笔!

生活随笔

當前位置: 首頁 > 编程语言 > python >内容正文

python

python决策树代码实现

發布時間:2024/1/1 python 31 豆豆
生活随笔 收集整理的這篇文章主要介紹了 python决策树代码实现,小編覺得挺不錯的,現在分享給大家,幫大家做個參考。

python決策樹代碼實現

實現一個簡單的決策樹,可以同時處理屬性值是連續和離散的情況。
使用sklearn里面的鳶尾花等數據集驗證,正確率還不錯(90%+)

Github地址:https://github.com/nhjydywd/DecisionTree


使用方式:

import DecisionTree
node = DecisionTree.trainDecisionTree(labels, attrs)
result = node.predict(attr)

以下為決策樹的代碼(DecisionTree.py):

"""A small binary decision tree (Gini impurity) for discrete and continuous attributes.

Usage::

    node = trainDecisionTree(labels, attrs)
    result = node.predict(attr)

``attrs`` is a 2-D array (one row per sample, one column per attribute) and
``labels`` is a 1-D array with one label per row.
"""

from collections import Counter

import numpy as np

# A column with more than this many distinct values is treated as continuous.
_DISCRETE_THRESHOLD = 10
# A node holding this many samples or fewer becomes a majority-vote leaf.
_LEAF_SIZE = 5


def trainDecisionTree(np_label, np_attrs):
    """Train a decision tree and return its root ``TreeNode``.

    Args:
        np_label: 1-D array-like of labels, one per sample.
        np_attrs: 2-D array-like of attribute values (samples x attributes).

    Returns:
        The root ``TreeNode``; call ``predict`` on it with one attribute row.

    Raises:
        ValueError: if the inputs are empty, not 2-D, or of mismatched length.
    """
    np_label = np.asarray(np_label)
    np_attrs = np.asarray(np_attrs)
    if np_attrs.ndim != 2:
        raise ValueError("Error: attrs must be a 2-D array!")
    print("Data shape: " + str(np.shape(np_attrs)))

    # Decide per column whether the attribute is discrete: few distinct
    # values means discrete (equality test), otherwise continuous (threshold).
    b_discrete = [
        len(set(np_attrs[:, i])) <= _DISCRETE_THRESHOLD
        for i in range(np_attrs.shape[1])
    ]

    node = TreeNode()
    processTreeNode(node, np_label, np_attrs, b_discrete)
    return node


def compareEqual(left, right):
    """Return ``left == right`` (the test used for discrete attributes)."""
    return left == right


def compareLessOrEqual(left, right):
    """Return ``left <= right`` (the test used for continuous attributes)."""
    return left <= right


class TreeNode:
    """One node of the tree.

    A leaf has ``label`` set. An inner node has ``lChild`` and ``rChild``;
    the split test (attribute index, comparison, value) is stored on the
    *left child*, and a sample that fails that test goes to the right child.
    """

    def __init__(self):
        self.label = None
        self.lChild = None
        self.rChild = None
        self.compareIndexAttr = None
        self.compareValue = None
        self.compareMethod = None

    def accept(self, attrs):
        """Return True when ``attrs`` satisfies this node's split test."""
        attr = attrs[self.compareIndexAttr]
        return bool(self.compareMethod(attr, self.compareValue))

    def predict(self, attrs):
        """Return the predicted label for a single attribute row ``attrs``."""
        # ``is not None`` rather than ``!= None``: labels may be NumPy values
        # and a label equal to 0 must still count as "set".
        if self.label is not None:
            return self.label
        if self.lChild.accept(attrs):
            return self.lChild.predict(attrs)
        return self.rChild.predict(attrs)


def devide(np_label, np_attrs, compareIndexAttr, compareMethod, compareValue):
    """Split a dataset in two by testing one attribute of every row.

    Args:
        np_label: labels, one per row of ``np_attrs``.
        np_attrs: 2-D attribute array.
        compareIndexAttr: index of the column to test.
        compareMethod: callable ``(value, compareValue) -> bool``.
        compareValue: the value each row's attribute is compared against.

    Returns:
        ``(left_label, left_attrs, right_label, right_attrs)`` where "left"
        holds the rows for which the test is true. Row order is preserved.
    """
    np_label = np.asarray(np_label)
    np_attrs = np.asarray(np_attrs)
    row_count = np.shape(np_attrs)[0]
    # Evaluate the test row by row so any scalar comparison callable works.
    mask = np.fromiter(
        (bool(compareMethod(row[compareIndexAttr], compareValue)) for row in np_attrs),
        dtype=bool,
        count=row_count,
    )
    return np_label[mask], np_attrs[mask], np_label[~mask], np_attrs[~mask]


def countDistinctValues(np_values):
    """Return a dict mapping each distinct value to its count, in first-seen order."""
    return dict(Counter(np_values))


def findDevidePoint(np_label, np_attrs, indexAttr, bDiscrete):
    """Find the best split of one attribute by minimum weighted Gini impurity.

    Discrete attributes are tested with equality against each distinct value;
    continuous attributes with ``<=`` against midpoints of adjacent sorted
    values.

    Returns:
        ``(compareMethod, compareValue, minGiniIndex)``. When there is no
        candidate split (fewer than two rows of a continuous attribute),
        ``compareValue`` is ``None`` and ``minGiniIndex`` is 1.
    """
    np_label = np.asarray(np_label)
    column = np.asarray(np_attrs)[:, indexAttr]

    if bDiscrete:
        compareMethod = compareEqual
        candidateValue = list(countDistinctValues(column))
    else:
        compareMethod = compareLessOrEqual
        sorted_a = np.sort(column)
        # Repeated midpoints describe the same split; evaluating each once
        # gives the same winner because ties are broken by first occurrence.
        candidateValue = np.unique((sorted_a[:-1] + sorted_a[1:]) / 2)

    compareValue = None
    minGiniIndex = 1
    for v in candidateValue:
        # Both comparison helpers work element-wise on a whole column.
        mask = np.asarray(compareMethod(column, v), dtype=bool)
        if mask.shape != column.shape:
            # Comparison did not broadcast (odd dtypes); fall back to per-row.
            mask = np.fromiter(
                (bool(compareMethod(x, v)) for x in column),
                dtype=bool,
                count=len(column),
            )
        theGiniIndex = giniIndex([np_label[mask], np_label[~mask]])
        if theGiniIndex < minGiniIndex:
            minGiniIndex = theGiniIndex
            compareValue = v
    return compareMethod, compareValue, minGiniIndex


def processTreeNode(node, np_label, np_attrs, b_discrete):
    """Recursively grow the subtree rooted at ``node`` from the given samples.

    Args:
        node: the ``TreeNode`` to fill in (mutated in place).
        np_label: labels of the samples reaching this node.
        np_attrs: 2-D attributes of those samples.
        b_discrete: per-attribute flags, True when the attribute is discrete.

    Raises:
        ValueError: on empty input or mismatched sizes.
    """
    if len(np_label) != len(np_attrs):
        raise ValueError("Error: label size != attr size")
    if len(np_label) <= 0:
        raise ValueError("Error: label size <= 0!")
    if np.shape(np_attrs)[1] != len(b_discrete):
        raise ValueError("Error: numbers of attrs != length of b_discrete!")

    # Pure node: every sample has the same label.
    if isArrayElementIdentity(np_label):
        node.label = np_label[0]
        return

    # Too few samples to split further: majority vote.
    if len(np_label) <= _LEAF_SIZE:
        node.label = getMostElement(np_label)
        return

    if len(np_label) > 1000:
        print("Current recursion data size: " + str(len(np_label)))

    # Find the attribute whose best split has the lowest Gini index.
    minGiniIndex = 1
    chosen = None
    for i in range(np.shape(np_attrs)[1]):
        compareMethod, compareValue, attrGini = findDevidePoint(
            np_label, np_attrs, i, b_discrete[i]
        )
        if attrGini < minGiniIndex:
            minGiniIndex = attrGini
            chosen = (i, compareMethod, compareValue)

    # No attribute offers any split: fall back to a majority-vote leaf.
    if chosen is None:
        node.label = getMostElement(np_label)
        return

    chooseAttrIndex, chooseCompareMethod, chooseCompareValue = chosen
    l_label, l_attrs, r_label, r_attrs = devide(
        np_label, np_attrs, chooseAttrIndex, chooseCompareMethod, chooseCompareValue
    )

    # If the best split leaves one side empty, the other side is this very
    # dataset again; recursing would never terminate, so stop here.
    if len(l_label) == 0 or len(r_label) == 0:
        node.label = getMostElement(np_label)
        return

    # The split test lives on the left child (see TreeNode.predict).
    node.lChild = TreeNode()
    node.lChild.compareIndexAttr = chooseAttrIndex
    node.lChild.compareMethod = chooseCompareMethod
    node.lChild.compareValue = chooseCompareValue
    processTreeNode(node.lChild, l_label, l_attrs, b_discrete)

    node.rChild = TreeNode()
    processTreeNode(node.rChild, r_label, r_attrs, b_discrete)


def isArrayElementIdentity(np_array):
    """Return True when every element of ``np_array`` equals the first one."""
    first = np_array[0]
    return all(x == first for x in np_array)


def getMostElement(np_array):
    """Return the most frequent element (first-seen wins ties); None if empty."""
    counts = Counter(np_array)
    return max(counts, key=counts.get, default=None)


def gini(ls_p):
    """Return the Gini impurity ``1 - sum(p**2)`` for class probabilities ``ls_p``."""
    result = 1
    # Sequential subtraction keeps floating-point results (and therefore
    # tie-breaking between equally good splits) stable.
    for p in ls_p:
        result -= p * p
    return result


def giniIndex(ls_devide_np_label):
    """Return the size-weighted Gini impurity of a partition.

    Args:
        ls_devide_np_label: list of label arrays, one per part of the split.

    Returns:
        The weighted average impurity; 0 when all parts are empty.
    """
    countTotal = sum(len(np_label) for np_label in ls_devide_np_label)
    if countTotal == 0:
        return 0

    result = 0
    for np_label in ls_devide_np_label:
        size = len(np_label)
        if size == 0:
            # An empty part carries zero weight.
            continue
        ls_p = [count / size for count in countDistinctValues(np_label).values()]
        result += gini(ls_p) * size / countTotal
    return result

總結

以上是生活随笔為你收集整理的python决策树代码实现的全部內容,希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。