當前位置：首頁 > 编程语言 > python >内容正文

python

决策树python建模中的坑：ValueError: Expected 2D array, got 1D array instead:

發布時間：2025/3/21 python 27 豆豆

生活随笔收集整理的這篇文章主要介紹了「决策树python建模中的坑：ValueError: Expected 2D array, got 1D array instead」，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

決策樹python建模中的坑

代碼

#coding=utf-8
# Demo script: trains a decision tree on AllElectronics.csv, then fails at the final predict() call.
from sklearn.feature_extraction import DictVectorizer
import csv
from sklearn import tree
from sklearn import preprocessing
from sklearn.externals.six import StringIO
# NOTE(review): the for/with bodies below carry no indentation in this listing -- presumably lost in extraction.
allElectronicsData = open(r"D:\workspace\python\files\AllElectronics.csv")
# Read the CSV: the first row is taken as the header, every following row is one sample.
reader = csv.reader(allElectronicsData)
headers = reader.next()
print (headers)
featureList = []
labelList = []
# Per row: last column -> labelList; columns 1..len(row)-2 -> a {header: value} dict (column 0 is skipped).
for row in reader:
labelList.append(row[len(row)-1])
rowDict = {}
for i in range(1,len(row)-1):
rowDict[headers[i]]=row[i]
featureList.append(rowDict)
print (featureList)
# Vectorize features: DictVectorizer turns the list of dicts into a dense numeric array.
vec = DictVectorizer()
dummyX = vec.fit_transform(featureList).toarray()
print ("dummyx:" + str(dummyX))
print (vec.get_feature_names())

print ("labelList:" + str(labelList))
# Vectorize class labels with LabelBinarizer.
lb =preprocessing.LabelBinarizer()
dummyY = lb.fit_transform(labelList)
print ("dummyY:"+ str(dummyY))

# Fit a decision tree classifier using the entropy criterion.
clf = tree.DecisionTreeClassifier(criterion='entropy')
clf =clf.fit(dummyX,dummyY)
print ("clf:"+str(clf))

# Visualize model: write the fitted tree to a Graphviz .dot file.
with open("allElectornicinformationGainOri.dot",'w')as f:
f = tree.export_graphviz(clf,feature_names=vec.get_feature_names(),out_file=f)
# Convert the .dot file to a PDF tree with: dot -Tpdf " " -o output.pdf
oneRowx = dummyX[0,:]
print ("oneRowx"+str(oneRowx))
# Test the model.
newRowX = oneRowx  # same array object as oneRowx, not a copy
# Pitfall here: pay close attention to the numpy array's dimensions!
newRowX[0] = 0
newRowX[2] = 1
newRowX.reshape(1, -1)  # result is not assigned, so newRowX keeps its 1-D shape
print ("newRowx:" + str(newRowX))
# oneRowx is still 1-D here, so predict() raises "ValueError: Expected 2D array, got 1D array instead".
predictedY = clf.predict(oneRowx)
print ("predictedY"+str(predictedY))

錯誤如下：

Traceback (most recent call last):
File "D:/workspace/python/.idea/decision_tree.py", line 55, in <module>
predictedY = clf.predict(oneRowx)
File "C:\Python27\lib\site-packages\sklearn\tree\tree.py", line 412, in predict
X = self._validate_X_predict(X, check_input)
File "C:\Python27\lib\site-packages\sklearn\tree\tree.py", line 373, in _validate_X_predict
X = check_array(X, dtype=DTYPE, accept_sparse="csr")
File "C:\Python27\lib\site-packages\sklearn\utils\validation.py", line 441, in check_array
"if it contains a single sample.".format(array))
ValueError: Expected 2D array, got 1D array instead:
array=[0. 0. 1. 0. 1. 1. 0. 0. 1. 0.].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

修正后代碼（關鍵改動：取出 dummyX[0,:] 後立即 .reshape(1, -1)，變成 1×n 的二維陣列；修改元素時也改用 newRowX[0][i]）：

#coding=utf-8

from sklearn.feature_extraction import DictVectorizer
import csv
from sklearn import tree
from sklearn import preprocessing
from sklearn.externals.six import StringIO

# Train an entropy-based decision tree on AllElectronics.csv, export it to a
# Graphviz .dot file, and predict the class of one sample row.

# Read the CSV inside a `with` block so the file handle is always closed.
with open(r"D:\workspace\python\files\AllElectronics.csv") as allElectronicsData:
    reader = csv.reader(allElectronicsData)
    # next(reader) works on Python 2.6+ and Python 3; reader.next() is
    # Python 2 only.
    headers = next(reader)
    print(headers)
    featureList = []
    labelList = []

    for row in reader:
        if not row:
            # csv.reader yields [] for blank lines; skip them instead of
            # failing with IndexError on row[-1].
            continue
        # The last column is the class label.
        labelList.append(row[-1])
        # Columns 1 .. len(row)-2 are the features; column 0 is skipped.
        rowDict = {}
        for i in range(1, len(row) - 1):
            rowDict[headers[i]] = row[i]
        featureList.append(rowDict)
print(featureList)

# Vectorize features: DictVectorizer turns the list of dicts into a dense
# numeric array with one column per "name=value" pair.
vec = DictVectorizer()
dummyX = vec.fit_transform(featureList).toarray()
print("dummyx:" + str(dummyX))
# NOTE: get_feature_names() was replaced by get_feature_names_out() in newer
# scikit-learn releases; switch when upgrading.
featureNames = vec.get_feature_names()
print(featureNames)

print("labelList:" + str(labelList))
# Vectorize class labels.
lb = preprocessing.LabelBinarizer()
dummyY = lb.fit_transform(labelList)
print("dummyY:" + str(dummyY))

# Use a decision tree (entropy criterion) for classification.
clf = tree.DecisionTreeClassifier(criterion='entropy')
clf = clf.fit(dummyX, dummyY)
print("clf:" + str(clf))

# Visualize the model: write the fitted tree to a Graphviz .dot file.
# export_graphviz writes to out_file itself, so its return value is not kept
# (the original rebound `f` to it inside the with block).
with open("allElectornicinformationGainOri.dot", 'w') as f:
    tree.export_graphviz(clf, feature_names=featureNames, out_file=f)
# Convert the .dot file to a PDF tree with: dot -Tpdf " " -o output.pdf

# predict() expects a 2-D array of shape (n_samples, n_features), so the single
# sample is reshaped to (1, n_features) up front.
oneRowx = dummyX[0, :].reshape(1, -1)
print("oneRowx" + str(oneRowx))

# Test the model on an edited copy of the first row.  copy() matters: the
# reshaped slice is a view, so editing it in place would also overwrite
# dummyX[0] and oneRowx.
newRowX = oneRowx.copy()
# Pitfall: mind the numpy dimensions -- the row is now 2-D, so elements are
# addressed as [0][i].
newRowX[0][0] = 0
newRowX[0][2] = 1
print("newRowx:" + str(newRowX))

# Predict on the edited row (already 2-D, no further reshape needed).
predictedY = clf.predict(newRowX)
print("predictedY" + str(predictedY))

運行結果：

C:\Python27\python.exe D:/workspace/python/.idea/decision_tree.py
['RID', 'age', 'income', 'student', 'credit_rating', 'class_buys_computer']
[{'credit_rating': 'fair', 'age': 'youth', 'student': 'no', 'income': 'high'}, {'credit_rating': 'excellent', 'age': 'youth', 'student': 'no', 'income': 'high'}, {'credit_rating': 'fair', 'age': 'middle_aged', 'student': 'no', 'income': 'high'}, {'credit_rating': 'fair', 'age': 'senior', 'student': 'no', 'income': 'medium'}, {'credit_rating': 'fair', 'age': 'senior', 'student': 'yes', 'income': 'low'}, {'credit_rating': 'excellent', 'age': 'senior', 'student': 'yes', 'income': 'low'}, {'credit_rating': 'excellent', 'age': 'middle_aged', 'student': 'yes', 'income': 'low'}, {'credit_rating': 'fair', 'age': 'youth', 'student': 'no', 'income': 'medium'}, {'credit_rating': 'fair', 'age': 'youth', 'student': 'yes', 'income': 'low'}, {'credit_rating': 'fair', 'age': 'senior', 'student': 'yes', 'income': 'medium'}, {'credit_rating': 'excellent', 'age': 'youth', 'student': 'yes', 'income': 'medium'}, {'credit_rating': 'excellent', 'age': 'middle_aged', 'student': 'no', 'income': 'medium'}, {'credit_rating': 'fair', 'age': 'middle_aged', 'student': 'yes', 'income': 'high'}, {'credit_rating': 'excellent', 'age': 'senior', 'student': 'no', 'income': 'medium'}]
dummyx:[[0. 0. 1. 0. 1. 1. 0. 0. 1. 0.]
[0. 0. 1. 1. 0. 1. 0. 0. 1. 0.]
[1. 0. 0. 0. 1. 1. 0. 0. 1. 0.]
[0. 1. 0. 0. 1. 0. 0. 1. 1. 0.]
[0. 1. 0. 0. 1. 0. 1. 0. 0. 1.]
[0. 1. 0. 1. 0. 0. 1. 0. 0. 1.]
[1. 0. 0. 1. 0. 0. 1. 0. 0. 1.]
[0. 0. 1. 0. 1. 0. 0. 1. 1. 0.]
[0. 0. 1. 0. 1. 0. 1. 0. 0. 1.]
[0. 1. 0. 0. 1. 0. 0. 1. 0. 1.]
[0. 0. 1. 1. 0. 0. 0. 1. 0. 1.]
[1. 0. 0. 1. 0. 0. 0. 1. 1. 0.]
[1. 0. 0. 0. 1. 1. 0. 0. 0. 1.]
[0. 1. 0. 1. 0. 0. 0. 1. 1. 0.]]
['age=middle_aged', 'age=senior', 'age=youth', 'credit_rating=excellent', 'credit_rating=fair', 'income=high', 'income=low', 'income=medium', 'student=no', 'student=yes']
labelList:['no', 'no', 'yes', 'yes', 'yes', 'no', 'yes', 'no', 'yes', 'yes', 'yes', 'yes', 'yes', 'no']
dummyY:[[0]
[0]
[1]
[1]
[1]
[0]
[1]
[0]
[1]
[1]
[1]
[1]
[1]
[0]]
clf:DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=None,
max_features=None, max_leaf_nodes=None,
min_impurity_decrease=0.0, min_impurity_split=None,
min_samples_leaf=1, min_samples_split=2,
min_weight_fraction_leaf=0.0, presort=False, random_state=None,
splitter='best')
oneRowx[[0. 0. 1. 0. 1. 1. 0. 0. 1. 0.]]
newRowx:[[0. 0. 1. 0. 1. 1. 0. 0. 1. 0.]]
predictedY[0]

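總結：注意維度。clf.predict 需要二維陣列（樣本數 × 特徵數），單一樣本必須先用 reshape(1, -1) 轉成 1×n；而且 reshape 不會就地修改陣列，必須使用它的回傳值。原文標紅的位置即修正后代碼中 oneRowx 的 reshape(1, -1) 以及 newRowX[0][0]、newRowX[0][2] 這幾行。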

轉載于:https://www.cnblogs.com/mobiwangyue/p/8243979.html

總結

以上是生活随笔為你收集整理的决策树python建模中的坑：ValueError: Expected 2D array, got 1D array instead:的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇：移动web——基本知识点总结
下一篇： Python 爬取生成中文词云以爬取知乎