机器学习实战-第二章代码+注释-KNN
生活随笔收集整理的這篇文章主要介紹了《机器学习实战-第二章代码+注释-KNN》,小編覺得挺不錯的,現在分享給大家,幫大家做個參考。
#-*- coding:utf-8 -*-
#https://blog.csdn.net/fenfenmiao/article/details/52165472
import operator  # operator module (itemgetter is used to sort the votes)
from os import listdir  # lists the file names in a given directory

import matplotlib
import matplotlib.pyplot as plt  # collection of command-style plotting functions
from numpy import *  # scientific computing package
def createDataSet():
    """Return a tiny hand-made data set for experimenting with kNN.

    Returns:
        A ``(group, labels)`` pair: ``group`` is a 4x2 array of points and
        ``labels`` is the list holding the class label of each row.
    """
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['A', 'A', 'B', 'B']
    return group, labels


def classify0(inX, dataSet, labels, k):
    """Classify ``inX`` by majority vote among its ``k`` nearest neighbours.

    Args:
        inX: the point to classify (one feature vector).
        dataSet: 2-D array of training points, one row per sample.
        labels: sequence of class labels, ``labels[i]`` belongs to row ``i``.
        k: number of nearest neighbours that take part in the vote.

    Returns:
        The label with the most votes. On a tie, the label that was met
        first while walking the neighbours from nearest to farthest wins.
    """
    # shape[0] is the number of rows (samples), shape[1] the number of columns.
    dataSetSize = dataSet.shape[0]
    # tile() repeats inX as dataSetSize identical rows so that it can be
    # subtracted from every training sample at once.
    diffMat = tile(inX, (dataSetSize, 1)) - dataSet
    sqDiffMat = diffMat ** 2
    sqDistance = sqDiffMat.sum(axis=1)  # sum along each row
    distances = sqDistance ** 0.5  # Euclidean distance
    # argsort() returns the row indices ordered from nearest to farthest.
    sortedDistIndicies = distances.argsort()
    classCount = {}
    # Slicing (instead of indexing range(k)) keeps this safe when k is larger
    # than the number of training samples.
    for neighbour in sortedDistIndicies[:k]:
        voteIlabel = labels[neighbour]
        classCount[voteIlabel] = classCount.get(voteIlabel, 0) + 1
    # Sort the (label, votes) pairs by the vote count -- element 1 -- in
    # descending order; sorting by element 0 would order by label instead.
    sortedClassCount = sorted(classCount.items(),
                              key=operator.itemgetter(1), reverse=True)
    return sortedClassCount[0][0]


def file2matrix(filename):
    """Parse a tab-separated dating data file into features and labels.

    Every line holds three numeric features followed by a class label, which
    is either one of ``didntLike`` / ``smallDoses`` / ``largeDoses`` or an
    integer code.

    Args:
        filename: path of the text file to read.

    Returns:
        A ``(returnMat, classLabelVector)`` pair: an ``(n, 3)`` float array
        and a list of ``n`` integer class codes (1, 2 or 3 for the named
        labels).

    Raises:
        ValueError: if a label is neither a known name nor an integer.
    """
    labelCodes = {'didntLike': 1, 'smallDoses': 2, 'largeDoses': 3}
    # Read every line at once; the context manager closes the file for us.
    with open(filename) as fr:
        arrayOLines = fr.readlines()
    numberOfLines = len(arrayOLines)
    # One row per line of the file, three feature columns.
    returnMat = zeros((numberOfLines, 3))
    classLabelVector = []
    for index, line in enumerate(arrayOLines):
        listFromLine = line.strip().split('\t')
        # The first three fields are the features.
        returnMat[index, :] = [float(value) for value in listFromLine[0:3]]
        rawLabel = listFromLine[-1]
        if rawLabel in labelCodes:
            classLabelVector.append(labelCodes[rawLabel])
        else:
            # Numeric label files store the class code directly. Anything
            # else raises, because silently skipping a label would shift
            # every following label against its feature row.
            classLabelVector.append(int(rawLabel))
    return returnMat, classLabelVector
# Default data locations used by the demos below. Raw strings keep the
# Windows backslashes literal instead of relying on invalid escape sequences.
DATING_DATA_FILE = r'F:\jxq\Desktop\datingTestSet.txt'
TRAINING_DIGITS_DIR = r'F:\jxq\Documents\Tencent Files\834810071\FileRecv\machinelearninginaction-master\machinelearninginaction-master\Ch02\digits\trainingDigits'
TEST_DIGITS_DIR = r'F:\jxq\Documents\Tencent Files\834810071\FileRecv\machinelearninginaction-master\machinelearninginaction-master\Ch02\digits\testDigits'

# raw_input only exists on Python 2; fall back to input on Python 3.
try:
    _readLine = raw_input
except NameError:
    _readLine = input


def autoNorm(dataSet):
    """Rescale every column of ``dataSet`` to the range [0, 1].

    Args:
        dataSet: 2-D array, one sample per row and one feature per column.

    Returns:
        A ``(normDataSet, ranges, minVals)`` triple: the normalised data, the
        per-column ``max - min`` and the per-column minimum.
    """
    # min(0) / max(0) reduce over the rows, giving one value per column.
    minVals = dataSet.min(0)
    maxVals = dataSet.max(0)
    ranges = maxVals - minVals
    # A constant column has range 0; divide by 1 there so that it becomes 0
    # instead of NaN. The returned ranges are left untouched.
    divisors = where(ranges == 0, 1, ranges)
    normDataSet = (dataSet - minVals) / divisors
    return normDataSet, ranges, minVals


def datingClassTest(filename=DATING_DATA_FILE, hoRatio=0.10, k=3):
    """Print the hold-out error rate of kNN on the dating data.

    The first ``hoRatio`` fraction of the samples is classified against the
    remaining samples.

    Args:
        filename: dating data file understood by ``file2matrix``.
        hoRatio: fraction of the samples held out for testing.
        k: number of neighbours used by ``classify0``.
    """
    datingDataMat, datingLabels = file2matrix(filename)
    normMat, ranges, minVals = autoNorm(datingDataMat)
    m = normMat.shape[0]  # number of samples
    numTestVecs = int(m * hoRatio)  # the training samples start at this row
    errorCount = 0.0
    for i in range(numTestVecs):
        classifierResult = classify0(normMat[i, :],
                                     normMat[numTestVecs:m, :],
                                     datingLabels[numTestVecs:m], k)
        print("the classifier came back with : %d, the real answer is : %d" % (classifierResult, datingLabels[i]))
        if classifierResult != datingLabels[i]:
            errorCount += 1.0
    # Avoid dividing by zero when the hold-out set is empty.
    errorRate = errorCount / float(numTestVecs) if numTestVecs else 0.0
    print("the total error rate is : %f" % errorRate)


def classifyPerson():
    """Ask for one person's three features and print the predicted liking."""
    resultList = ['not at all', 'in small doses', 'in large doses']
    percentTats = float(_readLine("percentage of time spent playing video games?"))
    ffMiles = float(_readLine("frequent flier miles earned per year?"))
    iceCream = float(_readLine("liters of ice cream consumed per year?"))
    datingDataMat, datingLabels = file2matrix(DATING_DATA_FILE)
    normMat, ranges, minVals = autoNorm(datingDataMat)
    inArr = array([ffMiles, percentTats, iceCream])
    # Scale the input with the training set's minimum and range.
    classifierResult = classify0((inArr - minVals) / ranges, normMat, datingLabels, 3)
    # Class codes start at 1 while list indices start at 0.
    print("You will probably like this person:", resultList[classifierResult - 1])


def img2vector(filename):
    """Read a 32x32 text image of 0/1 characters into a 1x1024 vector.

    Args:
        filename: path of a text file whose first 32 lines each start with
            32 digit characters.

    Returns:
        A ``(1, 1024)`` float array holding the image row after row.
    """
    returnVect = zeros((1, 1024))
    with open(filename) as fr:
        for i in range(32):
            lineStr = fr.readline()
            for j in range(32):
                # Each character has to be converted to an integer.
                returnVect[0, 32 * i + j] = int(lineStr[j])
    return returnVect


def _digitFromFileName(fileNameStr):
    """Return the digit encoded in a file name such as ``0_13.txt``."""
    fileStr = fileNameStr.split('.')[0]  # file name without the extension
    return int(fileStr.split('_')[0])


def handwritingClassTest(trainingDir=TRAINING_DIGITS_DIR,
                         testDir=TEST_DIGITS_DIR, k=3):
    """Print the error count and error rate of kNN on the digit images.

    Args:
        trainingDir: directory holding the training digit files.
        testDir: directory holding the test digit files.
        k: number of neighbours used by ``classify0``.
    """
    from os.path import join

    trainingFileList = listdir(trainingDir)
    m = len(trainingFileList)
    trainingMat = zeros((m, 1024))
    hwLabels = []
    for i, fileNameStr in enumerate(trainingFileList):
        hwLabels.append(_digitFromFileName(fileNameStr))
        # Turn the image into one row of the training matrix.
        trainingMat[i, :] = img2vector(join(trainingDir, fileNameStr))

    testFileList = listdir(testDir)
    mTest = len(testFileList)
    errorCount = 0.0
    for fileNameStr in testFileList:
        classNumStr = _digitFromFileName(fileNameStr)
        vectorUnderTest = img2vector(join(testDir, fileNameStr))
        classifierResult = classify0(vectorUnderTest, trainingMat, hwLabels, k)
        print("the classifier came back with: %d, the real answer is: %d" % (classifierResult, classNumStr))
        if classifierResult != classNumStr:
            errorCount += 1.0
    print("\nthe total number of error is: %d" % errorCount)
    # Avoid dividing by zero when the test directory is empty.
    errorRate = errorCount / float(mTest) if mTest else 0.0
    print("\nthe total error rate is: %f" % errorRate)


if __name__ == '__main__':
    # Other demos that can be run instead of the handwriting test:
    #
    #   datingDataMat, datingLabels = file2matrix(DATING_DATA_FILE)
    #   print(datingDataMat)
    #   print(datingLabels)
    #   fig = plt.figure()
    #   ax = fig.add_subplot(111)
    #   # the last two arguments set the marker size and the colour
    #   ax.scatter(datingDataMat[:, 1], datingDataMat[:, 2],
    #              15.0 * array(datingLabels), 15.0 * array(datingLabels))
    #   plt.show()
    #
    #   normMat, ranges, minVals = autoNorm(datingDataMat)
    #   print(normMat)
    #   print(ranges)
    #   print(minVals)
    #
    #   datingClassTest()
    #   classifyPerson()
    #
    #   testVector = img2vector(TEST_DIGITS_DIR + '\\0_13.txt')
    #   print(testVector[0, 0:31])
    #   print(testVector[0, 32:63])
    handwritingClassTest()
轉載于:https://www.cnblogs.com/NEU-2015/p/9201153.html
總結
以上是生活随笔為你收集整理的机器学习实战-第二章代码+注释-KNN的全部內容,希望文章能夠幫你解決所遇到的問題。