日韩性视频-久久久蜜桃-www中文字幕-在线中文字幕av-亚洲欧美一区二区三区四区-撸久久-香蕉视频一区-久久无码精品丰满人妻-国产高潮av-激情福利社-日韩av网址大全-国产精品久久999-日本五十路在线-性欧美在线-久久99精品波多结衣一区-男女午夜免费视频-黑人极品ⅴideos精品欧美棵-人人妻人人澡人人爽精品欧美一区-日韩一区在线看-欧美a级在线免费观看

歡迎訪問 生活随笔!

生活随笔

當前位置: 首頁 > 编程资源 > 编程问答 > 内容正文

编程问答

Part2-Chapter8-预测乐高玩具套装价格

發布時間:2024/3/26 编程问答 43 豆豆
生活随笔 收集整理的這篇文章主要介紹了 Part2-Chapter8-预测乐高玩具套装价格 小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
"""Predict LEGO set prices with cross-validated ridge regression.

The goal is to scrape second-hand LEGO listings from saved eBay pages and
use ridge regression with cross-validation to print a regression equation.
"""
import random

import numpy as np

# Number of ridge penalties tried by ridgeTest.  crossValidation sizes its
# error table from the same constant so the two cannot drift apart.
NUM_LAMBDAS = 30


def scrapePage(retX, retY, inFile, yr, numPce, origPrc):
    """Parse one saved listing page and append its sold items to retX/retY.

    Args:
        retX: list that receives one ``[yr, numPce, newFlag, origPrc]`` row
            per accepted listing (mutated in place).
        retY: list that receives the matching selling price (mutated in
            place).
        inFile: path of the HTML file, read as UTF-8.
        yr: release year recorded for every listing on this page.
        numPce: piece count recorded for every listing on this page.
        origPrc: original price; also used as the filtering threshold.
    """
    # Imported here so the numeric helpers below stay importable on machines
    # that do not have BeautifulSoup installed.
    from bs4 import BeautifulSoup

    with open(inFile, encoding='utf-8') as f:
        html = f.read()
    # NOTE(review): no parser is named, so bs4 picks whichever is installed.
    soup = BeautifulSoup(html)

    # Listings are looked up as <table> elements with r="1", r="2", ...;
    # the first missing index ends the scan.
    i = 1
    currentRow = soup.find_all('table', r="%d" % i)
    while len(currentRow) != 0:
        title = currentRow[0].find_all('a')[1].text
        lwrTitle = title.lower()
        # A title mentioning "new" or "nisb" marks the item as brand new.
        if (lwrTitle.find('new') > -1) or (lwrTitle.find('nisb') > -1):
            newFlag = 1.0
        else:
            newFlag = 0.0
        # Only sold listings are collected; a sold listing has a <span> in
        # its fourth <td>.
        soldUnicde = currentRow[0].find_all('td')[3].find_all('span')
        if len(soldUnicde) == 0:
            print("商品 #%d 沒有出售" % i)
        else:
            # The fifth <td> holds the price text.
            soldPrice = currentRow[0].find_all('td')[4]
            priceStr = soldPrice.text
            priceStr = priceStr.replace('$', '')
            priceStr = priceStr.replace(',', '')
            if len(soldPrice) > 1:
                priceStr = priceStr.replace('Free shipping', '')
            sellingPrice = float(priceStr)
            # Listings at or below half the original price are dropped
            # (the original comment treats them as incomplete sets).
            if sellingPrice > origPrc * 0.5:
                print("%d\t%d\t%d\t%f\t%f" % (yr, numPce, newFlag, origPrc, sellingPrice))
                retX.append([yr, numPce, newFlag, origPrc])
                retY.append(sellingPrice)
        i += 1
        currentRow = soup.find_all('table', r="%d" % i)


def setDataCollect(retX, retY):
    """Scrape the six saved LEGO pages, appending results to retX/retY.

    Each call passes the file name, year, piece count and original price
    of one set to scrapePage.
    """
    scrapePage(retX, retY, 'lego8288.html', 2006, 800, 49.99)
    scrapePage(retX, retY, 'lego10030.html', 2002, 3096, 269.99)
    scrapePage(retX, retY, 'lego10179.html', 2007, 5195, 499.99)
    scrapePage(retX, retY, 'lego10181.html', 2007, 3428, 199.99)
    scrapePage(retX, retY, 'lego10189.html', 2008, 5922, 299.99)
    scrapePage(retX, retY, 'lego10196.html', 2009, 3263, 249.99)


def regularize(xMat, yMat):
    """Standardize the features and center the targets.

    Features are transformed as ``(x - column mean) / column variance``
    (variance, not standard deviation); targets have their mean removed.
    The column means are printed as a side effect.

    Returns:
        ``(inxMat, inyMat)``: the standardized features and centered targets.
    """
    yMean = np.mean(yMat, 0)
    inyMat = yMat - yMean
    inMeans = np.mean(xMat, 0)
    inVar = np.var(xMat, 0)
    print(inMeans)
    inxMat = (xMat - inMeans) / inVar
    return inxMat, inyMat


def rssError(yArr, yHatArr):
    """Return the sum of squared differences between yArr and yHatArr."""
    return ((yArr - yHatArr) ** 2).sum()


def standRegres(xArr, yArr):
    """Compute ordinary least-squares weights via the normal equations.

    Returns:
        The weights as a column matrix, or ``None`` (after printing a
        message) when ``X^T X`` has a zero determinant.
    """
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr).T
    xTx = xMat.T * xMat
    if np.linalg.det(xTx) == 0.0:
        print("無法求逆")
        return
    ws = xTx.I * (xMat.T * yMat)
    return ws


def ridgeRegres(xMat, yMat, lam=0.2):
    """Compute ridge weights ``(X^T X + lam * I)^-1 X^T y``.

    Args:
        xMat: feature matrix (``np.matrix``), one sample per row.
        yMat: target column matrix.
        lam: ridge penalty added to the diagonal of ``X^T X``.

    Returns:
        The weights as a column matrix, or ``None`` (after printing a
        message) when the penalized matrix has a zero determinant.
    """
    xTx = xMat.T * xMat
    denom = xTx + np.eye(np.shape(xMat)[1]) * lam
    if np.linalg.det(denom) == 0.0:
        print("無法求逆")
        return
    return denom.I * (xMat.T * yMat)


def crossValidation(xArr, yArr, numVal=10):
    """Pick a ridge penalty by repeated hold-out validation and print the model.

    For each of ``numVal`` rounds the samples are shuffled, the first 90%
    train a ridge model for every penalty tried by ridgeTest, and the rest
    are used to measure the squared error.  The penalty with the lowest
    mean error is selected and its weights are converted back to the
    original feature scale and printed as an equation.

    Args:
        xArr: list of feature rows; the printed equation expects four
            columns (year, piece count, new flag, original price).
        yArr: list of target prices.
        numVal: number of shuffle-and-split rounds.

    Raises:
        ValueError: if the 90/10 split leaves no hold-out sample.
    """
    m = len(yArr)
    indexList = list(range(m))
    errorMat = np.zeros((numVal, NUM_LAMBDAS))
    for i in range(numVal):
        trainX = []
        trainY = []
        testX = []
        testY = []
        random.shuffle(indexList)
        # First 90% of the shuffled indices train, the remainder test.
        for j in range(m):
            if j < m * 0.9:
                trainX.append(xArr[indexList[j]])
                trainY.append(yArr[indexList[j]])
            else:
                testX.append(xArr[indexList[j]])
                testY.append(yArr[indexList[j]])
        if not testX:
            raise ValueError(
                "crossValidation needs enough samples to hold some out "
                "(got %d)" % m
            )
        wMat = ridgeTest(trainX, trainY)
        # Standardize the hold-out rows with the TRAINING statistics; this
        # does not depend on the penalty, so it is done once per round.
        matTrainX = np.asmatrix(trainX)
        meanTrain = np.mean(matTrainX, 0)
        varTrain = np.var(matTrainX, 0)
        matTestX = (np.asmatrix(testX) - meanTrain) / varTrain
        trainYMean = np.mean(trainY)
        testYArr = np.array(testY)
        for k in range(NUM_LAMBDAS):
            yEst = matTestX * np.asmatrix(wMat[k, :]).T + trainYMean
            errorMat[i, k] = rssError(yEst.T.A, testYArr)
    # Select the penalty with the smallest mean error across rounds.
    meanErrors = np.mean(errorMat, 0)
    minMean = float(min(meanErrors))
    # NOTE(review): the weights come from the LAST round's wMat, not from a
    # refit on all the data -- confirm this is intended.
    bestWeights = wMat[np.nonzero(meanErrors == minMean)]
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr)
    meanX = np.mean(xMat, 0)
    varX = np.var(xMat, 0)
    # Undo the standardization so the weights apply to raw feature values.
    unReg = bestWeights / varX
    print('%f%+f*年份%+f*部件數量%+f*是否全新%+f*原價' % (
        (-1 * np.sum(np.multiply(meanX, unReg)) + np.mean(yMat)),
        unReg[0, 0], unReg[0, 1], unReg[0, 2], unReg[0, 3]))


def ridgeTest(xArr, yArr):
    """Fit ridge regression for NUM_LAMBDAS penalties on standardized data.

    Targets are centered and features are transformed as
    ``(x - column mean) / column variance`` before fitting.  Penalty ``i``
    is ``exp(i - 10)``.

    Returns:
        An array of shape ``(NUM_LAMBDAS, n_features)``; row ``i`` holds the
        weights for penalty ``exp(i - 10)``.
    """
    xMat = np.asmatrix(xArr)
    yMat = np.asmatrix(yArr).T
    yMean = np.mean(yMat, axis=0)
    yMat = yMat - yMean
    xMeans = np.mean(xMat, axis=0)
    xVar = np.var(xMat, axis=0)
    xMat = (xMat - xMeans) / xVar
    wMat = np.zeros((NUM_LAMBDAS, np.shape(xMat)[1]))
    for i in range(NUM_LAMBDAS):
        ws = ridgeRegres(xMat, yMat, np.exp(i - 10))
        wMat[i, :] = ws.T
    return wMat


if __name__ == "__main__":
    lgX = []
    lgY = []
    setDataCollect(lgX, lgY)
    crossValidation(lgX, lgY)

總結

以上是生活随笔為你收集整理的Part2-Chapter8-预测乐高玩具套装价格的全部內容,希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。