日韩性视频-久久久蜜桃-www中文字幕-在线中文字幕av-亚洲欧美一区二区三区四区-撸久久-香蕉视频一区-久久无码精品丰满人妻-国产高潮av-激情福利社-日韩av网址大全-国产精品久久999-日本五十路在线-性欧美在线-久久99精品波多结衣一区-男女午夜免费视频-黑人极品ⅴideos精品欧美棵-人人妻人人澡人人爽精品欧美一区-日韩一区在线看-欧美a级在线免费观看

歡迎訪問 生活随笔!

生活随笔

當前位置: 首頁 > 编程资源 > 编程问答 >内容正文

编程问答

天池工业蒸汽预测

發布時間:2025/3/21 编程问答 70 豆豆
生活随笔 收集整理的這篇文章主要介紹了 天池工业蒸汽预测 小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

導入數據

# Imports and data loading for the Tianchi "industrial steam" regression task.
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.neighbors import KNeighborsRegressor
# NOTE(review): the original text had newlines stripped here
# ("...GradientBoostingRegressorfrom xgboost..."), a syntax error — restored.
# The other ensemble regressors used by later cells are imported here as well.
from sklearn.ensemble import (
    GradientBoostingRegressor,
    RandomForestRegressor,
    AdaBoostRegressor,
    ExtraTreesRegressor,
)
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler, PolynomialFeatures

# Train/test are tab-separated text files.  Tag each row with its origin so the
# two sets can be concatenated, preprocessed together, and split apart later.
train = pd.read_csv(r"C:\Users\dream\Documents\Tencent Files\1799785728\FileRecv\zhengqi_train的副本.txt", sep="\t")
test = pd.read_csv(r"C:\Users\dream\Documents\Tencent Files\1799785728\FileRecv\zhengqi_test的副本.txt", sep="\t")

train["origin"] = "train"
test["origin"] = "test"

data_all = pd.concat([train, test])
print(data_all.shape)  # expected: (4813, 40) — 38 features + target + origin
data_all.head()

結果:

(4813, 40)

根據特征分布做特征選擇

# Feature exploration: overlay the kernel-density estimate of every feature for
# the train and test rows.  Features whose train/test distributions visibly
# disagree are candidates for removal (the next cell drops V5/V11/V17/V22).
plt.figure(figsize=(9, 38 * 6))
for i, col in enumerate(data_all.columns[:-2]):  # skip trailing "origin"/"target"
    cond = data_all["origin"] == "train"
    train_col = data_all[col][cond]  # training-set values of this feature
    cond = data_all["origin"] == "test"
    test_col = data_all[col][cond]  # test-set values of this feature
    axes = plt.subplot(38, 1, i + 1)
    ax = sns.kdeplot(train_col, shade=True, ax=axes)
    sns.kdeplot(test_col, shade=True, ax=ax)

（特征分布對比圖，圖略）

# Remove the features whose train/test distributions were visibly mismatched
# in the KDE plots above.
data_all.drop(["V11", "V17", "V5", "V22"], axis=1, inplace=True)

按相關性進行特征選擇

# Correlation-based feature selection, then a correlation heatmap.
corr = data_all.corr()
# Candidate drops: features with |corr(target)| < 0.1 (seven of them).
cond = corr.loc["target"].abs() < 0.1
drop_labels = corr.loc["target"].index[cond]
drop_labels
# After also inspecting the distributions, only these two are actually dropped.
drop_labels = ['V14', 'V21']
data_all.drop(drop_labels, axis=1, inplace=True)

# Heat map of the training-set correlation matrix.
plt.figure(figsize=(20, 16))
mcorr = train.corr()
# BUG FIX: np.bool was deprecated in NumPy 1.20 and removed in 1.24 — the
# builtin bool is the documented replacement.
mask = np.zeros_like(mcorr, dtype=bool)
mask[np.triu_indices_from(mask)] = True  # hide the upper triangle (mirror of the lower)
cmap = sns.diverging_palette(220, 10, as_cmap=True)  # diverging color scheme
g = sns.heatmap(mcorr, mask=mask, cmap=cmap, square=True, annot=True, fmt="0.2f")  # annot = write values in cells

歸一化操作

# Standardize the feature columns (all but the trailing "origin"/"target")
# to zero mean and unit variance.
data = data_all.iloc[:, :-2]
stand = StandardScaler()
stand.fit(data)
data2 = stand.transform(data)
data2

結果:

# Output (abridged): a standardized float array, e.g.
# array([[ 0.566, 0.016, -0.143, ..., -3.508], ..., [-2.321, ..., 1.988]])

# Rebuild a DataFrame from the standardized array and re-attach the two
# non-feature columns ("origin", "target") by row index.
cols = data_all.columns
data_all_std = pd.DataFrame(data2, columns=cols[:-2])
# Reset to a 0..n-1 index so the index-based merge below aligns rows
# (generalized from the hard-coded 4813 to the actual row count).
data_all.index = np.arange(len(data_all))
data_all_std = pd.merge(data_all_std, data_all.iloc[:, -2:],
                        right_index=True, left_index=True)
data_all_std

異常值處理

# Outlier handling: fit a cross-validated ridge regression on the training
# rows and flag rows whose residual is unusually large as outliers.
from sklearn.linear_model import RidgeCV

data_all_std.head()

ridge = RidgeCV(alphas=[0.0001, 0.001, 0.01, 0.1, 0.2, 0.5, 1, 2, 3, 4, 5, 10, 20, 30, 50])
cond = data_all_std["origin"] == "train"
X_train = data_all_std[cond].iloc[:, :-2]
y_train = data_all_std[cond]["target"]

ridge.fit(X_train, y_train)
# Predictions always deviate somewhat from the truth; rows where the deviation
# is especially large are treated as outliers.
y_ = ridge.predict(X_train)
display(y_train[:100])
display(y_[:100])

# Flag rows whose absolute residual exceeds 0.8 standard deviations of y.
cond = abs(y_train - y_) > y_train.std() * 0.8
cond.sum()

結果:

# Output (abridged): first 100 target values and ridge predictions, and the
# outlier count cond.sum() == 88.

# Visualize the flagged outliers (red): prediction vs. truth, residual vs.
# truth, and a residual histogram.
plt.figure(figsize=(12, 6))

axes = plt.subplot(1, 3, 1)
axes.scatter(y_train, y_)
axes.scatter(y_train[cond], y_[cond], c="r", s=20)

axes = plt.subplot(1, 3, 2)
axes.scatter(y_train, y_train - y_)
axes.scatter(y_train[cond], (y_train - y_)[cond], c="r", s=20)

axes = plt.subplot(1, 3, 3)
(y_train - y_).hist(bins=50, ax=axes)
(y_train - y_)[cond].hist(bins=50, ax=axes, color="r")

# Filter out the rows flagged as outliers above.
outlier_index = cond[cond].index
print(data_all_std.shape)
data_all_std.drop(index=outlier_index, inplace=True)
data_all_std.shape

結果:

(4813, 34) (4725, 34)

機器學習模型?

def detect_model(estimators, data):
    """Fit each estimator and print its validation MSE and R^2 score.

    Parameters
    ----------
    estimators : dict
        Mapping of model name -> unfitted sklearn-style regressor.
    data : tuple
        (X_train, X_valid, y_train, y_valid) as returned by train_test_split.
    """
    X_tr, X_va, y_tr, y_va = data
    for key, estimator in estimators.items():
        estimator.fit(X_tr, y_tr)
        y_ = estimator.predict(X_va)
        mse = mean_squared_error(y_va, y_)
        print("-----------------mse%s" % (key), mse)
        r2 = estimator.score(X_va, y_va)
        print("-----------------r2%s" % (key), r2)
        print("\n")


# NOTE(review): RandomForestRegressor / AdaBoostRegressor / ExtraTreesRegressor
# were never imported by the original import cell — imported here so this cell
# runs on its own.
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, ExtraTreesRegressor

cond = data_all_std["origin"] == "train"
X = data_all_std[cond].iloc[:, :-2]
y = data_all_std[cond]["target"]
data = train_test_split(X, y, test_size=0.2)

# Try a broad pool of regressors and compare their validation scores.
estimators = {}
estimators["knn"] = KNeighborsRegressor()
estimators["linear"] = LinearRegression()
estimators["ridge"] = Ridge()
estimators["lasso"] = Lasso()
estimators["elasticnet"] = ElasticNet()
estimators["forest"] = RandomForestRegressor()
estimators["gbdt"] = GradientBoostingRegressor()
estimators["ada"] = AdaBoostRegressor()
estimators["extreme"] = ExtraTreesRegressor()
estimators["svm_rbf"] = SVR(kernel="rbf")
estimators["svm_poly"] = SVR(kernel="poly")
estimators["xgb"] = XGBRegressor()

detect_model(estimators, data)

結果:

# Output (abridged): per-model validation scores from detect_model, e.g.
#   linear mse~0.085 r2~0.909, ridge mse~0.085, forest mse~0.098,
#   gbdt mse~0.095, xgb mse~0.095; lasso/elasticnet underfit badly.

# NOTE(review): these ensemble regressors are missing from the original import
# cell — imported here so this cell runs on its own.
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor, ExtraTreesRegressor

# Final model pool: keep only the non-linear models.
estimators = {}
# estimators["linear"] = LinearRegression()
# estimators["ridge"] = Ridge()  # the two plain linear models tend to overfit here
estimators["forest"] = RandomForestRegressor()
estimators["gbdt"] = GradientBoostingRegressor()
estimators["ada"] = AdaBoostRegressor()
estimators["extreme"] = ExtraTreesRegressor()
estimators["svm_rbf"] = SVR(kernel="rbf")
estimators["xgb"] = XGBRegressor()

cond = data_all_std["origin"] == "train"
X_train = data_all_std[cond].iloc[:, :-2]
y_train = data_all_std[cond]["target"]
cond = data_all_std["origin"] == "test"
X_test = data_all_std[cond].iloc[:, :-2]

# One prediction per model; average them into a simple ensemble.
# BUG FIX: the original repeated this fit/predict loop twice verbatim — the
# duplicate only refit every model a second time; it has been removed.
y_pred = []
for key, model in estimators.items():
    model.fit(X_train, y_train)
    y_pred.append(model.predict(X_test))
y_ = np.mean(y_pred, axis=0)
pd.Series(y_).to_csv("./emsemble.txt", index=False)

# Simple stacking: append each model's prediction as a new feature (note each
# later model is fit on a frame already containing the earlier models' columns),
# then refit the pool on the augmented data and average the predictions again.
for key, model in estimators.items():
    model.fit(X_train, y_train)
    X_train[key] = model.predict(X_train)
    X_test[key] = model.predict(X_test)

y_pred = []
for key, model in estimators.items():
    model.fit(X_train, y_train)
    y_pred.append(model.predict(X_test))
y_ = np.mean(y_pred, axis=0)
pd.Series(y_).to_csv("./emsemble2.txt", index=False)

數據歸一化

# Build the min-max normalized frame used by the Box-Cox section below.
# BUG FIX: the original referenced `data3` without ever defining it (NameError).
# NOTE(review): it was presumably produced by MinMaxScaler on the feature
# columns, mirroring the StandardScaler cell — confirm against the original
# notebook.
data3 = MinMaxScaler().fit_transform(data_all.iloc[:, :-2])
data_all_norm = pd.DataFrame(data3, columns=data_all.columns[:-2])
data_all_norm
data_all_norm = pd.merge(data_all_norm, data_all.iloc[:, -2:],
                         left_index=True, right_index=True)

特征工程——box-cox變換

# Box-Cox transformation study: for every feature, plot (1) the original
# distribution, (2) its normal Q-Q plot with skewness, (3) its scatter vs. the
# target — then the same three plots after Box-Cox + min-max scaling.
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
import warnings

warnings.filterwarnings("ignore")


def scale_minmax(data):
    """Linearly rescale `data` into the [0, 1] range."""
    return (data - data.min()) / (data.max() - data.min())


fcols = 6
frows = len(data_all_norm.columns[:-2])
plt.figure(figsize=(4 * fcols, 4 * frows))
i = 0

for var in data_all_norm.columns[:-2]:
    dat = data_all_norm[[var, 'target']].dropna()

    # 1) original distribution with a fitted normal curve
    i += 1
    plt.subplot(frows, fcols, i)
    sns.distplot(dat[var], fit=stats.norm)
    plt.title(var + ' Original')
    plt.xlabel('')

    # 2) normal probability (Q-Q) plot; skew closer to 0 means closer to normal
    i += 1
    plt.subplot(frows, fcols, i)
    _ = stats.probplot(dat[var], plot=plt)
    plt.title('skew=' + '{:.4f}'.format(stats.skew(dat[var])))
    plt.xlabel('')
    plt.ylabel('')

    # 3) scatter against the target, titled with the linear correlation
    i += 1
    plt.subplot(frows, fcols, i)
    plt.plot(dat[var], dat['target'], '.', alpha=0.5)
    plt.title('corr=' + '{:.2f}'.format(np.corrcoef(dat[var], dat['target'])[0][1]))

    # 4) distribution after Box-Cox (+1 keeps inputs strictly positive,
    #    as boxcox requires) and min-max rescaling
    i += 1
    plt.subplot(frows, fcols, i)
    trans_var, lambda_var = stats.boxcox(dat[var].dropna() + 1)
    trans_var = scale_minmax(trans_var)
    sns.distplot(trans_var, fit=stats.norm)
    plt.title(var + ' Tramsformed')
    plt.xlabel('')

    # 5) Q-Q plot of the transformed values
    i += 1
    plt.subplot(frows, fcols, i)
    _ = stats.probplot(trans_var, plot=plt)
    plt.title('skew=' + '{:.4f}'.format(stats.skew(trans_var)))
    plt.xlabel('')
    plt.ylabel('')

    # 6) scatter of the transformed values against the target
    i += 1
    plt.subplot(frows, fcols, i)
    plt.plot(trans_var, dat['target'], '.', alpha=0.5)
    plt.title('corr=' + '{:.2f}'.format(np.corrcoef(trans_var, dat['target'])[0][1]))

總結

以上是生活随笔為你收集整理的天池工业蒸汽预测的全部內容,希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。

主站蜘蛛池模板: 日韩av片在线播放 | 国产午夜久久久 | 亚洲av成人精品毛片 | 91亚洲精品久久久久久久久久久久 | 在线a网站 | 自拍偷拍精品视频 | 国产精品欧美久久久久天天影视 | 看黄色a级片 | 浴室里强摁做开腿呻吟男男 | 国产色区 | 久久精品一日日躁夜夜躁 | 免费处女在线破视频 | www日韩| a级黄色在线观看 | 亚洲美女综合 | 亚洲天堂资源网 | 一区二区视频在线观看 | 综合国产在线 | 国产亚洲精品网站 | 色综合99久久久无码国产精品 | 国产视频一区二区 | 黄色精品 | 精品资源成人 | 哪里可以看毛片 | 日韩免费福利 | 国产精品成av人在线视午夜片 | 超碰人人人人人人 | 黄色小视频在线免费观看 | 182av| 亚洲精品一卡二卡 | 999久久久久久 | 亚洲美女影院 | 动漫av网| 七月色| 色吧久久 | 欧美日韩国产免费观看 | 亚洲一区二区三区四区在线观看 | 欧美精品福利视频 | 一区二区三区欧美视频 | 狠狠干网| 五月天色网站 | 熟女少妇在线视频播放 | 亚洲影视一区二区三区 | 亚洲精品观看 | 国产99久久 | 精品无码久久久久久久久成人 | 毛片1000部免费看 | 偷拍久久久 | 国产婷婷一区二区三区 | 欧洲精品久久一区二区 | av免费的| 九色精品| 一级片免费在线 | 草草在线观看视频 | 99国产热| 国产第二页 | 久久久黄色片 | 少妇免费视频 | 日本一区二区在线观看视频 | 正在播放国产一区 | 久久久久久久久久久久电影 | 亚洲网av| 大尺度av | 国产伦精品一区二区三区在线 | 国产又粗又长 | 一本大道熟女人妻中文字幕在线 | 丰满熟女人妻一区二区三区 | 久久一视频 | 亚洲中文字幕无码不卡电影 | 99人妻碰碰碰久久久久禁片 | 在线观看日本 | 国产午夜精品一区二区 | 日日夜夜超碰 | 福利精品在线 | 综合天天色 | 尤物视频在线观看国产性感 | 日韩欧美中文 | 婷婷久久亚洲 | 亚欧洲精品在线视频免费观看 | 久久久久亚洲精品 | 欧美一区二区久久久 | 国产精品毛片视频 | 久久久久久国产精品免费免费 | 黑人大群体交免费视频 | 一二级毛片 | 二级黄色录像 | 中文字幕7| 久久久久人妻一区二区三区 | 国产日韩欧美另类 | 蜜桃视频中文字幕 | 国产成人久久77777精品 | 亚洲欧美变态另类丝袜第一区 | 新超碰在线| 日韩av二区 | 中文字幕一区二区人妻电影丶 | 两口子交换真实刺激高潮 | 欧美日韩成人一区二区三区 | 国产成人免费视频网站 | 日韩一区二区在线播放 |