# 线性回归预测 (Linear regression prediction)
import pandas as pd
import numpy as np
from sklearn.metrics import r2_score
from sklearn import metrics
from math import sqrt
# Load the raw dataset; the CSV file is decoded as GBK.
features = pd.read_csv('/Users/mac/Desktop/111.csv', encoding='gbk')
print('The shape of our features is:', features.shape)

# One-hot encode the dataset.
features = pd.get_dummies(features)

# Split off the regression target ('price'); what remains are the model inputs.
labels = np.array(features.pop('price'))

# Remember the input column names, then convert the inputs to a NumPy array.
feature_list = features.columns.tolist()
features = np.array(features)

# Hold out 25% of the rows as the test set; random_state pins the shuffle.
from sklearn.model_selection import train_test_split
(train_features, test_features,
 train_labels, test_labels) = train_test_split(
    features,
    labels,
    test_size=0.25,
    random_state=0,
)
from sklearn import linear_model

# Fit a linear regression on the training split; fit() returns the estimator
# itself, so construction and fitting are chained.
reg = linear_model.LinearRegression().fit(train_features, train_labels)

# Predict the target for every row of the held-out test split.
predictions = reg.predict(test_features)
# Absolute error of each test-set prediction.
errors = np.abs(predictions - test_labels)

# Number of test samples. np.alen() was deprecated in NumPy 1.18 and removed
# in NumPy 1.23, so the built-in len() is used instead.
n = len(test_labels)

# Mean absolute percentage error (MAPE), kept as a fraction (not multiplied
# by 100) and rounded to two decimals for display.
# NOTE: a zero entry in test_labels makes this division produce inf/nan.
MAPE = np.sum(errors / test_labels) / n
print('Mean Absolute Percentage Error:', round(MAPE, 2))

# Coefficient of determination (R^2) of the predictions against the true labels.
R2 = r2_score(test_labels, predictions)
print('R2:', round(R2, 2))

# "Accuracy" is reported as 100 minus the MAPE expressed in percent.
mape = np.mean(100 * (errors / test_labels))
accuracy = 100 - mape
print('Accuracy:', round(accuracy, 2), '%.')
import matplotlib.pyplot as plt

# To render CJK text in the figure, a font such as SimHei can be enabled:
# plt.rcParams['font.sans-serif'] = [u'SimHei']
plt.rcParams.update({
    'axes.unicode_minus': False,
    'figure.figsize': (20.0, 4.0),  # figure width and height
    'savefig.dpi': 600,             # resolution used when the figure is saved
})

# Series to compare: true test labels versus model predictions.
acture_price = test_labels
reg_Pricing_price = predictions

# Draw both series against the sample index: actual in red, predicted in blue.
plt.plot(acture_price, color="r", label="acture_price")
plt.plot(reg_Pricing_price, color='b', label="HP_Pricing_price")

# Axis labels, title and legend.
plt.xlabel("sample")
plt.ylabel("price")
plt.title("Actual Price vs Pricing Price")
plt.legend()

# Display the figure.
plt.show()
# 總結 (Summary)
# - 上一篇: 千元价位两款新机,小米6x和荣耀8x对比
# - 下一篇: 电影票选座