

1.3 Program Example: Gradient Descent - Machine Learning Notes - Stanford, Andrew Ng


Regression Module

The regression module provides two learning strategies for training the model: batch gradient descent and stochastic gradient descent:

# coding: utf-8
# linear_regression/regression.py
import time

import numpy as np


def exeTime(func):
    """Decorator that measures the running time of a function."""
    def newFunc(*args, **kwargs):
        t0 = time.time()
        back = func(*args, **kwargs)
        return back, time.time() - t0
    return newFunc


def loadDataSet(filename):
    """Load a data set.

    Reads data from a file in the tab-separated format used in
    "Machine Learning in Action":
        "feature1 TAB feature2 TAB feature3 TAB label"

    Args:
        filename: path to the data file
    Returns:
        X: matrix of training samples
        y: matrix of labels
    """
    numFeat = len(open(filename).readline().split('\t')) - 1
    X = []
    y = []
    for line in open(filename).readlines():
        curLine = line.strip().split('\t')
        X.append([float(curLine[i]) for i in range(numFeat)])
        y.append(float(curLine[-1]))
    return np.mat(X), np.mat(y).T


def h(theta, x):
    """Hypothesis (prediction) function.

    Args:
        theta: parameter matrix
        x: feature vector
    Returns:
        the predicted value
    """
    return (theta.T * x)[0, 0]


def J(theta, X, y):
    """Cost function.

    Args:
        theta: parameter matrix
        X: sample matrix
        y: label matrix
    Returns:
        the prediction error (cost) as a 1x1 matrix
    """
    m = len(X)
    return (X * theta - y).T * (X * theta - y) / (2 * m)


@exeTime
def bgd(rate, maxLoop, epsilon, X, y):
    """Batch gradient descent.

    Args:
        rate: learning rate
        maxLoop: maximum number of iterations
        epsilon: convergence tolerance
        X: sample matrix
        y: label matrix
    Returns:
        (theta, errors, thetas), timeConsumed
    """
    m, n = X.shape
    theta = np.zeros((n, 1))  # initialize theta
    count = 0
    converged = False
    errors = []
    thetas = {j: [theta[j, 0]] for j in range(n)}
    while count <= maxLoop:
        if converged:
            break
        count += 1
        # Compute the residual once per iteration so that every theta_j
        # is updated from the same (previous) theta -- a simultaneous update.
        residual = y - X * theta
        for j in range(n):
            deriv = (residual.T * X[:, j] / m)[0, 0]
            theta[j, 0] = theta[j, 0] + rate * deriv
            thetas[j].append(theta[j, 0])
        error = J(theta, X, y)[0, 0]
        errors.append(error)
        if error < epsilon:  # converged
            converged = True
    return theta, errors, thetas


@exeTime
def sgd(rate, maxLoop, epsilon, X, y):
    """Stochastic gradient descent.

    Args:
        rate: learning rate
        maxLoop: maximum number of iterations
        epsilon: convergence tolerance
        X: sample matrix
        y: label matrix
    Returns:
        (theta, errors, thetas), timeConsumed
    """
    m, n = X.shape
    theta = np.zeros((n, 1))  # initialize theta
    count = 0
    converged = False
    errors = []
    thetas = {j: [theta[j, 0]] for j in range(n)}
    while count <= maxLoop:
        if converged:
            break
        count += 1
        errors.append(float('inf'))
        for i in range(m):  # update theta one sample at a time
            if converged:
                break
            diff = y[i, 0] - h(theta, X[i].T)
            for j in range(n):
                theta[j, 0] = theta[j, 0] + rate * diff * X[i, j]
                thetas[j].append(theta[j, 0])
            error = J(theta, X, y)[0, 0]
            errors[-1] = error
            if error < epsilon:  # converged
                converged = True
    return theta, errors, thetas

The code, read together with its comments, should be straightforward: with the help of NumPy it simply reproduces the formulas from the lecture.
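For reference, these are the formulas the code implements. The cost function is

$$J(\theta) = \frac{1}{2m}(X\theta - y)^T(X\theta - y)$$

and the batch gradient descent update rule, applied simultaneously to every $\theta_j$, is

$$\theta_j := \theta_j + \alpha \cdot \frac{1}{m}\sum_{i=1}^{m}\left(y^{(i)} - h_\theta(x^{(i)})\right)x_j^{(i)}$$

Stochastic gradient descent applies the same update one sample at a time, dropping the sum and the $\frac{1}{m}$ factor.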

Test Programs

bgd test program

# coding: utf-8
# linear_regression/test_bgd.py
import regression
from matplotlib import cm
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np

if __name__ == "__main__":
    X, y = regression.loadDataSet('data/ex1.txt')
    m, n = X.shape
    X = np.concatenate((np.ones((m, 1)), X), axis=1)  # prepend the intercept column

    rate = 0.01
    maxLoop = 1500
    epsilon = 0.01

    result, timeConsumed = regression.bgd(rate, maxLoop, epsilon, X, y)
    theta, errors, thetas = result

    # plot the fitted line
    fittingFig = plt.figure()
    title = 'bgd: rate=%.2f, maxLoop=%d, epsilon=%.3f \n time: %ds' % (
        rate, maxLoop, epsilon, timeConsumed)
    ax = fittingFig.add_subplot(111, title=title)
    trainingSet = ax.scatter(X[:, 1].flatten().A[0], y[:, 0].flatten().A[0])

    xCopy = X.copy()
    xCopy.sort(0)
    yHat = xCopy * theta
    fittingLine, = ax.plot(xCopy[:, 1], yHat, color='g')

    ax.set_xlabel('Population of City in 10,000s')
    ax.set_ylabel('Profit in $10,000s')

    plt.legend([trainingSet, fittingLine], ['Training Set', 'Linear Regression'])
    plt.show()

    # plot the error curve
    errorsFig = plt.figure()
    ax = errorsFig.add_subplot(111)
    ax.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.4f'))

    ax.plot(range(len(errors)), errors)
    ax.set_xlabel('Number of iterations')
    ax.set_ylabel('Cost J')
    plt.show()

    # plot the cost surface
    size = 100
    theta0Vals = np.linspace(-10, 10, size)
    theta1Vals = np.linspace(-2, 4, size)
    JVals = np.zeros((size, size))
    for i in range(size):
        for j in range(size):
            col = np.matrix([[theta0Vals[i]], [theta1Vals[j]]])
            JVals[i, j] = regression.J(col, X, y)[0, 0]

    theta0Vals, theta1Vals = np.meshgrid(theta0Vals, theta1Vals)
    JVals = JVals.T
    contourSurf = plt.figure()
    ax = contourSurf.add_subplot(111, projection='3d')

    ax.plot_surface(theta0Vals, theta1Vals, JVals, rstride=2, cstride=2, alpha=0.3,
                    cmap=cm.rainbow, linewidth=0, antialiased=False)
    ax.plot(thetas[0], thetas[1], 'rx')
    ax.set_xlabel(r'$\theta_0$')
    ax.set_ylabel(r'$\theta_1$')
    ax.set_zlabel(r'$J(\theta)$')
    plt.show()

    # plot the cost contours
    contourFig = plt.figure()
    ax = contourFig.add_subplot(111)
    ax.set_xlabel(r'$\theta_0$')
    ax.set_ylabel(r'$\theta_1$')

    CS = ax.contour(theta0Vals, theta1Vals, JVals, np.logspace(-2, 3, 20))
    plt.clabel(CS, inline=1, fontsize=10)

    # mark the optimum
    ax.plot(theta[0, 0], theta[1, 0], 'rx', markersize=10, linewidth=2)
    # plot the gradient descent path
    ax.plot(thetas[0], thetas[1], 'rx', markersize=3, linewidth=1)
    ax.plot(thetas[0], thetas[1], 'r-')
    plt.show()

Fit result: (figure)


As you can see, bgd does not run slowly at all. This is because the regression module computes $\theta$ in vectorized form, which lets the computer exploit parallel computation to speed things up.
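As an illustration of that point (this is a minimal sketch, not part of the module above; the function names gradientLoop and gradientVectorized are hypothetical), the per-coordinate loop and a single matrix product compute the same gradient, but the latter hands all the work to NumPy's optimized routines:

# coding: utf-8
# Hypothetical sketch: loop vs. vectorized gradient computation.
import numpy as np

def gradientLoop(theta, X, y):
    """Gradient computed one coordinate at a time, as in bgd()."""
    m, n = X.shape
    grad = np.zeros((n, 1))
    for j in range(n):
        grad[j, 0] = ((y - X * theta).T * X[:, j] / m)[0, 0]
    return grad

def gradientVectorized(theta, X, y):
    """The same gradient as one matrix product: X^T (y - X theta) / m."""
    m = X.shape[0]
    return X.T * (y - X * theta) / m

# Both return the same n x 1 gradient; the vectorized form lets NumPy's
# optimized (and possibly parallel) BLAS routines do the heavy lifting.
X = np.mat([[1.0, 6.1], [1.0, 5.5], [1.0, 8.5]])
y = np.mat([[17.5], [9.1], [13.6]])
theta = np.zeros((2, 1))
print(np.allclose(gradientLoop(theta, X, y), gradientVectorized(theta, X, y)))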

Error versus number of iterations: (figure)

Descent surface of the cost function: (figure)

Gradient descent trajectory: (figure)

sgd test program

# coding: utf-8
# linear_regression/test_sgd.py
import regression
from matplotlib import cm
from mpl_toolkits.mplot3d import axes3d
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick
import numpy as np

if __name__ == "__main__":
    X, y = regression.loadDataSet('data/ex1.txt')
    m, n = X.shape
    X = np.concatenate((np.ones((m, 1)), X), axis=1)  # prepend the intercept column

    rate = 0.01
    maxLoop = 100
    epsilon = 0.01

    result, timeConsumed = regression.sgd(rate, maxLoop, epsilon, X, y)
    theta, errors, thetas = result

    # plot the fitted line
    fittingFig = plt.figure()
    title = 'sgd: rate=%.2f, maxLoop=%d, epsilon=%.3f \n time: %ds' % (
        rate, maxLoop, epsilon, timeConsumed)
    ax = fittingFig.add_subplot(111, title=title)
    trainingSet = ax.scatter(X[:, 1].flatten().A[0], y[:, 0].flatten().A[0])

    xCopy = X.copy()
    xCopy.sort(0)
    yHat = xCopy * theta
    fittingLine, = ax.plot(xCopy[:, 1], yHat, color='g')

    ax.set_xlabel('Population of City in 10,000s')
    ax.set_ylabel('Profit in $10,000s')

    plt.legend([trainingSet, fittingLine], ['Training Set', 'Linear Regression'])
    plt.show()

    # plot the error curve
    errorsFig = plt.figure()
    ax = errorsFig.add_subplot(111)
    ax.yaxis.set_major_formatter(mtick.FormatStrFormatter('%.4f'))

    ax.plot(range(len(errors)), errors)
    ax.set_xlabel('Number of iterations')
    ax.set_ylabel('Cost J')
    plt.show()

    # plot the cost surface
    size = 100
    theta0Vals = np.linspace(-10, 10, size)
    theta1Vals = np.linspace(-2, 4, size)
    JVals = np.zeros((size, size))
    for i in range(size):
        for j in range(size):
            col = np.matrix([[theta0Vals[i]], [theta1Vals[j]]])
            JVals[i, j] = regression.J(col, X, y)[0, 0]

    theta0Vals, theta1Vals = np.meshgrid(theta0Vals, theta1Vals)
    JVals = JVals.T
    contourSurf = plt.figure()
    ax = contourSurf.add_subplot(111, projection='3d')

    ax.plot_surface(theta0Vals, theta1Vals, JVals, rstride=8, cstride=8, alpha=0.3,
                    cmap=cm.rainbow, linewidth=0, antialiased=False)
    ax.plot(thetas[0], thetas[1], 'rx')
    ax.set_xlabel(r'$\theta_0$')
    ax.set_ylabel(r'$\theta_1$')
    ax.set_zlabel(r'$J(\theta)$')
    plt.show()

    # plot the cost contours
    contourFig = plt.figure()
    ax = contourFig.add_subplot(111)
    ax.set_xlabel(r'$\theta_0$')
    ax.set_ylabel(r'$\theta_1$')

    CS = ax.contour(theta0Vals, theta1Vals, JVals, np.logspace(-2, 3, 20))
    plt.clabel(CS, inline=1, fontsize=10)

    # mark the optimum
    ax.plot(theta[0, 0], theta[1, 0], 'rx', markersize=10, linewidth=2)
    # plot the gradient descent path
    ax.plot(thetas[0], thetas[1], 'r', linewidth=1)
    plt.show()

Fit result: (figure)

Error versus number of iterations: (figure)

Gradient descent trajectory: (figure)


With a learning rate of $\alpha = 0.01$, stochastic gradient descent shows very noticeable oscillation. Its speed advantage also does not show up here, for two reasons: the sample size is small, and SGD itself is hard to accelerate through parallel computation.
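One common remedy for this oscillation (not used in the module above) is to shrink the learning rate as training proceeds, so later updates take smaller steps. A minimal sketch, reusing the inner-loop structure of sgd() from regression.py; sgdDecay, rate0, and decay are illustrative names and values, not part of the original code:

# coding: utf-8
# Hypothetical sketch: SGD with a decaying learning rate to damp oscillation.
import numpy as np

def sgdDecay(rate0, decay, maxLoop, X, y):
    """sgd()-style updates with rate = rate0 / (1 + decay * step)."""
    m, n = X.shape
    theta = np.zeros((n, 1))
    step = 0
    for loop in range(maxLoop):
        for i in range(m):
            rate = rate0 / (1.0 + decay * step)  # shrink the step size over time
            diff = y[i, 0] - (theta.T * X[i].T)[0, 0]
            theta = theta + rate * diff * X[i].T
            step += 1
    return theta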
