Python3入门机器学习 - 梯度下降法

def DJ(theta):
    """Derivative of the loss J: d/dθ [(θ-2.5)² + 1] = 2(θ - 2.5)."""
    # Fixed: original used C-style // comments, which are invalid in Python.
    return 2 * (theta - 2.5)


def J(theta):
    """Loss function minimized by gradient descent; minimum is J(2.5) = 1."""
    return (theta - 2.5) ** 2 + 1

# Batch gradient descent on J(theta): step downhill until the change in the
# loss falls below epsilon, recording every theta visited so the descent
# path can be plotted on top of the loss curve.
theta = 0.0
eta = 0.1          # learning rate
epsilon = 1e-8     # convergence threshold on |J(theta) - J(last_theta)|
theta_history = [theta]
while True:
    gradient = DJ(theta)
    last_theta = theta
    theta = theta - eta * gradient
    theta_history.append(theta)
    if abs(J(theta) - J(last_theta)) < epsilon:
        break
# NOTE(review): plot_x, plot_y and pyplot (presumably matplotlib.pyplot) are
# defined earlier in the original notebook — confirm before running standalone.
pyplot.plot(plot_x, plot_y)
pyplot.plot(np.array(theta_history), J(np.array(theta_history)),
            color='r', marker='+')

def fit_gd(self, X_train, y_train, eta=0.01, n_iters=1e6):
    """Fit this linear model to (X_train, y_train) by batch gradient descent.

    Method of a linear-regression-style class (class definition not shown).

    eta     -- learning rate.
    n_iters -- maximum number of gradient steps.

    Sets self._theta (full parameter vector), self.interception_ (the
    intercept) and self.coef_ (the feature weights); returns self.
    """
    def J(theta, X_b, y):
        # Mean squared error.  If the loss overflows (diverging theta),
        # report +inf so the convergence test keeps working.
        try:
            return np.sum((y - X_b.dot(theta)) ** 2) / len(y)
        except Exception:  # was a bare `except:` — made explicit, kept best-effort
            return float("inf")

    def dJ(theta, X_b, y):
        # Vectorized gradient of the MSE: (2/m) * X_bᵀ (X_b·θ - y).
        return X_b.T.dot(X_b.dot(theta) - y) * 2. / len(X_b)

    def gradient_descent(X_b, y, initial_theta, eta, n_iters=1e6, epsilon=1e-8):
        theta = initial_theta
        cur_iter = 0
        while cur_iter < n_iters:
            gradient = dJ(theta, X_b, y)
            last_theta = theta
            theta = theta - eta * gradient
            # Stop once the loss no longer changes meaningfully.
            if abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon:
                break
            cur_iter += 1
        return theta

    # Prepend a column of ones so theta[0] acts as the intercept term.
    X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
    initial_theta = np.zeros(X_b.shape[1])
    self._theta = gradient_descent(X_b, y_train, initial_theta, eta, n_iters)
    # NOTE(review): sklearn spells this attribute "intercept_"; the
    # misspelling "interception_" is kept so existing callers don't break.
    self.interception_ = self._theta[0]
    self.coef_ = self._theta[1:]
    return self

def dJ_sgd(theta, X_b_i, y_i):
    """Stochastic gradient of the squared error at a single sample (X_b_i, y_i)."""
    return X_b_i.T.dot(X_b_i.dot(theta) - y_i) * 2.


def sgd(X_b, y, initial_theta, n_iters):
    """Stochastic gradient descent with a simulated-annealing learning rate.

    The step size decays as t0 / (t1 + t), so early iterations take large
    steps and later ones settle down.  Returns the final theta.
    """
    t0 = 5.0
    t1 = 50.0

    # Renamed from the misleading `learning_theta`: this returns the
    # learning *rate* at iteration t, not a theta value.
    def learning_rate(t):
        return t0 / (t1 + t)

    theta = initial_theta
    for cur_iter in range(n_iters):
        # One random sample per step (with replacement — some samples
        # may never be visited).
        rand_i = np.random.randint(len(X_b))
        gradient = dJ_sgd(theta, X_b[rand_i], y[rand_i])
        theta = theta - learning_rate(cur_iter) * gradient
    return theta

# Train and score scikit-learn's built-in SGD regressor on the
# (already standardized) train/test splits.
from sklearn.linear_model import SGDRegressor

# Fixed: `n_iter` was renamed `max_iter` in scikit-learn 0.19 and removed
# in 0.21; `n_iter=1000` raises TypeError on any modern version.
sgd = SGDRegressor(max_iter=1000)
sgd.fit(X_train_standard, y_train)
sgd.score(X_test_standard, y_test)

def dJ_debug(theta, X_b, y, epslion=0.01):
    """Numerically approximate the gradient of J at theta by central differences.

    Slow but implementation-independent — useful for validating a
    hand-derived dJ.  `epslion` (sic — misspelled name kept so existing
    keyword callers don't break) is the half-width of the difference step.
    Relies on a sibling J(theta, X_b, y) defined elsewhere in the file.
    """
    res = np.empty(len(theta))
    for i in range(len(theta)):
        theta_1 = theta.copy()
        theta_1[i] += epslion
        theta_2 = theta.copy()
        theta_2[i] -= epslion
        # BUG FIX: the original wrote (J1 - J2/(2ε)), dividing only the
        # second term by 2ε; the whole difference must be divided: (J1 - J2)/(2ε).
        res[i] = (J(theta_1, X_b, y) - J(theta_2, X_b, y)) / (2 * epslion)
    return res