# Python3入门机器学习 - 线性回归与KNN算法处理Boston数据集

# _*_ encoding:utf-8 _*_
# Fit simple (one-feature) linear regression y = a*x + b by the closed-form
# least-squares solution for a and b.
import numpy as np


class SimpleLinearRegression1:
    """Simple linear regression fitted with an explicit Python loop.

    Slower than SimpleLinearRegression2, which computes the same sums
    with vectorized dot products.
    """

    def __init__(self):
        self.a_ = None  # fitted slope
        self.b_ = None  # fitted intercept

    def fit(self, X_train, y_train):
        """Fit on 1-D arrays X_train / y_train of equal length.

        Returns self so calls can be chained.
        """
        X_mean = np.mean(X_train)
        y_mean = np.mean(y_train)
        num = 0.0  # numerator:   sum((x - x̄) * (y - ȳ))
        d = 0.0    # denominator: sum((x - x̄) ** 2)
        for x, y in zip(X_train, y_train):
            num += (x - X_mean) * (y - y_mean)
            d += (x - X_mean) ** 2
        self.a_ = num / d
        self.b_ = y_mean - self.a_ * X_mean
        return self

    def predict(self, X_test):
        """Return predictions for a 1-D array of x values."""
        return np.array([self._predict(x) for x in X_test])

    def _predict(self, x):
        # Prediction for a single scalar x.
        return self.a_ * x + self.b_

    def __repr__(self):
        return "SimpleLinearRegression1()"


class SimpleLinearRegression2:
    """Simple linear regression fitted with vectorized dot products.

    Mathematically identical to SimpleLinearRegression1 but much faster,
    since the two sums are computed in C by numpy.
    """

    def __init__(self):
        self.a_ = None  # fitted slope
        self.b_ = None  # fitted intercept

    def fit(self, X_train, y_train):
        """Fit on 1-D arrays X_train / y_train of equal length.

        Returns self so calls can be chained.
        """
        X_mean = np.mean(X_train)
        y_mean = np.mean(y_train)
        # Same sums as the loop version, expressed as dot products.
        num = (X_train - X_mean).dot(y_train - y_mean)
        d = (X_train - X_mean).dot(X_train - X_mean)
        self.a_ = num / d
        self.b_ = y_mean - self.a_ * X_mean
        return self

    def predict(self, X_test):
        """Return predictions for a 1-D array of x values."""
        return np.array([self._predict(x) for x in X_test])

    def _predict(self, x):
        # Prediction for a single scalar x.
        return self.a_ * x + self.b_

    def __repr__(self):
        return "SimpleLinearRegression2()"

# Demo (IPython session): compare the loop-based and vectorized simple
# linear regressions on synthetic data. %run / %timeit are IPython magics.
import numpy as np
from matplotlib import pyplot

# Synthetic data: y = 3x + 4 plus standard-normal noise.
x = np.random.random(size=100)
y = 3.0*x+4.0+np.random.normal(size=100)

# Load the two regression classes from the script defined above.
%run MyScripts/SimpleLinearRegression.py
reg1 = SimpleLinearRegression1()
reg2 = SimpleLinearRegression2()

# Time both fits: the vectorized version (reg2) should be much faster.
%timeit reg1.fit(x,y)
%timeit reg2.fit(x,y)

y1 = reg1.predict(x)
y2 = reg2.predict(x)

# Plot the data and both fitted lines (the two lines should coincide).
pyplot.scatter(x,y)
pyplot.plot(x,y1,color="r",alpha=0.5)
pyplot.plot(x,y2,color='g')

MSE
``mse = np.sum((y_predict-y_test)**2)/len(y_test)``

RMSE
``rmse = np.sqrt(mse)``

MAE
``mae = np.sum(np.absolute(y_predict-y_test))/len(y_test)``

R Square
``1-mean_squared_error(y_test,y_predict)/np.var(y_test)``

# _*_ encoding:utf-8 _*_
import numpy as np


class LinearRegression:
    """Multivariate linear regression solved by least squares (normal equation)."""

    def __init__(self):
        self.coef_ = None          # feature coefficients, theta[1:]
        self.interception_ = None  # intercept term, theta[0] (name kept for compatibility; conventional spelling is "intercept_")
        self._theta = None         # full parameter vector [intercept, coefs...]

    def fit_normal(self, X_train, y_train):
        """Fit theta on X_train (n_samples, n_features) and y_train (n_samples,).

        Uses np.linalg.lstsq on the intercept-augmented matrix instead of
        explicitly inverting X_b.T @ X_b: the result is identical on
        full-rank data but numerically stable, and it also handles
        rank-deficient inputs. Returns self so calls can be chained.
        """
        # Prepend a column of ones so theta[0] acts as the intercept.
        X_b = np.hstack([np.ones((len(X_train), 1)), X_train])
        self._theta, *_ = np.linalg.lstsq(X_b, y_train, rcond=None)
        self.interception_ = self._theta[0]
        self.coef_ = self._theta[1:]
        return self

    def predict(self, X_predict):
        """Return predictions for X_predict (n_samples, n_features)."""
        X_b = np.hstack([np.ones((len(X_predict), 1)), X_predict])
        return X_b.dot(self._theta)

    def score(self, X_test, y_test):
        """R^2 score: 1 - SS_res / SS_tot.

        Computed directly with numpy; equals sklearn.metrics.r2_score for
        this case, so the sklearn dependency is no longer needed.
        """
        y_predict = self.predict(X_test)
        ss_res = np.sum((y_test - y_predict) ** 2)
        ss_tot = np.sum((y_test - np.mean(y_test)) ** 2)
        return 1.0 - ss_res / ss_tot

    def __repr__(self):
        return "LinearRegression()"

KNN算法处理回归问题
# Grid-search hyper-parameters for a KNN regressor.
# Requires sklearn's KNeighborsRegressor / GridSearchCV and the
# X_train / y_train split prepared earlier in the notebook.
knn_reg = KNeighborsRegressor()
# Two grids: uniform weighting (search k only) and distance weighting
# (search k plus the Minkowski distance exponent p).
params=[
    {
        'weights':['uniform'],
        'n_neighbors':[i for i in range(1,11)]
    },
    {
        'weights':['distance'],
        'n_neighbors':[i for i in range(1,11)],
        'p':[i for i in range(1,6)]
    }
]
# n_jobs=-1: use all CPU cores; verbose=1: print search progress.
grid_search = GridSearchCV(knn_reg,params,n_jobs=-1,verbose=1)
grid_search.fit(X_train,y_train)

`grid_search.best_params_` {'n_neighbors': 5, 'p': 1, 'weights': 'distance'}
`grid_search.best_score_` 0.634093080186858
`grid_search.best_estimator_.score(X_test,y_test)` 0.7044357727037996