# 机器学习之逻辑回归(纯python实现)

2018-03-01 11:06:49　来源：网络收集　作者：一条狗

logistic回归是一种广义的线性回归，通过构造回归函数，利用机器学习来实现分类或者预测。

# 示例数据：每行前两列为特征，最后一列为类别标签（0/1）
-0.017612 14.053064 0
-1.395634 4.662541 1
-0.752157 6.538620 0
-1.322371 7.152853 0
0.423363 11.054677 0
0.406704 7.067335 1

Cost函数(损失函数)：该函数衡量预测的输出h和训练数据类别y之间的偏差，(h-y)或者其他形式。综合考虑所有训练数据的cost，将其求和或者求平均，记为J函数，表示所有训练数据预测值和实际值的偏差。

cost函数

# Sigmoid activation: maps any real z into (0, 1).
def sigmoid(z):
    """Return the logistic sigmoid 1 / (1 + e^-z).

    Works element-wise on scalars and numpy arrays/matrices alike,
    since it only uses ``np.exp`` and arithmetic.
    """
    return 1 / (1 + np.exp(-z))
def init_data(dataset=None):
    """Split a dataset into a feature matrix (with bias column) and labels.

    Parameters
    ----------
    dataset : array-like of shape (m, k+1), optional
        Each row is k feature values followed by the class label in the
        last column.  Defaults to the module-level global ``data``
        (NOTE(review): the original code relied on that global being
        loaded elsewhere — confirm where it is defined).

    Returns
    -------
    dataMatIn : ndarray of shape (m, k+1)
        Features with a leading column of ones (the constant term x0).
    classLabels : ndarray of shape (m,)
        Class labels taken from the last column.
    """
    src = np.asarray(dataset if dataset is not None else data)
    dataMatIn = src[:, 0:-1]
    classLabels = src[:, -1]
    # Prepend x0 = 1 so the first weight acts as the intercept/bias term.
    dataMatIn = np.insert(dataMatIn, 0, 1, axis=1)
    return dataMatIn, classLabels
# Batch gradient ascent on the logistic-regression log-likelihood.
# NOTE(review): the original scrape lost this function's `def` line;
# the header below is reconstructed to match the call pattern
# (features + labels in, weight column vector out) — confirm the name.
def grad_ascent(dataMatIn, classLabels):
    """Fit logistic-regression weights by batch gradient ascent.

    Parameters
    ----------
    dataMatIn : array-like of shape (m, n)
        Feature matrix (expected to include the x0 = 1 bias column).
    classLabels : array-like of shape (m,)
        Binary class labels (0/1).

    Returns
    -------
    numpy matrix of shape (n, 1) with the fitted coefficients.
    """
    dataMatrix = np.mat(dataMatIn)              # (m, n)
    labelMat = np.mat(classLabels).transpose()  # (m, 1)
    m, n = np.shape(dataMatrix)
    weights = np.ones((n, 1))  # initial coefficients, shape (n, 1)
    alpha = 0.001              # step size / learning rate
    maxCycle = 500             # fixed number of full-batch iterations
    for _ in range(maxCycle):
        h = sigmoid(dataMatrix * weights)  # predictions, (m, 1)
        # Log-likelihood gradient is X^T (y - h); move uphill along it.
        weights = weights + alpha * dataMatrix.transpose() * (labelMat - h)
    return weights
# Script entry point: train on the demo data and print the fitted weights.
if __name__ == '__main__':
    dataMatIn, classLabels = init_data()
    # Bug fix: the original printed `r` without ever assigning it;
    # `r` is the weight vector produced by gradient ascent.
    r = grad_ascent(dataMatIn, classLabels)
    print(r)

[[ 4.12414349]
[ 0.48007329]
[-0.6168482 ]]

def plotBestFIt(weights):
    """Scatter the two classes and draw the fitted decision boundary.

    Parameters
    ----------
    weights : (3, 1) matrix/array of fitted coefficients [w0, w1, w2]
        (the indexing ``weights[k, 0]`` below requires 2-D input).
    """
    dataMatIn, classLabels = init_data()
    n = np.shape(dataMatIn)[0]
    xcord1, ycord1 = [], []  # coordinates of class-1 samples
    xcord2, ycord2 = [], []  # coordinates of class-0 samples
    for i in range(n):
        # Columns 1 and 2 are the two features (column 0 is the x0 = 1 bias).
        if classLabels[i] == 1:
            xcord1.append(dataMatIn[i][1])
            ycord1.append(dataMatIn[i][2])
        else:
            xcord2.append(dataMatIn[i][1])
            ycord2.append(dataMatIn[i][2])
    fig = plt.figure()
    ax = fig.add_subplot(111)  # bug fix: the original used `ax` without creating it
    ax.scatter(xcord1, ycord1, s=30, c='red', marker='s')
    ax.scatter(xcord2, ycord2, s=30, c='green')
    x = np.arange(-3, 3, 0.1)
    # Decision boundary: w0 + w1*x1 + w2*x2 = 0  =>  x2 = (-w0 - w1*x1) / w2
    y = (-weights[0, 0] - weights[1, 0] * x) / weights[2, 0]
    ax.plot(x, y)
    plt.xlabel('X1')
    plt.ylabel('X2')
    plt.show()

# Stochastic gradient ascent, version 0: a single pass over the data,
# updating the weights after every sample.
# NOTE(review): the original scrape lost this function's `def` line;
# the header below is reconstructed from the body (it reads dataMatIn
# and classLabels and returns weights) — confirm the name.
def stoc_grad_ascent0(dataMatIn, classLabels):
    """One-pass stochastic gradient ascent for logistic regression.

    Parameters
    ----------
    dataMatIn : ndarray of shape (m, n), features (with bias column).
    classLabels : array-like of shape (m,), binary labels.

    Returns
    -------
    ndarray of shape (n,) with the weights after one pass.
    """
    m, n = np.shape(dataMatIn)
    alpha = 0.01          # fixed step size
    weights = np.ones(n)  # initial coefficients, shape (n,)
    for i in range(m):
        # Plain scalar arithmetic here (no matrix ops), one sample at a time.
        h = sigmoid(sum(dataMatIn[i] * weights))
        error = classLabels[i] - h
        weights = weights + alpha * error * dataMatIn[i]
    return weights

# Stochastic gradient ascent, version 1: decaying step size plus
# random sample order, repeated for numIter epochs.
# NOTE(review): the original scrape lost this function's `def` line; the
# header is reconstructed (numIter was an undefined free variable — it is
# now a parameter with a conventional default of 150).
def stoc_grad_ascent1(dataMatIn, classLabels, numIter=150):
    """Improved stochastic gradient ascent for logistic regression.

    Parameters
    ----------
    dataMatIn : ndarray of shape (m, n), features (with bias column).
    classLabels : array-like of shape (m,), binary labels.
    numIter : int, number of epochs over the data (default 150).

    Returns
    -------
    ndarray of shape (n,) with the fitted weights.
    """
    m, n = np.shape(dataMatIn)
    weights = np.ones(n)
    for j in range(numIter):
        dataIndex = list(range(m))  # indices of samples not yet used this epoch
        for i in range(m):
            # Decaying step size; the +0.01 floor keeps late updates influential.
            alpha = 4 / (1 + i + j) + 0.01
            # Bug fix: the original drew randIndex but then updated with
            # sample `i`, so the random selection (and the del below) had
            # no effect.  Use the randomly chosen remaining sample instead.
            randPos = int(np.random.uniform(0, len(dataIndex)))
            sample = dataIndex[randPos]
            h = sigmoid(sum(dataMatIn[sample] * weights))
            error = classLabels[sample] - h
            weights = weights + alpha * error * dataMatIn[sample]
            del dataIndex[randPos]  # each sample is used exactly once per epoch
    return weights