# 机器学习笔记（2）——感知机

2015-10-17 19:18:29 · 来源：CSDN · 作者：zx10212029

## Perceptron（感知机）

感知机模型：

$$f(x) = \operatorname{sign}(w \cdot x + b)$$

#### 2.感知机学习策略

给定训练数据集 $T=\{(x_1,y_1),(x_2,y_2),\dots,(x_N,y_N)\}$。

输入空间中任意一点 $x_i$ 到分离超平面 $S$ 的距离为

$$\frac{1}{\|w\|}\,|w \cdot x_i + b|$$

对于正确分类的样本，有

$$y_i (w \cdot x_i + b) > 0$$

因此，对误分类样本 $(x_i, y_i)$，其到超平面的距离可写为

$$-\frac{1}{\|w\|}\, y_i (w \cdot x_i + b)$$

于是所有误分类点（记其集合为 $M$）到超平面 $S$ 的总距离为

$$-\frac{1}{\|w\|} \sum_{(x_i,y_i)\in M} y_i (w \cdot x_i + b)$$

不考虑系数 $\frac{1}{\|w\|}$，即得感知机学习的损失函数：

$$L(w,b) = -\sum_{(x_i,y_i)\in M} y_i (w \cdot x_i + b)$$

#### 3.感知机学习算法

感知机学习算法求解如下最优化问题：

$$\min_{w,b}\; L(w,b) = -\sum_{(x_i,y_i)\in M} y_i (w \cdot x_i + b)$$

损失函数的梯度为

$$\nabla_w L(w,b) = -\sum_{(x_i,y_i)\in M} y_i x_i$$

$$\nabla_b L(w,b) = -\sum_{(x_i,y_i)\in M} y_i$$

随机选取一个误分类点 $(x_i, y_i)$，对 $w, b$ 进行更新：

$$w \leftarrow w + \eta y_i x_i$$

$$b \leftarrow b + \eta y_i$$

• Algorithm 2.1（感知机学习算法的原始形式）
• Input: training data $T=\{(x_1,y_1),(x_2,y_2),\dots,(x_N,y_N)\}$, learning rate $\eta\ (0<\eta\le 1)$
• Output: parameters $w, b$; model $f(x)=\operatorname{sign}(w \cdot x + b)$
• Initialize: $w \leftarrow 0,\ b \leftarrow 0$
• for randomly selected item $(x_i, y_i) \in T$
•   if $y_i (w \cdot x_i + b) \le 0$
•      $w \leftarrow w + \eta y_i x_i$
•      $b \leftarrow b + \eta y_i$
•    end if
• end for

在示例中，我们选取正样本为 $x_1=(3,3)^T$、$x_2=(4,3)^T$，负样本为 $x_3=(1,1)^T$，通过 Python 来实现感知机的原始形式：

# Project: Machine Learning - Perceptron (primal form)
# Author: Lyndon
# Date: 2015/10/15
# Ported to Python 3: print() calls, and true division in the plotting code
# (Python 2's integer division silently truncated the hyperplane endpoints).
import copy

# Training set: [(x1, x2), label]; first two samples are positive (+1),
# the last one is negative (-1).
training_data = [[(3, 3), 1], [(4, 3), 1], [(1, 1), -1]]
w = [0, 0]    # weight vector, mutated in place by update()
b = [0]       # bias, kept in a one-element list so it can be mutated in place
step = 1      # learning rate (eta)
history = []  # snapshot of [w, b] after every update, used by the animation


def update(item):
    """Apply one stochastic-gradient step for a misclassified sample.

    item: a training sample [(x1, x2), label].  Mutates the globals w and b
    (w <- w + eta*y*x, b <- b + eta*y) and appends a copy of the new
    parameters to history.
    """
    global w, b, history
    w[0] += step * item[1] * item[0][0]
    w[1] += step * item[1] * item[0][1]
    b[0] += step * item[1]
    print(w, b)
    history.append([copy.copy(w), copy.copy(b)])


def calculate(item):
    """Return y_i * (w . x_i + b); a value <= 0 means item is misclassified."""
    res = 0
    for i in range(len(item[0])):
        res += w[i] * item[0][i]
    res += b[0]
    res *= item[1]
    return res


def check():
    """Run one pass over the data, updating on every misclassified sample.

    Returns True if any update was made (training must continue),
    False once all samples are correctly classified.
    """
    flag = False
    for item in training_data:
        if calculate(item) <= 0:
            update(item)
            flag = True
    if not flag:
        print("Result: w:" + str(w) + " b:" + str(b))
    return flag


if __name__ == "__main__":
    # matplotlib is imported here rather than at module level so the
    # training code above stays importable without a plotting backend.
    from matplotlib import pyplot as plt
    from matplotlib import animation

    for i in range(1000):
        if not check():
            break

    # set up the figure
    fig = plt.figure()
    ax = plt.axes(xlim=(0, 6), ylim=(0, 6))
    line, = ax.plot([], [], lw=2)
    label = ax.text([], [], '')

    def init():
        """Draw the training points as the animation's base frame."""
        line.set_data([], [])
        x1, y1, x2, y2 = [], [], [], []  # (x1,y1) positive, (x2,y2) negative
        for item in training_data:
            if item[1] > 0:
                x1.append(item[0][0])
                y1.append(item[0][1])
            else:
                x2.append(item[0][0])
                y2.append(item[0][1])
        plt.plot(x1, y1, 'bo', x2, y2, 'rx')
        plt.axis([-6, 6, -6, 6])
        plt.grid(True)
        plt.xlabel('x')
        plt.ylabel('y')
        plt.title('Machine Learning - perceptron')
        return line, label

    def animate(i):
        """Draw the separating line for the i-th recorded (w, b)."""
        w = history[i][0]
        b = history[i][1]
        # hyperplane: w[0]*x + w[1]*y + b = 0  =>  y = -(w[0]*x + b) / w[1]
        if w[1] == 0:
            return line, label
        x1 = -6
        y1 = -(w[0] * x1 + b[0]) / w[1]
        x2 = 6
        y2 = -(w[0] * x2 + b[0]) / w[1]
        line.set_data([x1, x2], [y1, y2])
        x0 = 0
        y0 = -(w[0] * x0 + b[0]) / w[1]
        label.set_text(history[i])
        label.set_position([x0, y0])
        return line, label

    print(history)
    anim = animation.FuncAnimation(fig, animate, init_func=init,
                                   frames=len(history), interval=1000)
    plt.show()

(1) 存在满足条件 $\|\hat{w}_{\mathrm{opt}}\|=1$ 的超平面 $\hat{w}_{\mathrm{opt}} \cdot \hat{x} = w_{\mathrm{opt}} \cdot x + b_{\mathrm{opt}} = 0$ 将训练数据集完全正确分开，且存在 $\gamma>0$，对所有样本有：

$$y_i (\hat{w}_{\mathrm{opt}} \cdot \hat{x}_i) = y_i (w_{\mathrm{opt}} \cdot x_i + b_{\mathrm{opt}}) \ge \gamma$$

(2) 令 $R=\max_{1\le i\le N} \|\hat{x}_i\|$，则上述感知机算法在训练集上的误分类次数 $k$ 满足以下不等式，即算法的迭代次数有上界：

$$k \le \left(\frac{R}{\gamma}\right)^2$$

$$w = \sum_{i=1}^{N} \alpha_i y_i x_i$$

$$b = \sum_{i=1}^{N} \alpha_i y_i$$

• Algorithm 2.2（感知机学习算法的对偶形式）
• Input: training data $T=\{(x_1,y_1),(x_2,y_2),\dots,(x_N,y_N)\}$, learning rate $\eta\ (0<\eta\le 1)$
• Output: parameters $\alpha, b$; model $f(x)=\operatorname{sign}\left(\sum_{j=1}^{N} \alpha_j y_j x_j \cdot x + b\right)$
• Initialize: $\alpha \leftarrow 0,\ b \leftarrow 0$
• for randomly selected item $(x_i, y_i) \in T$
•   if $y_i \left(\sum_{j=1}^{N} \alpha_j y_j x_j \cdot x_i + b\right) \le 0$
•      $\alpha_i \leftarrow \alpha_i + \eta$
•      $b \leftarrow b + \eta y_i$
•    end if
• end for

对偶形式中的训练样本仅以内积的形式出现。为了方便，可以预先将训练样本间的内积求出来，并以矩阵形式存储，这样在计算过程中将简化很多运算。矩阵形式为：
$$G = [x_i \cdot x_j]_{N \times N}$$
感知机对偶形式代码

# Project: Machine Learning - Perceptron (dual form)
# Author: Lyndon
# Date: 2015/10/15
# Ported to Python 3.  The training set is kept as a plain Python list:
# the original np.array([[(3,3),1], ...]) built a ragged (object-dtype)
# array, which modern NumPy (>= 1.24) rejects with a ValueError.
import numpy as np

# model: f(x) = sign(sum_j(a_j * y_j * x_j . x) + b)
training_data = [[(3, 3), 1], [(4, 3), 1], [(1, 1), -1]]
a = np.zeros(len(training_data), np.float64)  # alpha_i: eta * (#updates on sample i)
b = 0.0
x = np.array([item[0] for item in training_data], dtype=np.float64)  # inputs, one row per sample
y = np.array([item[1] for item in training_data], dtype=np.float64)  # labels (+1 / -1)
history = []  # [w, b] snapshot after every update, used by the animation


def cal_Gram():
    """Return the Gram matrix G[i][j] = x_i . x_j of the training inputs.

    The dual form only ever touches the samples through these inner
    products, so they are computed once up front.
    """
    n = len(training_data)
    g = np.empty((n, n), np.float64)
    for i in range(n):
        for j in range(n):
            g[i][j] = np.dot(x[i], x[j])
    return g


# Precomputed at import time (the original computed it inside __main__,
# which left Gram unset for any importer of this module).
Gram = cal_Gram()


def update(i):
    """Apply one dual-form SGD step for misclassified sample i (eta = 1)."""
    global a, b, history
    a[i] += 1
    b += y[i]
    print(a, b)
    history.append([np.dot(a * y, x), b])


def calculate(i):
    """Return y_i * (sum_j(a_j y_j x_j . x_i) + b); <= 0 means misclassified."""
    res = np.dot(a * y, Gram[i])
    res = (res + b) * y[i]
    return res


def check():
    """One pass over the data; returns True if any sample needed an update."""
    flag = False
    for i in range(len(training_data)):
        if calculate(i) <= 0:
            update(i)
            flag = True
    if not flag:
        # Recover the primal weight vector w = sum_i(a_i * y_i * x_i).
        w = np.dot(a * y, x)
        print("Result: w:" + str(w) + "b:" + str(b))
    return flag


if __name__ == "__main__":
    # matplotlib is imported lazily so the training code above stays
    # importable (and testable) without a display backend.
    from matplotlib import pyplot as plt
    from matplotlib import animation

    for i in range(1000):
        if not check():
            break

    # set up the figure
    fig = plt.figure()
    ax = plt.axes(xlim=(0, 6), ylim=(0, 6))
    line, = ax.plot([], [], lw=2)
    label = ax.text([], [], '')

    def init():
        """Draw the training points as the animation's base frame."""
        line.set_data([], [])
        x1, y1, x2, y2 = [], [], [], []  # (x1,y1) positive, (x2,y2) negative
        for item in training_data:
            if item[1] > 0:
                x1.append(item[0][0])
                y1.append(item[0][1])
            else:
                x2.append(item[0][0])
                y2.append(item[0][1])
        plt.plot(x1, y1, 'bo', x2, y2, 'rx')
        plt.axis([-6, 6, -6, 6])
        plt.grid(True)
        plt.xlabel('x')
        plt.ylabel('y')
        plt.title('Machine Learning - perceptron_dual')
        return line, label

    def animate(i):
        """Draw the separating line for the i-th recorded (w, b)."""
        w = history[i][0]
        b = history[i][1]
        # hyperplane: w[0]*x + w[1]*y + b = 0  =>  y = -(w[0]*x + b) / w[1]
        if w[1] == 0:
            return line, label
        x1 = -6
        y1 = -(w[0] * x1 + b) / w[1]
        x2 = 6
        y2 = -(w[0] * x2 + b) / w[1]
        line.set_data([x1, x2], [y1, y2])
        x0 = 0
        y0 = -(w[0] * x0 + b) / w[1]
        label.set_text(history[i])
        label.set_position([x0, y0])
        return line, label

    print(history)
    anim = animation.FuncAnimation(fig, animate, init_func=init,
                                   frames=len(history), interval=1000)
    plt.show()

PS：