python爬虫之铁路客运营业站站点

2017-01-13 19:13:47来源:CSDN作者:qq_36319472人点击

大家直接看代码吧

"""Scrape railway passenger-station listings from 12306.cn.

Fetches the bureau index page, then for each railway bureau its two
sub-pages — stations ("车站") and passenger halts ("乘降所") — and writes
one comma-separated record per table row to 9.final.txt (UTF-8).
"""
import time

import requests
from bs4 import BeautifulSoup


def fetch_data(url, bureau, desc, fd):
    """Fetch one sub-page and append its table rows to *fd*.

    Parameters:
        url:    absolute URL of the sub-page to scrape.
        bureau: railway-bureau name appended to every record.
        desc:   record category label appended after the bureau
                (e.g. u'车站' for stations, u'乘降所' for halts).
        fd:     open text file handle (UTF-8) receiving output lines.

    Network failures are reported and swallowed so the caller's loop
    can continue with the next sub-page.
    """
    try:
        resp = requests.get(url)
    except requests.RequestException:  # BUG FIX: was misspelled 'Excepion' (NameError)
        print('request url fail. ' + url)
        return
    soup = BeautifulSoup(resp.content, 'html.parser')
    rows = soup.select('table table tr')
    if len(rows) <= 2:
        msg = 'find nothing' + url + '' + bureau + '' + desc
        print(msg)
        fd.write(msg + '\n')  # BUG FIX: was '/n' — wrote a literal slash-n
    # The first two rows are table headers; data starts at index 2.
    for row in rows[2:]:
        record = u''
        for cell in row.find_all('td'):
            record += cell.text
            record += u", "
        record += bureau + u', ' + desc
        # The file is opened with encoding='utf-8'; the original's manual
        # encode/decode roundtrip (errors='replace') was redundant and its
        # print() showed a bytes repr instead of the text.
        fd.write(record + '\n')  # BUG FIX: was '/n'
        print(record)


if __name__ == '__main__':
    url = 'http://www.12306.cn/mormhweb/kyyyz/'
    try:
        resp = requests.get(url)
    except requests.RequestException as e:  # BUG FIX: was misspelled 'Excepion'
        print('requests url fail. ' + url)
        raise e
    soup = BeautifulSoup(resp.content, 'html.parser')
    names = soup.select('#secTable > tbody > tr > td')
    sub_urls = soup.select('#mainTable td.submenu_bg > a')
    with open('9.final.txt', 'w', encoding='utf-8') as fd:
        print('ok')
        print(names)
        for i, name in enumerate(names):
            # Each bureau contributes two consecutive links: stations first,
            # then halts.  hrefs start with './' which [2:] strips before
            # joining onto the base URL.
            station_url = url + sub_urls[i * 2]['href'][2:]
            fetch_data(station_url, name.text, u'车站', fd)
            time.sleep(5)  # throttle: be polite to the server
            halt_url = url + sub_urls[i * 2 + 1]['href'][2:]
            print(halt_url)
            fetch_data(halt_url, name.text, u'乘降所', fd)
            time.sleep(5)

最新文章

123

最新摄影

微信扫一扫

第七城市微信公众平台