用 C++ 编写爬虫抓取大众点评数据

2017-01-03 11:32:39 | 来源: | 作者: | 人点击

第七城市
#include < curl / curl.h >#include < iostream >#include < stdio.h >#include < string.h >#include < pcre.h >#define OVECCOUNT 30/* should be a multiple of 3 */#define EBUFLEN 128#define BUFLEN 10240using namespacestd;size_t onWriteData(void * buffer, size_t size, size_t nmemb, void * str) {    if (!str || !buffer) {        return - 1;    }    string * result = (string * ) str;    result - >append((char * ) buffer, size * nmemb);    return nmemb;}//获取页面int getWeb(string url, string & result){    long code = 0;    string htmlpage;    CURL * curl = curl_easy_init();    curl_easy_setopt(curl, CURLOPT_URL, url.c_str()); //设置url    curl_easy_setopt(curl, CURLOPT_POST, 0); //设置请求方法    curl_easy_setopt(curl, CURLOPT_USERAGENT, "Mozilla/5."); //伪装客户端    curl_easy_setopt(curl, CURLOPT_WRITEDATA, &htmlpage); //设置接受返回结果字符串    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, onWriteData); //设置处理方法    curl_easy_perform(curl); //请求    curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &code);    if (code == 200)    {        cout << "request success" << endl;        result = htmlpage;        //cout<<htmlpage<<endl;    }    curl_easy_cleanup(curl);    return code;}int main(int argc, char * *argv){    pcre * re;    constchar * error;    int erroffset;    int ovector[OVECCOUNT];    int rc,    i;    string url = "http://www.dianping.com/search/category/212/10/g103";    string html;    getWeb(url, html);    //char src[] =   " ";    //char pattern[] = "(<a>.+?</a>)";    constchar * src = html.c_str();    char pattern[] = "(<li class=/"/"[//s//S]*?</li>)";    printf("String : %s/n", src);    printf("Pattern: /"%s/"/n", pattern);    re = pcre_compile(pattern, 0, &error, &erroffset, NULL);    if (re == NULL) {        printf("PCRE compilation failed at offset %d: %s/n", erroffset, error);        return1;    }    char * p = (char * ) src;    while ((rc = pcre_exec(re, NULL, p, strlen(p), 0, 0, ovector, OVECCOUNT)) != PCRE_ERROR_NOMATCH)    {        printf("/nOK, %d matched .../n/n", rc);        for 
(i = 0; i < rc - 1; i++)        {            char * substring_start = p + ovector[2 * i];            int substring_length = ovector[2 * i + 1] - ovector[2 * i];            char matched[10240];            memset(matched, 0, 10240);            strncpy(matched, substring_start, substring_length);            printf("match:%s/n", matched);        }        p += ovector[1];        if (!p)        {            break;        }    }    pcre_free(re);    return0;}
第七城市

最新文章

123

最新摄影

微信扫一扫

第七城市微信公众平台