使用curl 下载HTML

时间:2014-05-12 02:55:46   收藏:0   阅读:394

简单的一个curl小例子:

 

bubuko.com,布布扣
#include <iostream>
#include <string>
#include <sstream>
#include <curl/curl.h>
#include <string.h>

// Buffer size in bytes (100 KiB). The expansion is parenthesized so the macro
// behaves as a single value inside larger expressions such as `x / BUF_SIZE`.
#define BUF_SIZE (1024 * 100)
using namespace std;

// Forward declaration; the definition follows main().
string DownloadString(char* url);
int main(int argc, const char* argv[]){


    curl_global_init(CURL_GLOBAL_ALL);
    cout <<DownloadString("http://www.baidu.com/");
    cin.get();
    curl_global_cleanup();
    return 0;
}

/// libcurl write callback: appends one received chunk to the string `out`.
///
/// @param in     Pointer to the received bytes. The data is NOT NUL-terminated.
/// @param size   Size of one element (libcurl documents this as always 1).
/// @param nmemb  Number of elements in this chunk.
/// @param out    Destination string, i.e. the pointer registered through
///               CURLOPT_WRITEDATA / CURLOPT_HEADERDATA.
/// @return Number of bytes consumed. libcurl treats any value other than
///         size * nmemb as an error and aborts the transfer.
size_t WriteData(char* in, size_t size, size_t nmemb, std::string* out){
    const size_t bytes = size * nmemb;
    // Append exactly `bytes` bytes. Treating `in` as a C string would read past
    // the end of the chunk and would also truncate at any embedded NUL byte.
    out->append(in, bytes);
    return bytes;
}

string DownloadString(char* url){
    string buffer;
    string headerData;
    CURL* conn;
    curl_slist* header = NULL;
    header = curl_slist_append(header, "Accept-Encoding: gzip, deflate");
    header = curl_slist_append(header, "User-Agent: Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; CIBA)");
    header = curl_slist_append(header, "Connection: Keep-Alive");

    conn = curl_easy_init();
    curl_easy_setopt(conn, CURLoption::CURLOPT_URL, url);
    curl_easy_setopt(conn, CURLoption::CURLOPT_HTTPHEADER, header);
    curl_easy_setopt(conn, CURLoption::CURLOPT_ACCEPT_ENCODING, "gzip");
    curl_easy_setopt(conn, CURLoption::CURLOPT_WRITEDATA, &buffer);
    curl_easy_setopt(conn, CURLoption::CURLOPT_WRITEFUNCTION, WriteData);
    curl_easy_setopt(conn, CURLoption::CURLOPT_WRITEHEADER, &headerData);

    CURLcode code = curl_easy_perform(conn);
    if (code != CURLcode::CURLE_OK)
        return "";
    curl_slist_free_all(header);
    curl_easy_cleanup(conn);

    istringstream istream(headerData.c_str());
    string out;
    bool isgzip = false;
    while (istream.good())
    {
        getline(istream, out, \n);
        if (!out.empty()){
            if (out.find("Content-Encoding") != out.npos && out.find("gzip") != out.npos){
                isgzip = true;
            }
        }
    }

    ///gzip
    return buffer;
}
bubuko.com,布布扣

 

该例子通过 curl 下载百度首页的 HTML 与响应头信息;由于设置了 CURLOPT_ACCEPT_ENCODING,libcurl 会自动对 gzip 压缩的响应内容进行解码。

使用curl 下载HTML,布布扣,bubuko.com

评论(0)
© 2014 mamicode.com 版权所有 京ICP备13008772号-2  联系我们:gaon5@hotmail.com
迷上了代码!