c++网页抓取_c++抓取网页内容源代码下载
2015-08-01 17:19:08  By: dwtedx

本DLL是基于libcurl编写的“傻瓜化”C++网页获取类封装。本项目不适用于功能需求较多的大型项目，适用于只需要单纯使用GET、POST提交方式并可保存cookies的小型项目。需要的可以下载研究一下，代码比较简单。


#include <vjspider_c.h>
#include <vjspider.h>
#include <vjspider_helper.h>

// File-level registries backing the handle-based API below. Every map is keyed
// by the integer handle returned from vj_create_spider().
// NOTE(review): identifiers containing a double underscore are reserved to the
// C++ implementation; consider renaming these if the whole file can be updated.

// Handle -> spider instance; entries are allocated in vj_create_spider() and
// deleted in vj_release_spider().
map<int, VJSpider*>     __vjspiders;
// Handle -> storage for the C string returned by vj_get_cookie_filename().
map<int, string>        __cookies;
// Handle -> GET parameters queued by vj_push_get_data() and passed to vj_get().
map<int, get_data>      __tmpgetdata;
// Handle -> POST parameters queued by vj_push_post_data() and passed to vj_post().
map<int, post_data>     __tmppostdata;
// Handle -> storage for the C string returned by vj_get().
map<int, string>        __getstring;
// Handle -> storage for the C string returned by vj_post().
map<int, string>        __poststring;

// Forwards to VJSpider::init_global_envi() and reports its result.
bool vj_init_global_envi()
{
    const bool initialized = VJSpider::init_global_envi();
    return initialized;
}

// Forwards to VJSpider::release_global_envi(); produces no result.
void vj_release_global_envi()
{
    VJSpider::release_global_envi();
}

// Creates a new VJSpider and registers it under a fresh integer handle.
//
// The handle is drawn from VJRand() and redrawn until it does not collide with
// a handle already present in __vjspiders. All per-handle state (cached
// strings and queued GET/POST parameters) is reset to empty.
//
// Returns the handle to pass to the other vj_* functions.
int vj_create_spider()
{
    int handle;
    do
    {
        handle = VJRand();
    } while(__vjspiders.count(handle) != 0);

    // Start from empty cached result strings for this handle.
    __cookies[handle].clear();
    __poststring[handle].clear();
    __getstring[handle].clear();

    __vjspiders[handle] = new VJSpider();

    // Start from empty parameter queues for this handle.
    __tmpgetdata[handle].clear();
    __tmppostdata[handle].clear();

    return handle;
}

// Destroys the spider registered under idx and drops all per-handle state.
//
// idx: handle previously returned by vj_create_spider(). An unknown handle is
//      tolerated: nothing is deleted and no map entry is created for it.
//
// The cached result strings are erased rather than reset to "". Resetting left
// one entry per released handle in __cookies, __getstring and __poststring
// forever, and inserted entries for handles that never existed. As a
// consequence, pointers previously returned by vj_get(), vj_post() or
// vj_get_cookie_filename() for this handle must not be used after this call.
void vj_release_spider(int idx)
{
    map<int, VJSpider*>::iterator it = __vjspiders.find(idx);
    if(it != __vjspiders.end())
    {
        delete it->second;
        __vjspiders.erase(it);
    }

    // erase-by-key is a no-op when the key is absent, so no lookup is needed.
    __tmpgetdata.erase(idx);
    __tmppostdata.erase(idx);

    __cookies.erase(idx);
    __getstring.erase(idx);
    __poststring.erase(idx);
}

bool vj_init_context(int idx, bool auto_del_cookie, 
        const char* cookie_file, bool use_cookie)
{
    map<int, VJSpider*>::iterator it = __vjspiders.find(idx);
    if(it == __vjspiders.end()) return false;

    return __vjspiders[idx]->init_context(auto_del_cookie, 
        cookie_file, use_cookie);
}

const char* vj_get_cookie_filename(int idx)
{
    map<int, VJSpider*>::iterator it = __vjspiders.find(idx);
    if(it == __vjspiders.end())
    {
        __cookies[idx] = "";
        return __cookies[idx].c_str();
    }

    __cookies[idx] = it->second->get_cookie_filename();
    return __cookies[idx].c_str();
}

void vj_del_cookie_file(int idx)
{
    map<int, VJSpider*>::iterator it = __vjspiders.find(idx);
    if(it == __vjspiders.end()) return;

    return it->second->del_cookie_file();
}

bool vj_push_get_data(int idx, const char* name, const char* value)
{
    map<int, VJSpider*>::iterator it = __vjspiders.find(idx);
    if(it == __vjspiders.end()) return false;

    __tmpgetdata[idx].push_back(get_data_item(name, value));
    return true;
}

bool vj_push_post_data(int idx, const char* name, const char* value)
{
    map<int, VJSpider*>::iterator it = __vjspiders.find(idx);
    if(it == __vjspiders.end()) return false;

    __tmppostdata[idx].push_back(post_data_item(name, value));
    return true;
}

void vj_clear_get_data(int idx)
{
    map<int, get_data>::iterator it = __tmpgetdata.find(idx);
    if(it == __tmpgetdata.end()) return;

    it->second.clear();
}

void vj_clear_post_data(int idx)
{
    map<int, post_data>::iterator it = __tmppostdata.find(idx);
    if(it == __tmppostdata.end()) return;

    it->second.clear();
}

const char* vj_get(int idx, const char* url, unsigned int code, int cookie)
{
    map<int, VJSpider*>::iterator it = __vjspiders.find(idx);
    if(it == __vjspiders.end())
    {
        __getstring[idx] = "";
        return __getstring[idx].c_str();
    }

    __getstring[idx] = it->second->get(url, __tmpgetdata[idx], 
        code, (VJ_USE_COOKIE_TYPE)cookie);
    return __getstring[idx].c_str();
}

const char* vj_post(int idx, const char* url, unsigned int code, int cookie)
{
    map<int, VJSpider*>::iterator it = __vjspiders.find(idx);
    if(it == __vjspiders.end())
    {
        __poststring[idx] = "";
        return __poststring[idx].c_str();
    }

    __poststring[idx] = it->second->post(url, __tmppostdata[idx], 
        code, (VJ_USE_COOKIE_TYPE)cookie);
    return __poststring[idx].c_str();
}


若资源对你有帮助、扫描下方的二维码、关注DD博客微信公众号(ddblogs)吧

最后给贴上Demo的源代码、希望对大家有用、有兴趣的哥们可以下载看看

源代码下载链接: http://dwtedx.com/download.html?bdkey=s/1bn8qswR 密码: p9v4

若资源对你有帮助、浏览后有很大收获、不妨小额打赏我一下、你的鼓励是维持我不断写博客最大动力

想获取DD博客最新代码、你可以扫描下方的二维码、关注DD博客微信公众号(ddblogs)

或者你也可以关注我的新浪微博、了解DD博客的最新动态:DD博客官方微博(dwtedx的微博)

如对资源有任何疑问或觉得仍然有很大的改善空间、可以对该博文进行评论、希望不吝赐教

为保证及时回复、可以使用博客留言板给我留言: DD博客留言板(dwtedx的留言板)

感谢你的访问、祝你生活愉快、工作顺心、欢迎常来逛逛


快速评论


技术评论

  • 该技术还没有评论、赶快抢沙发吧...
DD记账
top
+