node-crawler

node-crawler 介绍

node-crawler 是一个比较好用的 Node.js 爬虫框架，我们可以使用最亲切的 jQuery 语法来解析响应返回的页面。

node-crawler安装

npm install crawler

node-crawler使用

// Usage sample for the third-party `crawler` package (npm install crawler).
const Crawler = require("crawler");

const c = new Crawler({
    maxConnections: 10,

    // Default callback: this will be called for each crawled page
    // unless the queued task supplies its own `callback`.
    callback: function (error, res, done) {
        if (error) {
            console.log(error);
        } else {
            const $ = res.$;
            // $ is Cheerio by default,
            // a lean implementation of core jQuery designed specifically for the server
            console.log($("title").text());
        }
        // Tell the crawler this task has been fully processed.
        done();
    }
});

// Queue just one URL, with default callback
c.queue('http://www.amazon.com');

// Queue a list of URLs
c.queue(['http://www.google.com/', 'http://www.yahoo.com']);

// Queue URLs with custom callbacks & parameters
c.queue([{
    uri: 'http://parishackers.org/',
    jQuery: false,

    // The global callback won't be called.
    // The signature must be (error, res, done): `res` is read below, and
    // `done` has to be the third argument for the `done()` call to work.
    callback: function (error, res, done) {
        if (error) {
            console.log(error);
        } else {
            console.log('Grabbed', res.body.length, 'bytes');
        }
        done();
    }
}]);

// Queue some html code directly without grabbing (mostly for tests)
c.queue([{
    html: '<p>This is a <strong>test</strong></p>'
}]);

网站地址：http://nodecrawler.org

GitHub：https://github.com/bda-research/node-crawler

网站描述：一款最好的 Node.js 爬虫工具

官方网站：http://nodecrawler.org

node-crawler

node-crawler 介绍

node-crawler安装

node-crawler使用

node-crawler

相关推荐