Java 爬虫 WebMagic 案例:爬取动态(Ajax + JS)网站——京东商品售价项目
packageorg.study.WebMagicStudy;importjava.util.HashMap;importjava.util.HashSet;importjava.util.List;importjava.util.Map;importjava.util.Set;importus.codecraft.webmagic.Page;importus.codecraft.webmagic
packageorg.study.WebMagicStudy;importjava.util.HashMap;importjava.util.HashSet;importjava.util.List;importjava.util.Map;importjava.util.Set;importus.codecraft.webmagic.Page;importus.codecraft.webmagic.Site;importus.codecraft.webmagic.Spider;importus.codecraft.webmagic.pipeline.FilePipeline;importus.codecraft.webmagic.processor.PageProcessor;public class JDAjaxProcessor implementsPageProcessor {public static final String URL_LIST = "http://list\\.jd\\.com/list\\.html\\?cat=9987,653,655&page=\\d+\\&go=0\\&JL=6_0_0";//用于存储{key:手机ID,value:手机名称}
static Map map = new HashMap();static Set uri = new HashSet();public static voidmain(String[] args) {
String list= "http://list.jd.com/list.html?cat=9987,653,655&page=1&go=0&JL=6_0_0";
Spider.create(newJDAjaxProcessor()).addUrl(list)
.addPipeline(new FilePipeline("D:\\webmagic\\"))
.run();for(String s : map.values()) {
System.out.println(s);
}
System.out.println("map-->" +map.size());
System.out.println(map.get("10274956063"));
}private Site site = Site.me().setRetryTimes(3).setSleepTime(100);publicSite getSite() {returnsite;
}public voidprocess(Page page) {if(page.getUrl().regex(URL_LIST).match()) {//page.setSkip(true);
page.putField("id",page.getHtml().xpath("//div[@class='p-focus']/a/@data-sku").all());
page.putField("name",page.getHtml().xpath("//div[@class='p-name']/a/em/text()").all());
List ids = (List) page.getResultItems().get("id");
List name = (List) page.getResultItems().get("name");
String makerUrl=makerUrl(ids);//System.out.println("价格连接" + makerUrl);//key:id,value:price
Map running =JDJsonPreocessor.running(makerUrl);for (int i = 0; i < name.size(); i++) {
String price= running.get("J_"+ids.get(i));
map.put(ids.get(i), name.get(i)+"\t"+price);
}
page.addTargetRequests(page.getHtml().links().regex(URL_LIST).all());
}
}public String makerUrl(Listids){
StringBuffer sb= newStringBuffer();for(String id : ids) {
sb.append("J_"+id+",");
}
String substring= sb.substring(0, sb.length()-1);return "http://p.3.cn/prices/mgets?skuIds="+substring+"&callback=result";
}public voidwriteFile(){
}
}
魔乐社区(Modelers.cn) 是一个中立、公益的人工智能社区,提供人工智能工具、模型、数据的托管、展示与应用协同服务,为人工智能开发及爱好者搭建开放的学习交流平台。社区通过理事会方式运作,由全产业链共同建设、共同运营、共同享有,推动国产AI生态繁荣发展。
更多推荐


所有评论(0)