package org.study.WebMagicStudy;

import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.pipeline.FilePipeline;
import us.codecraft.webmagic.processor.PageProcessor;

/**
 * WebMagic {@link PageProcessor} for JD.com product listing pages.
 *
 * <p>For every page whose URL matches {@link #URL_LIST} it extracts the product ids and names,
 * asks {@code JDJsonPreocessor} for the prices of those ids, records {@code id -> "name\tprice"}
 * in {@link #map}, and queues every further listing-page link found on the page.
 */
public class JDAjaxProcessor implements PageProcessor {

    /** Regular expression for the listing-page URLs this processor handles and follows. */
    public static final String URL_LIST = "http://list\\.jd\\.com/list\\.html\\?cat=9987,653,655&page=\\d+\\&go=0\\&JL=6_0_0";

    /**
     * Crawl results: key is the product id, value is {@code name + "\t" + price}.
     *
     * <p>NOTE(review): plain {@link HashMap} written from {@link #process(Page)}; if the spider is
     * ever configured with more than one worker thread this needs a concurrent map - confirm.
     */
    static Map<String, String> map = new HashMap<String, String>();

    /** Not used anywhere in this class; kept because it is visible to the rest of the package. */
    static Set<String> uri = new HashSet<String>();

    /** Crawl configuration: 3 retries per request, 100 ms sleep time. */
    private final Site site = Site.me().setRetryTimes(3).setSleepTime(100);

    /**
     * Runs the crawl starting from the first listing page, then prints every collected entry,
     * the number of entries, and the entry for one fixed product id.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String list = "http://list.jd.com/list.html?cat=9987,653,655&page=1&go=0&JL=6_0_0";

        Spider.create(new JDAjaxProcessor())
                .addUrl(list)
                .addPipeline(new FilePipeline("D:\\webmagic\\"))
                .run();

        for (String s : map.values()) {
            System.out.println(s);
        }
        System.out.println("map-->" + map.size());
        System.out.println(map.get("10274956063"));
    }

    /**
     * Returns the crawl configuration used by the spider.
     *
     * @return the site settings of this processor
     */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Extracts ids, names and prices from a listing page and queues further listing pages.
     * Pages whose URL does not match {@link #URL_LIST} are left untouched.
     *
     * @param page the downloaded page handed over by the spider
     */
    @Override
    public void process(Page page) {
        if (!page.getUrl().regex(URL_LIST).match()) {
            return;
        }

        List<String> ids = page.getHtml().xpath("//div[@class='p-focus']/a/@data-sku").all();
        List<String> names = page.getHtml().xpath("//div[@class='p-name']/a/em/text()").all();
        page.putField("id", ids);
        page.putField("name", names);

        // Without ids there is nothing to price; skip the remote lookup instead of
        // requesting an empty skuIds list.
        if (!ids.isEmpty()) {
            // key: "J_" + id, value: price
            // (presumably a Map<String, String>; verify against JDJsonPreocessor.running)
            Map<String, String> prices = JDJsonPreocessor.running(makerUrl(ids));

            // Ids and names are paired by position. Bound the loop by the shorter list so a
            // page on which the two XPath queries match different counts cannot overrun.
            int count = Math.min(ids.size(), names.size());
            for (int i = 0; i < count; i++) {
                String id = ids.get(i);
                String price = prices.get("J_" + id);
                map.put(id, names.get(i) + "\t" + price);
            }
        }

        page.addTargetRequests(page.getHtml().links().regex(URL_LIST).all());
    }

    /**
     * Builds the price-query URL for the given product ids. Each id is prefixed with
     * {@code "J_"} and the prefixed ids are joined with commas.
     *
     * @param ids product ids; an empty list yields a URL with an empty {@code skuIds} parameter
     * @return the price-query URL
     */
    public String makerUrl(List<String> ids) {
        StringBuilder skuIds = new StringBuilder();
        for (String id : ids) {
            if (skuIds.length() > 0) {
                skuIds.append(',');
            }
            skuIds.append("J_").append(id);
        }
        return "http://p.3.cn/prices/mgets?skuIds=" + skuIds + "&callback=result";
    }

    /** Placeholder; currently does nothing. */
    public void writeFile() {
    }

}

Logo

魔乐社区(Modelers.cn) 是一个中立、公益的人工智能社区,提供人工智能工具、模型、数据的托管、展示与应用协同服务,为人工智能开发及爱好者搭建开放的学习交流平台。社区通过理事会方式运作,由全产业链共同建设、共同运营、共同享有,推动国产AI生态繁荣发展。

更多推荐