package us.codecraft.webmagic.samples;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.Spider;
import us.codecraft.webmagic.downloader.selenium.SeleniumDownloader;
import us.codecraft.webmagic.pipeline.FilePipeline;
import us.codecraft.webmagic.processor.PageProcessor;
/**
 * Sample processor that uses Selenium with PhantomJS to fetch a web page
 * whose content is rendered by JavaScript, and stores the page's whole HTML.<br>
 *
 * @author bob.li.0718@gmail.com <br>
 * Date: 15-7-11 <br>
 */
public class GooglePlayProcessor implements PageProcessor {

    /** Output directory handed to {@link FilePipeline} when none is given on the command line. */
    private static final String DEFAULT_OUTPUT_DIR =
            "/Users/Bingo/Documents/workspace/webmagic/webmagic-selenium/data/";

    /** Start URL used when none is given on the command line. */
    private static final String DEFAULT_START_URL =
            "https://play.google.com/store/apps/details?id=com.tencent.mm";

    /**
     * Site configuration returned by {@link #getSite()}. Built once and kept
     * final so that every caller sees the same, fully initialized instance
     * without relying on unsynchronized lazy initialization.
     */
    private final Site site = Site.me().setDomain("play.google.com").setSleepTime(300);

    /**
     * Stores the complete HTML of the downloaded page under the
     * {@code "whole-html"} result field.
     *
     * @param page the downloaded page to extract from
     */
    @Override
    public void process(Page page) {
        page.putField("whole-html", page.getHtml().toString());
    }

    /**
     * Returns the site configuration for this processor.
     *
     * @return the shared {@link Site} instance (domain {@code play.google.com})
     */
    @Override
    public Site getSite() {
        return site;
    }

    /**
     * Runs the sample spider with a {@link SeleniumDownloader} and a
     * {@link FilePipeline}.
     *
     * @param args optional arguments: {@code args[0]} is the output directory
     *             for the file pipeline and {@code args[1]} is the start URL;
     *             any argument that is missing falls back to the built-in default
     */
    public static void main(String[] args) {
        // Fall back to the original hard-coded values so running without
        // arguments behaves exactly as before.
        String outputDir = (args != null && args.length > 0) ? args[0] : DEFAULT_OUTPUT_DIR;
        String startUrl = (args != null && args.length > 1) ? args[1] : DEFAULT_START_URL;

        Spider.create(new GooglePlayProcessor())
                .thread(5)
                .addPipeline(new FilePipeline(outputDir))
                .setDownloader(new SeleniumDownloader())
                .addUrl(startUrl)
                .runAsync();
    }
}