package com.geccocrawler.gecco.demo.jd;
import java.util.List;
import com.geccocrawler.gecco.GeccoEngine;
import com.geccocrawler.gecco.annotation.Gecco;
import com.geccocrawler.gecco.annotation.HtmlField;
import com.geccocrawler.gecco.annotation.Request;
import com.geccocrawler.gecco.request.HttpGetRequest;
import com.geccocrawler.gecco.request.HttpRequest;
import com.geccocrawler.gecco.spider.HtmlBean;
/**
 * Bean bound to the JD.com "all categories" page
 * ({@code https://www.jd.com/allSort.aspx}).
 *
 * <p>Three groups of {@link Category} entries are selected from the page via
 * CSS paths: mobile phones, household appliances, and mother &amp; baby.
 * The bean is handed to the {@code consolePipeline} and
 * {@code allSortPipeline} pipelines named in the {@code @Gecco} annotation.
 *
 * <p>{@link #main(String[])} launches the demo crawl in two stages.
 */
@Gecco(matchUrl="https://www.jd.com/allSort.aspx", pipelines={"consolePipeline", "allSortPipeline"})
public class AllSort implements HtmlBean {

    private static final long serialVersionUID = 665662335318691818L;

    /** Request object populated through the {@code @Request} annotation. */
    @Request
    private HttpRequest request;

    /** Mobile phones. */
    @HtmlField(cssPath=".category-items > div:nth-child(1) > div:nth-child(2) > div.mc > div.items > dl")
    private List<Category> mobile;

    /** Household appliances. */
    @HtmlField(cssPath=".category-items > div:nth-child(1) > div:nth-child(3) > div.mc > div.items > dl")
    private List<Category> domestic;

    /** Mother and baby products. */
    @HtmlField(cssPath=".category-items > div:nth-child(2) > div:nth-child(2) > div.mc > div.items > dl")
    private List<Category> baby;

    /** @return the request associated with this bean */
    public HttpRequest getRequest() {
        return this.request;
    }

    /** @param request the request to associate with this bean */
    public void setRequest(HttpRequest request) {
        this.request = request;
    }

    /** @return the mobile phone categories */
    public List<Category> getMobile() {
        return this.mobile;
    }

    /** @param mobile the mobile phone categories */
    public void setMobile(List<Category> mobile) {
        this.mobile = mobile;
    }

    /** @return the household appliance categories */
    public List<Category> getDomestic() {
        return this.domestic;
    }

    /** @param domestic the household appliance categories */
    public void setDomestic(List<Category> domestic) {
        this.domestic = domestic;
    }

    /** @return the mother and baby categories */
    public List<Category> getBaby() {
        return this.baby;
    }

    /** @param baby the mother and baby categories */
    public void setBaby(List<Category> baby) {
        this.baby = baby;
    }

    /**
     * Demo entry point: crawls the category page first, then crawls the
     * requests held in {@code AllSortPipeline.sortRequests}.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // Package passed to classpath() for both engines.
        final String scanPackage = "com.geccocrawler.gecco.demo.jd";

        // Stage 1: fetch the category list first.
        HttpGetRequest sortPage = new HttpGetRequest("https://www.jd.com/allSort.aspx");
        sortPage.setCharset("GBK");

        GeccoEngine categoryEngine = GeccoEngine.create()
            .classpath(scanPackage)
            // Page address at which crawling starts.
            .start(sortPage)
            // Number of crawler threads to launch.
            .thread(1)
            // Pause of a single crawler after finishing each request
            // (presumably milliseconds -- TODO confirm).
            .interval(2000);
        // NOTE(review): run() is presumably executed synchronously here so that
        // AllSortPipeline.sortRequests is filled before stage 2 -- confirm.
        categoryEngine.run();

        // Stage 2: the product lists under the category list are crawled with 3 threads.
        GeccoEngine productEngine = GeccoEngine.create()
            .classpath(scanPackage)
            // Requests at which crawling starts.
            .start(AllSortPipeline.sortRequests)
            // Number of crawler threads to launch.
            .thread(3)
            // Pause of a single crawler after finishing each request.
            .interval(2000);
        productEngine.start();
    }
}