package us.codecraft.webmagic.model.samples; import us.codecraft.webmagic.Site; import us.codecraft.webmagic.model.HasKey; import us.codecraft.webmagic.model.OOSpider; import us.codecraft.webmagic.model.annotation.ExprType; import us.codecraft.webmagic.model.annotation.ExtractBy; import us.codecraft.webmagic.model.annotation.TargetUrl; import us.codecraft.webmagic.pipeline.JsonFilePageModelPipeline; import java.util.List; /** * @author code4crafter@gmail.com <br> * Date: 13-8-2 <br> * Time: 上午7:52 <br> */ @TargetUrl("http://my.oschina.net/flashsword/blog/\\d+") public class OschinaBlog implements HasKey{ @ExtractBy("//title") private String title; @ExtractBy(value = "div.BlogContent",type = ExprType.CSS) private String content; @ExtractBy(value = "//div[@class='BlogTags']/a/text()", multi = true) private List<String> tags; public static void main(String[] args) { OOSpider.create(Site.me().addStartUrl("http://my.oschina.net/flashsword/blog") ,new JsonFilePageModelPipeline(), OschinaBlog.class).run(); } @Override public String key() { return title; } public String getTitle() { return title; } public String getContent() { return content; } public List<String> getTags() { return tags; } }