package us.codecraft.webmagic.processor;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
/**
* Interface to be implemented to customize a crawler.<br>
* <br>
* In PageProcessor, you can customize:
* <br>
* start urls and other settings in {@link Site}<br>
* how the urls to fetch are detected <br>
* how the data are extracted and stored <br>
*
* @author code4crafter@gmail.com <br>
* @see Site
* @see Page
* @since 0.1.0
*/
public interface PageProcessor {
/**
* process the page, extract urls to fetch, extract the data and store
*
* @param page page
*/
public void process(Page page);
/**
* get the site settings
*
* @return site
* @see Site
*/
public Site getSite();
}