package com.cse10.crawler.contentHandler; import com.cse10.article.Article; import com.cse10.filter.LengthFilter; import edu.uci.ics.crawler4j.crawler.Page; import org.apache.log4j.Logger; import java.util.ArrayList; import java.util.List; /** * Created by TharinduWijewardane on 10.07.2014. */ public abstract class BasicContentHandler { protected Logger logger = Logger.getLogger(this.getClass()); protected List<Article> articles; public BasicContentHandler() { articles = new ArrayList<Article>(); } /** * to be overridden */ public abstract List extractArticles(Page page); protected boolean filterArticles(String content) { if (content == null) { return false; } // length filter if (!LengthFilter.filterContent(content)) { logger.info("****** Filtered out due to low length of content ****** Content: " + content); return false; } // keywords filter // turned off // if (!KeywordsFilter.filterContent(content)) { // return false; // } return true; } }