package org.meaningfulweb.cext.processors; import java.util.HashMap; import java.util.Map; import org.jdom.Document; import org.meaningfulweb.cext.HtmlContentProcessor; import org.meaningfulweb.util.URLUtil; public class DomainSpecifiedImageProcessor extends HtmlContentProcessor { public static class ExtractedImage{ public String fullImage; public String thumbnailImage; } public static abstract class DomainImageExtractionHandler{ public abstract ExtractedImage extract(String url); } private Map<String,DomainImageExtractionHandler> _handlerMap = new HashMap<String,DomainImageExtractionHandler>(); public void addExtractionHandler(String host,DomainImageExtractionHandler handler){ _handlerMap.put(host,handler); } @Override public boolean processContent(Document document) { Map<String,Object> extractedMap = this.getExtracted(); // image already extracted if (extractedMap.containsKey("image")){ return true; } String url = (String)getMetadata().get("url"); String host = URLUtil.getHost(url); DomainImageExtractionHandler handler = _handlerMap.get(host); if (handler!=null){ ExtractedImage img = handler.extract(url); if (img!=null){ extractedMap.put("image", img.thumbnailImage); extractedMap.put("fullimage", img.fullImage); return true; } } return false; } }