package org.meaningfulweb.cext;

import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;

import org.jdom.Document;

/**
 * A named, ordered chain of {@link HtmlContentProcessor}s that is run against
 * a single JDOM {@link Document}.
 *
 * <p>Each processor is handed the pipeline's metadata map, asked to process
 * the document, and then queried for the values it extracted. Those values are
 * merged into one result map under keys of the form
 * {@code <pipelineName>.<processorName>.<key>}.
 *
 * <p>Instances hold mutable state ({@code current}, the processor list and the
 * metadata map) and perform no synchronization of their own.
 */
public class HtmlContentPipeline {

  private List<HtmlContentProcessor> processors;
  private int current = 0;
  private String name;
  private Map<String, Object> metadata = new LinkedHashMap<String, Object>();

  /** Creates an empty pipeline with no name, no processors and empty metadata. */
  public HtmlContentPipeline() {
  }

  /**
   * Returns the processors in execution order.
   *
   * @return the configured processor list, or {@code null} if none was set
   */
  public List<HtmlContentProcessor> getProcessors() {
    return processors;
  }

  /**
   * Sets the processors to run, in execution order.
   *
   * @param processors the processor list; {@code null} makes
   *     {@link #processPipeline(Document)} a no-op that returns an empty map
   */
  public void setProcessors(List<HtmlContentProcessor> processors) {
    this.processors = processors;
  }

  /**
   * Returns the zero-based index of the processor most recently started by
   * {@link #processPipeline(Document)}.
   *
   * @return the index of the last processor that was run, or {@code 0} if the
   *     pipeline has not run or had no processors
   */
  public int getCurrent() {
    return current;
  }

  /**
   * Returns the pipeline name used as the first segment of every result key.
   *
   * @return the pipeline name, possibly {@code null}
   */
  public String getName() {
    return name;
  }

  /**
   * Sets the pipeline name used as the first segment of every result key.
   *
   * @param name the pipeline name
   */
  public void setName(String name) {
    this.name = name;
  }

  /**
   * Returns the metadata map that is handed to every processor.
   *
   * @return the live metadata map (not a copy)
   */
  public Map<String, Object> getMetadata() {
    return metadata;
  }

  /**
   * Replaces the metadata map that is handed to every processor.
   *
   * @param metadata the new metadata map; it is stored as-is, not copied
   */
  public void setMetadata(Map<String, Object> metadata) {
    this.metadata = metadata;
  }

  /**
   * Runs every configured processor against {@code document}, in list order,
   * and collects what they extract.
   *
   * <p>Processing stops after the first processor whose
   * {@code processContent} call returns {@code false}; anything that processor
   * extracted before reporting failure is still included in the result.
   *
   * @param document the document passed to each processor
   * @return a new insertion-ordered map of
   *     {@code <pipelineName>.<processorName>.<key>} to extracted value; empty
   *     (never {@code null}) when there are no processors
   */
  public Map<String, Object> processPipeline(Document document) {
    Map<String, Object> extract = new LinkedHashMap<String, Object>();
    // Start every run from a known position so getCurrent() never reports a
    // stale index from a previous invocation.
    current = 0;
    if (processors == null) {
      return extract;
    }

    for (int i = 0; i < processors.size(); i++) {
      current = i;
      HtmlContentProcessor processor = processors.get(i);
      processor.setMetadata(metadata);
      String procName = processor.getName();
      boolean good = processor.processContent(document);

      // A processor that extracted nothing may hand back null rather than an
      // empty map; treat that as "no values" instead of failing the pipeline.
      Map<String, Object> curExtract = processor.getExtracted();
      if (curExtract != null) {
        String prefix = name + "." + procName + ".";
        for (Map.Entry<String, Object> entry : curExtract.entrySet()) {
          extract.put(prefix + entry.getKey(), entry.getValue());
        }
      }

      // Values are merged before this check on purpose: a failing processor
      // still contributes whatever it managed to extract.
      if (!good) {
        break;
      }
    }
    return extract;
  }
}