package org.meaningfulweb.cext.processors;

import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

import org.meaningfulweb.cext.HtmlContentProcessor;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.jdom.Attribute;
import org.jdom.Content;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.xpath.XPath;

/**
 * Content processor that removes from a JDOM document every node matched by
 * a configured set of XPath expressions.
 *
 * <p>Expressions are evaluated one after another in the iteration order of
 * the configured set. An expression that fails to compile or evaluate is
 * logged and skipped; the remaining expressions are still applied.
 */
public class XPathCleanerProcessor
  extends HtmlContentProcessor {

  public static final Log LOG = LogFactory.getLog(XPathCleanerProcessor.class);

  /** XPath expressions to evaluate, kept in insertion order. */
  private Set<String> xpaths = new LinkedHashSet<String>();

  /**
   * Evaluates each configured XPath expression against the document and
   * detaches every matched node from its parent.
   *
   * @param document the document to clean; it is modified in place
   * @return always {@code true}
   */
  @Override
  public boolean processContent(Document document) {

    if (xpaths == null || xpaths.isEmpty()) {
      return true;
    }

    for (String xpath : xpaths) {
      try {
        XPath xp = XPath.newInstance(xpath);
        // selectNodes returns an untyped list: besides elements it may hold
        // text, comments, attributes or the document itself, depending on
        // the expression, so it must not be treated as a List<Element>.
        List<?> selectedNodes = xp.selectNodes(document);
        if (selectedNodes == null) {
          continue;
        }
        for (Object node : selectedNodes) {
          removeNode(node, xpath);
        }
      }
      catch (JDOMException e) {
        // A bad expression must not prevent the remaining ones from running.
        LOG.warn("Error evaluating xpath expression: " + xpath, e);
      }
    }

    return true;
  }

  /**
   * Detaches a single XPath result from the tree it belongs to. Results that
   * cannot be detached (for example the document node) are left untouched.
   */
  private void removeNode(Object node, String xpath) {
    if (node instanceof Content) {
      // detach() is a no-op for parentless content, so nodes that have no
      // parent do not cause a NullPointerException.
      ((Content)node).detach();
    }
    else if (node instanceof Attribute) {
      ((Attribute)node).detach();
    }
    else if (LOG.isDebugEnabled()) {
      LOG.debug("Ignoring non-removable result of xpath " + xpath + ": "
        + (node == null ? "null" : node.getClass().getName()));
    }
  }

  /**
   * Returns the XPath expressions currently configured.
   *
   * @return the internal, modifiable set of expressions
   */
  public Collection<String> getXpaths() {
    return xpaths;
  }

  /**
   * Replaces the configured XPath expressions. A {@code null} argument is
   * ignored and leaves the current configuration unchanged.
   *
   * <p>The given collection is copied, so later changes to it do not affect
   * this processor. Duplicates are dropped and the collection's iteration
   * order is preserved.
   *
   * @param xpaths the expressions to use, or {@code null} to keep the
   *        current ones
   */
  public void setXpaths(Collection<String> xpaths) {
    if (xpaths != null) {
      this.xpaths = new LinkedHashSet<String>(xpaths);
    }
  }
}