package org.apache.nutchbase.parse;
import org.w3c.dom.DocumentFragment;
// Hadoop imports
import org.apache.hadoop.conf.Configurable;
// Nutch imports
import org.apache.nutch.parse.HTMLMetaTags;
import org.apache.nutchbase.plugin.PluggableHbase;
import org.apache.nutchbase.util.hbase.RowPart;
/**
 * Extension point for HTML parse filters that work on the DOM of a page.
 * Implementations may attach additional metadata to an HTML parse. Every
 * plugin found that implements this extension point is run, one after
 * another, on the parse.
 */
public interface HtmlParseFilterHbase extends PluggableHbase, Configurable {

  /**
   * Name of this extension point, taken from the fully qualified name of
   * this interface. Interface fields are implicitly
   * {@code public static final}, so no modifiers are spelled out.
   */
  String X_POINT_ID = HtmlParseFilterHbase.class.getName();

  /**
   * Adds metadata to, or otherwise modifies, a parse of HTML content, given
   * the DOM tree of the page.
   *
   * @param url         presumably the URL of the page being parsed
   *                    (TODO confirm against callers)
   * @param row         row data associated with the page; exact contents
   *                    are defined by {@code RowPart} (not visible here)
   * @param parseResult the parse to be augmented or modified
   * @param metaTags    HTML meta tags for the page (presumably extracted
   *                    by the parser before filters run; verify)
   * @param doc         the DOM tree of the page
   * @return the resulting parse; whether this may be the same instance as
   *         {@code parseResult} or {@code null} is left to implementations
   *         and is not specified here
   */
  ParseHbase filter(String url,
                    RowPart row,
                    ParseHbase parseResult,
                    HTMLMetaTags metaTags,
                    DocumentFragment doc);
}