package com.bao.lc.common;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.tags.LinkTag;
/**
 * A node filter that accepts {@link LinkTag} nodes whose link text contains a
 * match for a given regular expression.
 * <p>
 * The pattern is applied with {@link Matcher#find()}, so it only has to match
 * somewhere inside the link text; anchor the pattern with <code>^</code> and
 * <code>$</code> to require that the whole text matches.
 */
@SuppressWarnings("serial")
public class LinkTextRegexFilter implements NodeFilter
{
    /**
     * The compiled regular expression applied to the link text.
     */
    protected Pattern mRegex;

    /**
     * Creates a LinkTextRegexFilter that accepts LinkTag nodes containing
     * a text that matches the supplied regex pattern.
     * The match is case sensitive; use
     * {@link #LinkTextRegexFilter(String, boolean)} with <code>false</code>
     * for a case insensitive match.
     * @param regexPattern The regex pattern to match.
     * @throws java.util.regex.PatternSyntaxException if the pattern's syntax
     * is invalid.
     */
    public LinkTextRegexFilter (String regexPattern)
    {
        this (regexPattern, true);
    }

    /**
     * Creates a LinkTextRegexFilter that accepts LinkTag nodes containing
     * a text that matches the supplied regex pattern.
     * @param regexPattern The regex pattern to match.
     * @param caseSensitive <code>true</code> to match case sensitively,
     * <code>false</code> to ignore case (Unicode-aware case folding is used).
     * @throws java.util.regex.PatternSyntaxException if the pattern's syntax
     * is invalid.
     */
    public LinkTextRegexFilter (String regexPattern, boolean caseSensitive)
    {
        if (caseSensitive)
        {
            mRegex = Pattern.compile (regexPattern);
        }
        else
        {
            mRegex = Pattern.compile (regexPattern,
                Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
        }
    }

    /**
     * Accept nodes that are a LinkTag and have a text
     * that matches the regex pattern supplied in the constructor.
     * @param node The node to check; <code>null</code> is rejected.
     * @return <code>true</code> if the node is a link whose text contains a
     * match for the pattern, <code>false</code> otherwise.
     */
    @Override
    public boolean accept (Node node)
    {
        // instanceof is false for null, so a null node is rejected here
        // instead of failing with a NullPointerException.
        if (!(node instanceof LinkTag))
        {
            return false;
        }

        String linkText = ((LinkTag) node).getLinkText ();
        if (linkText == null)
        {
            // Pattern.matcher(null) would throw; no text means no match.
            return false;
        }

        Matcher matcher = mRegex.matcher (linkText);
        return matcher.find ();
    }
}