// HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2005 John Derrick
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/filters/LinkRegexFilter.java,v $
// $Author: derrickoswald $
// $Date: 2005/09/18 23:40:44 $
// $Revision: 1.4 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//

package org.htmlparser.filters;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.tags.LinkTag;

/**
 * This class accepts tags of class LinkTag that contain a link matching a given
 * regex pattern. Use this filter to extract LinkTag nodes with URLs that match
 * the desired regex pattern.
 * <p>
 * Matching uses {@link Matcher#find()}, so the pattern may match any part of
 * the link; anchor the pattern with <code>^</code> and <code>$</code> to
 * require that the whole link matches.
 */
public class LinkRegexFilter implements NodeFilter
{
    /**
     * The regular expression to use on the link.
     */
    protected Pattern mRegex;

    /**
     * Creates a LinkRegexFilter that accepts LinkTag nodes containing
     * a URL that matches the supplied regex pattern.
     * The match is case sensitive; use
     * {@link #LinkRegexFilter(String, boolean)} with <code>false</code>
     * for a case insensitive match.
     * @param regexPattern The regex pattern to match.
     */
    public LinkRegexFilter (String regexPattern)
    {
        // 'true' selects case sensitive matching in the two-argument constructor.
        this (regexPattern, true);
    }

    /**
     * Creates a LinkRegexFilter that accepts LinkTag nodes containing
     * a URL that matches the supplied regex pattern.
     * @param regexPattern The regex pattern to match.
     * @param caseSensitive Specifies case sensitivity for the matching process:
     * <code>true</code> compiles the pattern as given, <code>false</code>
     * compiles it with <code>Pattern.CASE_INSENSITIVE</code> and
     * <code>Pattern.UNICODE_CASE</code>.
     */
    public LinkRegexFilter (String regexPattern, boolean caseSensitive)
    {
        if (caseSensitive)
        {
            mRegex = Pattern.compile (regexPattern);
        }
        else
        {
            // UNICODE_CASE extends the case folding beyond US-ASCII characters.
            mRegex = Pattern.compile (regexPattern,
                Pattern.CASE_INSENSITIVE | Pattern.UNICODE_CASE);
        }
    }

    /**
     * Accept nodes that are a LinkTag and have a URL
     * that matches the regex pattern supplied in the constructor.
     * A <code>null</code> node, a node that is not a LinkTag, and a LinkTag
     * whose link is <code>null</code> are all rejected.
     * @param node The node to check.
     * @return <code>true</code> if the node is a link with the pattern,
     * <code>false</code> otherwise.
     */
    public boolean accept (Node node)
    {
        // instanceof evaluates to false for null, so a null node is rejected
        // here rather than failing on node.getClass ().
        if (!(node instanceof LinkTag))
        {
            return (false);
        }

        String link = ((LinkTag)node).getLink ();
        if (null == link)
        {
            // Pattern.matcher does not accept null input; no link, no match.
            return (false);
        }

        Matcher matcher = mRegex.matcher (link);
        return (matcher.find ());
    }
}