// HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML // http://sourceforge.org/projects/htmlparser // Copyright (C) 2004 Somik Raha // // Revision Control Information // // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/LinkTag.java,v $ // $Author: derrickoswald $ // $Date: 2005/04/10 23:20:45 $ // $Revision: 1.54 $ // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // package org.htmlparser.tags; import org.htmlparser.Node; import org.htmlparser.util.ParserUtils; import org.htmlparser.util.SimpleNodeIterator; /** * Identifies a link tag. */ public class LinkTag extends CompositeTag { /** * The set of names handled by this tag. */ private static final String[] mIds = new String[] {"A"}; /** * The set of tag names that indicate the end of this tag. */ private static final String[] mEnders = new String[] {"A", "P", "DIV", "TD", "TR", "FORM", "LI"}; /** * The set of end tag names that indicate the end of this tag. */ private static final String[] mEndTagEnders = new String[] {"P", "DIV", "TD", "TR", "FORM", "LI", "BODY", "HTML"}; /** * The URL where the link points to */ protected String mLink; /** * Set to true when the link was a mailto: URL. */ private boolean mailLink; /** * Set to true when the link was a javascript: URL. */ private boolean javascriptLink; /** * Constructor creates an LinkTag object, which basically stores the location * where the link points to, and the text it contains. * <p> * In order to get the contents of the link tag, use the method linkData(), * which returns an enumeration of nodes encapsulated within the link. * <p> * The following code will get all the images inside a link tag. * <pre> * Node node ; * ImageTag imageTag; * for (Enumeration e=linkTag.linkData();e.hasMoreElements();) { * node = (Node)e.nextElement(); * if (node instanceof ImageTag) { * imageTag = (ImageTag)node; * // Process imageTag * } * } * </pre> */ public LinkTag () { } /** * Return the set of names handled by this tag. * @return The names to be matched that create tags of this type. */ public String[] getIds () { return (mIds); } /** * Return the set of tag names that cause this tag to finish. * @return The names of following tags that stop further scanning. */ public String[] getEnders () { return (mEnders); } /** * Return the set of end tag names that cause this tag to finish. * @return The names of following end tags that stop further scanning. */ public String[] getEndTagEnders () { return (mEndTagEnders); } /** * Get the <code>ACCESSKEY</code> attribute, if any. * @return The value of the <code>ACCESSKEY</code> attribute, * or <code>null</code> if the attribute doesn't exist. */ public String getAccessKey() { return (getAttribute("ACCESSKEY")); } /** * Returns the url as a string, to which this link points. * This string has had the "mailto:" and "javascript:" protocol stripped * off the front (if those predicates return <code>true</code>) but not * for other protocols. Don't ask me why, it's a legacy thing. * @return The URL for this <code>A</code> tag. */ public String getLink() { if (null == mLink) { mailLink=false; javascriptLink = false; mLink = extractLink (); int mailto = mLink.indexOf("mailto"); if (mailto==0) { // yes it is mailto = mLink.indexOf(":"); mLink = mLink.substring(mailto+1); mailLink = true; } int javascript = mLink.indexOf("javascript:"); if (javascript == 0) { mLink = mLink.substring(11); // this magic number is "javascript:".length() javascriptLink = true; } } return (mLink); } /** * Returns the text contained inside this link tag. * @return The textual contents between the {@.html <A></A>} pair. */ public String getLinkText() { String ret; if (null != getChildren ()) ret = getChildren ().asString (); else ret = ""; return (ret); } /** * Is this a mail address * @return boolean true/false */ public boolean isMailLink() { getLink (); // force an evaluation of the booleans return (mailLink); } /** * Tests if the link is javascript * @return flag indicating if the link is a javascript code */ public boolean isJavascriptLink() { getLink (); // force an evaluation of the booleans return (javascriptLink); } /** * Tests if the link is an FTP link. * * @return flag indicating if this link is an FTP link */ public boolean isFTPLink() { return getLink ().indexOf("ftp://")==0; } /** * Tests if the link is an IRC link. * @return flag indicating if this link is an IRC link */ public boolean isIRCLink() { return getLink ().indexOf("irc://")==0; } /** * Tests if the link is an HTTP link. * * @return flag indicating if this link is an HTTP link */ public boolean isHTTPLink() { return (!isFTPLink() && !isHTTPSLink() && !isJavascriptLink() && !isMailLink() && !isIRCLink()); } /** * Tests if the link is an HTTPS link. * * @return flag indicating if this link is an HTTPS link */ public boolean isHTTPSLink() { return getLink ().indexOf("https://")==0; } /** * Tests if the link is an HTTP link or one of its variations (HTTPS, etc.). * * @return flag indicating if this link is an HTTP link or one of its variations (HTTPS, etc.) */ public boolean isHTTPLikeLink() { return isHTTPLink() || isHTTPSLink(); } /** * Insert the method's description here. * Creation date: (8/3/2001 1:49:31 AM) * @param newMailLink boolean */ public void setMailLink(boolean newMailLink) { mailLink = newMailLink; } /** * Set the link as a javascript link. * * @param newJavascriptLink flag indicating if the link is a javascript code */ public void setJavascriptLink(boolean newJavascriptLink) { javascriptLink = newJavascriptLink; } /** * Return the contents of this link node as a string suitable for debugging. * @return A string representation of this node. */ public String toString() { StringBuffer sb = new StringBuffer(); sb.append("Link to : "+ getLink() + "; titled : "+getLinkText ()+"; begins at : "+getStartPosition ()+"; ends at : "+getEndPosition ()+ ", AccessKey="); if (getAccessKey ()==null) sb.append("null\n"); else sb.append(getAccessKey ()+"\n"); if (null != getChildren ()) { sb.append(" "+"LinkData\n"); sb.append(" "+"--------\n"); Node node; int i = 0; for (SimpleNodeIterator e=children();e.hasMoreNodes();) { node = e.nextNode(); sb.append(" "+(i++)+ " "); sb.append(node.toString()+"\n"); } } sb.append(" "+"*** END of LinkData ***\n"); return sb.toString(); } /** * Set the <code>HREF</code> attribute. * @param link The new value of the <code>HREF</code> attribute. */ public void setLink(String link) { mLink = link; setAttribute ("HREF", link); } /** * This method returns an enumeration of data that it contains * @return Enumeration * @deprecated Use children() instead. */ public SimpleNodeIterator linkData() { return children(); } /** * Extract the link from the HREF attribute. * @return The URL from the HREF attibute. This is absolute if the tag has * a valid page. */ public String extractLink () { String ret; ret = getAttribute ("HREF"); if (null != ret) { ret = ParserUtils.removeChars (ret,'\n'); ret = ParserUtils.removeChars (ret,'\r'); } if (null != getPage ()) ret = getPage ().getAbsoluteURL (ret); return (ret); } }