// HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Somik Raha
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/LinkTag.java,v $
// $Author: derrickoswald $
// $Date: 2005/04/10 23:20:45 $
// $Revision: 1.54 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
package org.htmlparser.tags;
import org.htmlparser.Node;
import org.htmlparser.util.ParserUtils;
import org.htmlparser.util.SimpleNodeIterator;
/**
 * Identifies a link (<code>A</code>) tag.
 * <p>
 * The link target is read lazily from the <code>HREF</code> attribute the
 * first time {@link #getLink} is called and is cached in {@link #mLink}
 * afterwards. The mail and javascript flags are computed at the same time.
 */
public class LinkTag extends CompositeTag
{
    /**
     * The set of names handled by this tag.
     */
    private static final String[] mIds = new String[] {"A"};

    /**
     * The set of tag names that indicate the end of this tag.
     */
    private static final String[] mEnders = new String[] {"A", "P", "DIV", "TD", "TR", "FORM", "LI"};

    /**
     * The set of end tag names that indicate the end of this tag.
     */
    private static final String[] mEndTagEnders = new String[] {"P", "DIV", "TD", "TR", "FORM", "LI", "BODY", "HTML"};

    /**
     * Prefix identifying a mail link; stripped from the value returned by
     * {@link #getLink}.
     */
    private static final String MAILTO_PREFIX = "mailto:";

    /**
     * Prefix identifying a javascript link; stripped from the value returned
     * by {@link #getLink}.
     */
    private static final String JAVASCRIPT_PREFIX = "javascript:";

    /**
     * The URL where the link points to.
     * <code>null</code> until it has been computed by {@link #getLink}
     * or assigned by {@link #setLink}.
     */
    protected String mLink;

    /**
     * Set to true when the link was a mailto: URL.
     */
    private boolean mailLink;

    /**
     * Set to true when the link was a javascript: URL.
     */
    private boolean javascriptLink;

    /**
     * Constructor creates a LinkTag object, which basically stores the
     * location where the link points to, and the text it contains.
     * <p>
     * In order to get the contents of the link tag, use the method
     * <code>children()</code>, which returns an iterator over the nodes
     * encapsulated within the link.
     * <p>
     * The following code will get all the images inside a link tag.
     * <pre>
     * for (SimpleNodeIterator e = linkTag.children (); e.hasMoreNodes ();) {
     *     Node node = e.nextNode ();
     *     if (node instanceof ImageTag) {
     *         ImageTag imageTag = (ImageTag)node;
     *         // Process imageTag
     *     }
     * }
     * </pre>
     */
    public LinkTag ()
    {
    }

    /**
     * Return the set of names handled by this tag.
     * @return The names to be matched that create tags of this type.
     */
    public String[] getIds ()
    {
        return (mIds);
    }

    /**
     * Return the set of tag names that cause this tag to finish.
     * @return The names of following tags that stop further scanning.
     */
    public String[] getEnders ()
    {
        return (mEnders);
    }

    /**
     * Return the set of end tag names that cause this tag to finish.
     * @return The names of following end tags that stop further scanning.
     */
    public String[] getEndTagEnders ()
    {
        return (mEndTagEnders);
    }

    /**
     * Get the <code>ACCESSKEY</code> attribute, if any.
     * @return The value of the <code>ACCESSKEY</code> attribute,
     * or <code>null</code> if the attribute doesn't exist.
     */
    public String getAccessKey ()
    {
        return (getAttribute ("ACCESSKEY"));
    }

    /**
     * Returns the url as a string, to which this link points.
     * This string has had the "mailto:" and "javascript:" protocol stripped
     * off the front (if those predicates return <code>true</code>) but not
     * for other protocols. Don't ask me why, it's a legacy thing.
     * <p>
     * The value is computed on the first call and cached; the mail and
     * javascript flags are reset and recomputed as part of that first
     * evaluation.
     * @return The URL for this <code>A</code> tag. Never <code>null</code>:
     * when no link can be extracted the empty string is returned.
     */
    public String getLink ()
    {
        if (null == mLink)
        {
            mailLink = false;
            javascriptLink = false;

            String link = extractLink ();
            // extractLink() yields null when there is no HREF attribute and
            // no page to resolve against; treat that as an empty link rather
            // than failing with a NullPointerException below.
            if (null == link)
                link = "";

            // Require the colon so that an ordinary relative link that merely
            // starts with the letters "mailto" is not mistaken for a mail link.
            if (link.startsWith (MAILTO_PREFIX))
            {
                link = link.substring (MAILTO_PREFIX.length ());
                mailLink = true;
            }
            // Deliberately not an 'else': the javascript test is applied to
            // whatever remains after any mailto prefix has been removed.
            if (link.startsWith (JAVASCRIPT_PREFIX))
            {
                link = link.substring (JAVASCRIPT_PREFIX.length ());
                javascriptLink = true;
            }
            mLink = link;
        }

        return (mLink);
    }

    /**
     * Returns the text contained inside this link tag.
     * @return The textual contents between the opening and closing
     * <code>A</code> tags, or the empty string if there are no children.
     */
    public String getLinkText ()
    {
        return ((null != getChildren ()) ? getChildren ().asString () : "");
    }

    /**
     * Is this a mail address.
     * @return <code>true</code> if the link started with "mailto:".
     */
    public boolean isMailLink ()
    {
        getLink (); // force an evaluation of the booleans
        return (mailLink);
    }

    /**
     * Tests if the link is javascript.
     * @return flag indicating if the link is a javascript code
     */
    public boolean isJavascriptLink ()
    {
        getLink (); // force an evaluation of the booleans
        return (javascriptLink);
    }

    /**
     * Tests if the link is an FTP link.
     *
     * @return flag indicating if this link is an FTP link
     */
    public boolean isFTPLink ()
    {
        return (getLink ().startsWith ("ftp://"));
    }

    /**
     * Tests if the link is an IRC link.
     * @return flag indicating if this link is an IRC link
     */
    public boolean isIRCLink ()
    {
        return (getLink ().startsWith ("irc://"));
    }

    /**
     * Tests if the link is an HTTP link.
     * <p>
     * Note that this is decided by elimination: any link that is not an FTP,
     * HTTPS, javascript, mail or IRC link is reported as an HTTP link.
     *
     * @return flag indicating if this link is an HTTP link
     */
    public boolean isHTTPLink ()
    {
        return (!isFTPLink () && !isHTTPSLink () && !isJavascriptLink () && !isMailLink () && !isIRCLink ());
    }

    /**
     * Tests if the link is an HTTPS link.
     *
     * @return flag indicating if this link is an HTTPS link
     */
    public boolean isHTTPSLink ()
    {
        return (getLink ().startsWith ("https://"));
    }

    /**
     * Tests if the link is an HTTP link or one of its variations (HTTPS, etc.).
     *
     * @return flag indicating if this link is an HTTP link or one of its variations (HTTPS, etc.)
     */
    public boolean isHTTPLikeLink ()
    {
        return (isHTTPLink () || isHTTPSLink ());
    }

    /**
     * Set the flag marking this link as a mail link.
     * <p>
     * The first evaluation of {@link #getLink} (i.e. while no link is
     * cached) resets and recomputes this flag, so a value set before that
     * point is overwritten.
     * @param newMailLink flag indicating if the link is a mail address
     */
    public void setMailLink (boolean newMailLink)
    {
        mailLink = newMailLink;
    }

    /**
     * Set the link as a javascript link.
     * <p>
     * The first evaluation of {@link #getLink} (i.e. while no link is
     * cached) resets and recomputes this flag, so a value set before that
     * point is overwritten.
     *
     * @param newJavascriptLink flag indicating if the link is a javascript code
     */
    public void setJavascriptLink (boolean newJavascriptLink)
    {
        javascriptLink = newJavascriptLink;
    }

    /**
     * Return the contents of this link node as a string suitable for debugging.
     * @return A string representation of this node.
     */
    public String toString ()
    {
        StringBuffer sb = new StringBuffer ();

        sb.append ("Link to : ");
        sb.append (getLink ());
        sb.append ("; titled : ");
        sb.append (getLinkText ());
        sb.append ("; begins at : ");
        sb.append (getStartPosition ());
        sb.append ("; ends at : ");
        sb.append (getEndPosition ());
        sb.append (", AccessKey=");
        // StringBuffer.append(String) renders a null reference as "null",
        // which is exactly what is wanted for a missing access key.
        sb.append (getAccessKey ());
        sb.append ("\n");

        if (null != getChildren ())
        {
            sb.append (" LinkData\n");
            sb.append (" --------\n");

            Node node;
            int i = 0;
            for (SimpleNodeIterator e = children (); e.hasMoreNodes ();)
            {
                node = e.nextNode ();
                sb.append (" ");
                sb.append (i++);
                sb.append (" ");
                sb.append (node.toString ());
                sb.append ("\n");
            }
        }
        sb.append (" *** END of LinkData ***\n");

        return (sb.toString ());
    }

    /**
     * Set the <code>HREF</code> attribute.
     * <p>
     * The given value is also stored verbatim as the cached link, so a
     * subsequent {@link #getLink} returns it unchanged (no prefix stripping)
     * and the mail and javascript flags are not recomputed.
     * @param link The new value of the <code>HREF</code> attribute.
     */
    public void setLink (String link)
    {
        mLink = link;
        setAttribute ("HREF", link);
    }

    /**
     * This method returns an iterator over the nodes that this link contains.
     * @return An iterator over the children of this tag.
     * @deprecated Use children() instead.
     */
    public SimpleNodeIterator linkData ()
    {
        return (children ());
    }

    /**
     * Extract the link from the HREF attribute.
     * Newline and carriage return characters are removed from the attribute
     * value before it is resolved against the page, if there is one.
     * @return The URL from the HREF attribute. This is absolute if the tag has
     * a valid page. May be <code>null</code> when the tag has no
     * <code>HREF</code> attribute and no page.
     */
    public String extractLink ()
    {
        String ret;

        ret = getAttribute ("HREF");
        if (null != ret)
        {
            ret = ParserUtils.removeChars (ret, '\n');
            ret = ParserUtils.removeChars (ret, '\r');
        }
        if (null != getPage ())
            ret = getPage ().getAbsoluteURL (ret);

        return (ret);
    }
}