// HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML // http://sourceforge.org/projects/htmlparser // Copyright (C) 2004 Somik Raha // // Revision Control Information // // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tags/ImageTag.java,v $ // $Author: derrickoswald $ // $Date: 2005/04/10 23:20:45 $ // $Revision: 1.49 $ // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // package org.htmlparser.tags; import java.util.Locale; import java.util.Vector; import org.htmlparser.Attribute; import org.htmlparser.nodes.TagNode; import org.htmlparser.util.ParserUtils; /** * Identifies an image tag. */ public class ImageTag extends TagNode { /** * The set of names handled by this tag. */ private static final String[] mIds = new String[] {"IMG"}; /** * Holds the set value of the SRC attribute, since this can differ * from the attribute value due to relative references resolved by * the scanner. */ protected String imageURL; /** * Create a new image tag. */ public ImageTag () { imageURL = null; } /** * Return the set of names handled by this tag. * @return The names to be matched that create tags of this type. */ public String[] getIds () { return (mIds); } /** * Extract the location of the image * Given the tag (with attributes), and the url of the html page in which * this tag exists, perform best effort to extract the 'intended' URL. * Attempts to handle such attributes as: * <pre> * <IMG SRC=http://www.redgreen.com> - normal * <IMG SRC =http://www.redgreen.com> - space between attribute name and equals sign * <IMG SRC= http://www.redgreen.com> - space between equals sign and attribute value * <IMG SRC = http://www.redgreen.com> - space both sides of equals sign * </pre> * @return The relative URL for the image. */ public String extractImageLocn () { Vector attributes; int size; Attribute attribute; String string; String data; int state; String name; String ret; // TODO: move this logic into the lexer? ret = ""; state = 0; attributes = getAttributesEx (); size = attributes.size (); for (int i = 0; (i < size) && (state < 3); i++) { attribute = (Attribute)attributes.elementAt (i); string = attribute.getName (); data = attribute.getValue (); switch (state) { case 0: // looking for 'src' if (null != string) { name = string.toUpperCase (Locale.ENGLISH); if (name.equals ("SRC")) { state = 1; if (null != data) { if ("".equals (data)) state = 2; // empty attribute, SRC= else { ret = data; i = size; // exit fast } } } else if (name.startsWith ("SRC")) { // missing equals sign string = string.substring (3); // remove any double quotes from around string if (string.startsWith ("\"") && string.endsWith ("\"") && (1 < string.length ())) string = string.substring (1, string.length () - 1); // remove any single quote from around string if (string.startsWith ("'") && string.endsWith ("'") && (1 < string.length ())) string = string.substring (1, string.length () - 1); ret = string; state = 0; // go back to searching for SRC // because, maybe we found SRCXXX // where XXX isn't a URL } } break; case 1: // looking for equals sign if (null != string) { if (string.startsWith ("=")) { state = 2; if (1 < string.length ()) { ret = string.substring (1); state = 0; // keep looking ? } else if (null != data) { ret = string.substring (1); state = 0; // keep looking ? } } } break; case 2: // looking for a valueless attribute that could be a relative or absolute URL if (null != string) { if (null == data) ret = string; state = 0; // only check first non-whitespace item // not every valid attribute after an equals } break; default: throw new IllegalStateException ("we're not supposed to in state " + state); } } ret = ParserUtils.removeChars (ret, '\n'); ret = ParserUtils.removeChars (ret, '\r'); return (ret); } /** * Returns the location of the image. * @return The absolute URL for this image. */ public String getImageURL() { if (null == imageURL) if (null != getPage ()) imageURL = getPage ().getAbsoluteURL (extractImageLocn ()); return (imageURL); } /** * Set the <code>SRC</code> attribute. * @param url The new value of the <code>SRC</code> attribute. */ public void setImageURL (String url) { imageURL = url; setAttribute ("SRC", imageURL); } }