/* * Copyright (c) 2001-2007 Sun Microsystems, Inc. All rights reserved. * * The Sun Project JXTA(TM) Software License * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * 1. Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * * 3. The end-user documentation included with the redistribution, if any, must * include the following acknowledgment: "This product includes software * developed by Sun Microsystems, Inc. for JXTA(TM) technology." * Alternately, this acknowledgment may appear in the software itself, if * and wherever such third-party acknowledgments normally appear. * * 4. The names "Sun", "Sun Microsystems, Inc.", "JXTA" and "Project JXTA" must * not be used to endorse or promote products derived from this software * without prior written permission. For written permission, please contact * Project JXTA at http://www.jxta.org. * * 5. Products derived from this software may not be called "JXTA", nor may * "JXTA" appear in their name, without prior written permission of Sun. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED WARRANTIES, * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SUN * MICROSYSTEMS OR ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * JXTA is a registered trademark of Sun Microsystems, Inc. in the United * States and other countries. * * Please see the license information page at : * <http://www.jxta.org/project/www/license.html> for instructions on use of * the license in source files. * * ==================================================================== * * This software consists of voluntary contributions made by many individuals * on behalf of Project JXTA. For more information on Project JXTA, please see * http://www.jxta.org. * * This license is based on the BSD license adopted by the Apache Foundation. */ package net.jxta.impl.document; import net.jxta.document.Attribute; import net.jxta.document.XMLElement; import net.jxta.logging.Logging; import java.io.IOException; import java.io.Writer; import java.util.ArrayList; import java.util.Collections; import java.util.Enumeration; import java.util.Iterator; import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; /** * An element of a <CODE>StructuredDocument</CODE>. <CODE>StructuredDocument</CODE>s * are made up of hierarchies of elements. LiteXMLElement is part of an implementation * which makes use of XML-style document conventions, but without the overhead of a * full parser. */ public class LiteXMLElement implements XMLElement<LiteXMLElement> { /** * Defines a range of characters, probably within a string. The range is * deemed to be invalid if 'start' is -1. 
A zero length range is, by * convention, described by an 'end' value of 'start' - 1. */ protected static class charRange implements Comparable<charRange> { /** * Contains the start position of this range. */ public int start; /** * Contains the end position of this range. one weird thing: if end == start -1, * then the item is of zero length beginning at start. */ public int end; /** * Constructor for a null charRange. */ public charRange() { start = -1; end = -1; } /** * Constructor for which the bounds are specified. */ public charRange(int start, int end) { this.start = start; this.end = end; } /** * {@inheritDoc} */ @Override public boolean equals(Object aRange) { if (this == aRange) { return true; } if (!(aRange instanceof charRange)) { return false; } charRange someRange = (charRange) aRange; return (start == someRange.start) && (end == someRange.end); } /** * {@inheritDoc} */ public int compareTo(charRange someRange) { if (this == someRange) { return 0; } if (start < someRange.start) { return -1; } if (start > someRange.start) { return 1; } if (end < someRange.end) { return -1; } if (end > someRange.end) { return 1; } return 0; } /** * {@inheritDoc} */ @Override public String toString() { return "[" + start + "," + end + "]"; } /** * Returns true if the <CODE>charRange</CODE> specified by someRange is * contained within this range. * * @param someRange The range which must be contained within this range. * @return true if the specified range is contained with this range otherwise false. */ public boolean contains(charRange someRange) { return (isValid() && someRange.isValid() && (start <= someRange.start) && (end >= someRange.end)); } /** * Returns true if the <CODE>tagRange</CODE> specified by someRange is * contained within this range. * * @param someRange The range which must be contained within this range. * @return true if the specified range is contained with this range otherwise false. 
*/ public boolean contains(tagRange someRange) { return (isValid() && someRange.isValid() && (start <= someRange.startTag.start) && (end >= someRange.endTag.end)); } /** * Returns true if the location specified is contained in this range. * * @param someLoc the location which is to be tested. * @return true if the location is in this range, otherwise false. */ public boolean contains(int someLoc) { return (isValid() && (someLoc >= 0) && (start <= someLoc) && (end >= someLoc)); } /** * Returns true if the range is both non-null and has a length of greater * than or equal to zero. * * @return true if the range is a valid one, otherwise false. */ public boolean isValid() { return length() >= 0; } /** * Returns the length of this range. * * @return The length of the range or -1 if the range is null. */ public int length() { if ((-1 == start) || (-1 == end)) { return -1; } return (end - start + 1); } } /** * A tagRange is a collection of char ranges useful for describing XML * structures. * <p/> * <p/><dl> * <dt><code>startTag</code></dt> * <dd>The range of the opening tag, ie. <tag></dd> * <dt><code>body</code></dt> * <dd>Everything between <code>startTag</code> and <code>endTag</code>.</dd> * <dt><code>endTag</code></dt> * <dd>The range of the terminating tag, ie. </tag>.</dd> * </dl> * <p/> * <p/>For empty-element tags the <code>startTag</code>, <code>body</code> * and <code>endTag</code> will be equal. 
*/ protected static class tagRange implements Comparable<tagRange> { public charRange startTag; public charRange body; public charRange endTag; public tagRange() { startTag = new charRange(); body = new charRange(); endTag = new charRange(); } public tagRange(charRange startTag, charRange body, charRange endTag) { this.startTag = startTag; this.body = body; this.endTag = endTag; } /** * {@inheritDoc} */ @Override public boolean equals(Object aRange) { if (this == aRange) { return true; } if (!(aRange instanceof tagRange)) { return false; } tagRange likeMe = (tagRange) aRange; return startTag.equals(likeMe.startTag) && body.equals(likeMe.body) && endTag.equals(likeMe.endTag); } /** * {@inheritDoc} */ public int compareTo(tagRange someRange) { if (this == someRange) { return 0; } int compared = startTag.compareTo(someRange.startTag); if (0 != compared) { return compared; } return endTag.compareTo(someRange.endTag); } /** * {@inheritDoc} */ @Override public String toString() { return startTag + ":" + body + ":" + endTag; } /** * Returns true if the <CODE>tagRange</CODE> specified by someRange is * contained within the body portion of this range. * * @param someRange The range which must be contained within this range. * @return true if the specified range is contained with this range * otherwise false. */ public boolean contains(tagRange someRange) { return (isValid() && someRange.isValid() && (body.start <= someRange.startTag.start) && (body.end >= someRange.endTag.end)); } /** * Returns true if the <CODE>charRange</CODE> specified by someRange is * contained within the body portion of this range. * * @param someRange The range which must be contained within this range. * @return true if the specified range is contained with this range * otherwise false. 
*/ public boolean contains(charRange someRange) { return (isValid() && someRange.isValid() && (body.start <= someRange.start) && (body.end >= someRange.end)); } /** * @return <code>true</code> if this tagRange represents and empty * element. */ public boolean isEmptyElement() { return isValid() && startTag.equals(body) && startTag.equals(endTag); } /** * @return true if valid */ public boolean isValid() { return (null != startTag) && (null != body) && (null != endTag) && startTag.isValid() && body.isValid() && endTag.isValid(); } } /** * Log4J Logger */ private final static transient Logger LOG = Logger.getLogger(LiteXMLElement.class.getName()); /** * If true then every operation which modifies the state of the document will * perform a consistency check. This is a deadly performance killer but * helps a lot in isolating bugs. */ protected final static transient boolean paranoidConsistencyChecking = false; /** * The document associated with this Element. */ protected final transient LiteXMLDocument doc; /** * Identifies the element which is the parent of this element. If <code> * this.parent == this</code> then this element is the root of the document. * If <code>null == parent</code> then this element has not yet been * inserted into the document. */ protected transient LiteXMLElement parent; /** * The portion of the source XML associated with this node */ protected transient tagRange loc; /** * If this node has yet to be inserted into the document then will contain * the String value of this node, otherwise null. */ private transient StringBuilder uninserted = null; /** * The child elements associated with this element */ private transient List<LiteXMLElement> children; /** * Creates new LiteXMLElement * * @param loc The location of the element within the document. * @param doc The {@link LiteXMLDocument} which is the root of the document. 
*/ protected LiteXMLElement(LiteXMLDocument doc, tagRange loc) { this.doc = doc; this.loc = loc; } /** * Creates new LiteElement * * @param doc The {@link LiteXMLDocument} which is the root of the document. * @param name The name of the element being created. * @param val The value of the element being created or null if there is no * content to the element. */ public LiteXMLElement(LiteXMLDocument doc, final String name, final String val) { this(doc, new tagRange()); for (int eachChar = name.length() - 1; eachChar >= 0; eachChar--) { if (Character.isWhitespace(name.charAt(eachChar))) { throw new IllegalArgumentException("Element names may not contain spaces."); } } if ((null == val) || (0 == val.length())) { uninserted = new StringBuilder("<" + name + "/>"); } else { uninserted = new StringBuilder(val); encodeEscaped(uninserted); uninserted.insert(0, "<" + name + ">"); uninserted.append("</").append(name).append(">"); } } /** * {@inheritDoc} */ @Override public boolean equals(Object element) { if (this == element) { return true; } if (!(element instanceof LiteXMLElement)) { return false; } LiteXMLElement liteElement = (LiteXMLElement) element; if (getDocument() != liteElement.getDocument()) { return false; } if (!getName().equals(liteElement.getName())) { return false; } String val1; if (null != uninserted) { val1 = uninserted.toString(); } else { val1 = getTextValue(); } String val2 = liteElement.getTextValue(); if ((null == val1) && (null == val2)) { return true; } return null != val1 && null != val2 && val1.equals(val2); } /** * {@inheritDoc} * <p/> * <p/>A toString implementation for debugging purposes. 
*/ @Override public String toString() { if (paranoidConsistencyChecking) { checkConsistency(); } String name = getName(); if (name == null) { name = "<<null name>>"; } String value = getTextValue(); if (value == null) { value = "<<null value>>"; } if ((value.length() + name.length()) >= 60) { int len = Math.max(20, 60 - name.length()); value = value.substring(0, Math.min(len, value.length())); } // FIXME 20021125 bondolo@jxta.org should remove carriage control. return super.toString() + " / " + name + " = " + value; } /** * {@inheritDoc} */ public LiteXMLDocument getRoot() { return getDocument(); } /** * {@inheritDoc} */ public LiteXMLElement getParent() { return parent; } /** * {@inheritDoc} */ public Enumeration<LiteXMLElement> getChildren() { if (null != uninserted) { throw new IllegalStateException("This element has not been added."); } if (null == children) { List<LiteXMLElement> empty = Collections.emptyList(); return Collections.enumeration(empty); } else { return Collections.enumeration(children); } } /** * {@inheritDoc} */ public String getName() { if (null != uninserted) { throw new IllegalStateException("This element has not been added."); } if (paranoidConsistencyChecking) { checkConsistency(); } int current = loc.startTag.start + 1; while (current <= loc.startTag.end) { char inTagName = getDocument().docContent.charAt(current); if (Character.isWhitespace(inTagName) || ('/' == inTagName) || ('>' == inTagName)) { break; } current++; } return getDocument().docContent.substring(loc.startTag.start + 1, current); } /** * Get the name associated with an element. * * @return A string containing the key of this element. */ public String getKey() { return getName(); } /** * Get the value (if any) associated with an element. * * @return A string containing the value of this element, if any, otherwise null. 
*/
    public String getValue() {
        return getTextValue();
    }

    /**
     * {@inheritDoc}
     * <p/>If the child has not yet been inserted, its pending markup is
     * spliced into the document text immediately before this element's end
     * tag and the recorded locations are adjusted to match. If this element
     * is currently an empty-element tag it is first expanded into a start
     * tag / end tag pair. If the child's markup is already in the document
     * it is merely registered as a child.
     *
     * @param element the element to append as the last child of this element.
     * @throws IllegalArgumentException if the element belongs to another
     *                                  document or already has a parent.
     * @throws IllegalStateException    if this element has not itself been
     *                                  added to the document.
     */
    public void appendChild(LiteXMLElement element) {
        if (element.getDocument() != getDocument()) {
            throw new IllegalArgumentException("Wrong document");
        }

        if (null != element.parent) {
            throw new IllegalArgumentException("New element is already in document");
        }

        if (null != uninserted) {
            throw new IllegalStateException("This element has not been added.");
        }

        if (paranoidConsistencyChecking) {
            checkConsistency();
        }

        // If uninserted then this new element contains content which needs to
        // be added to the document. If uninserted is null then the child
        // element's content is already in the document, but merely needs to
        // be recognized as a child.
        if (null != element.uninserted) {
            // This element is an empty-element tag (start tag and end tag
            // are the same range): rewrite "<name/>" as "<name></name>" so
            // that there is a body to hold the child.
            if (loc.startTag.equals(loc.endTag)) {
                getDocument().docContent.deleteCharAt(loc.endTag.end - 1); // delete the /
                loc.startTag.end -= 1;

                // skip past the name portion
                int current = loc.startTag.start + 1;

                while (current <= loc.startTag.end) {
                    char inTagName = getDocument().docContent.charAt(current);

                    if (Character.isWhitespace(inTagName) || ('>' == inTagName)) {
                        break;
                    }
                    current++;
                }

                String tagName = getDocument().docContent.substring(loc.startTag.start + 1, current);

                // One character was removed above and tagName.length() + 3
                // characters ("</", the name and ">") are inserted here, so
                // the net shift applied to later locations is
                // tagName.length() + 2.
                getDocument().docContent.insert(loc.startTag.end + 1, "</" + tagName + ">");
                getDocument().adjustLocations(loc.startTag.end + 1, tagName.length() + 2);
                loc.endTag = new charRange(loc.startTag.end + 1, loc.startTag.end + 3 + tagName.length());
                // zero length body located between the two tags.
                loc.body = new charRange(loc.startTag.end + 1, loc.startTag.end);
            }

            // Splice the child's markup in just before our end tag.
            getDocument().docContent.insert(loc.endTag.start, element.uninserted);

            element.loc.startTag.start = loc.endTag.start;
            element.loc.startTag.end = getDocument().docContent.indexOf(">", element.loc.startTag.start);

            // A '/' as the second to last character means the child's markup
            // is an empty-element tag.
            if ('/' != element.uninserted.charAt(element.uninserted.length() - 2)) {
                element.loc.body.start = element.loc.startTag.end + 1;

                element.loc.endTag.end = element.loc.startTag.start + element.uninserted.length() - 1;
                element.loc.endTag.start = getDocument().docContent.lastIndexOf("<", element.loc.endTag.end);

                element.loc.body.end = element.loc.endTag.start - 1;
            } else {
                element.loc.body = new charRange(element.loc.startTag.start, element.loc.startTag.end);
                element.loc.endTag = new charRange(element.loc.startTag.start, element.loc.startTag.end);
            }

            if (0 != loc.body.length()) {
                getDocument().adjustLocations(loc.endTag.start, element.uninserted.length());
            } else {
                // NOTE(review): body.start is moved back for the duration of
                // the adjustment and then restored, presumably so that the
                // adjustment does not treat this element's zero length body
                // as lying in the shifted region -- confirm against
                // adjustLocations.
                loc.body.start--;
                getDocument().adjustLocations(loc.endTag.start, element.uninserted.length());
                loc.body.start++;
            }

            // our body now also covers the newly inserted child.
            loc.body.end += element.uninserted.length();

            element.uninserted = null;
        }

        element.parent = this;

        if (null == children) {
            children = new ArrayList<LiteXMLElement>();
        }

        children.add(element);

        if (paranoidConsistencyChecking) {
            checkConsistency();
        }
    }

    /**
     * Returns an enumeration of the immediate children of this element whose
     * name match the specified string.
     *
     * @param key The key which will be matched against. Must be a String.
     * @return enumeration containing the children of this element whose
     *         name equals the key.
     * @throws ClassCastException if the key is a non-null object which is
     *                            not a String.
     */
    public Enumeration<LiteXMLElement> getChildren(Object key) {
        if (key instanceof String)
            return getChildren((String) key);
        else
            throw new ClassCastException(key.getClass().getName() + " not supported by getChildren.");
    }

    /**
     * {@inheritDoc}
     *
     * @throws IllegalStateException if this element has not yet been added
     *                               to the document.
     */
    public Enumeration<LiteXMLElement> getChildren(String name) {
        if (null != uninserted) {
            throw new IllegalStateException("This element has not been added.");
        }

        if (paranoidConsistencyChecking) {
            checkConsistency();
        }

        if (null == children) {
            List<LiteXMLElement> empty = Collections.emptyList();

            return Collections.enumeration(empty);
        }

        // collect the matching children into a new list.
        List<LiteXMLElement> result = new ArrayList<LiteXMLElement>();

        for (LiteXMLElement aChild : children) {
            if (name.equals(aChild.getName())) {
                result.add(aChild);
            }
        }

        return Collections.enumeration(result);
    }

    /**
     * {@inheritDoc}
     * <p/>Returns the decoded and trimmed text of this element.
     */
    public String getTextValue() {
        return getTextValue(false, true);
    }

    /**
     * Get the value (if any) associated with an element.
* * @param getEncoded if true then the contents will be encoded such that * the contents will not be interpreted as XML. see * {@link <a href="http://www.w3.org/TR/REC-xml#syntax">W3C XML 1.0 Specification</a>} * ie. < -> < & -> & * @param trim if true trims prefix and suffix white space * @return A string containing the value of this element, if any, otherwise null. */ protected String getTextValue(boolean getEncoded, boolean trim) { if (null != uninserted) { throw new IllegalStateException("This element has not been added."); } if (paranoidConsistencyChecking) { checkConsistency(); } StringBuilder building = new StringBuilder(); List<charRange> ranges = new ArrayList<charRange>(); /* * insert the ranges of the children in order. insertion method is ok * because the number of children is usually less than 10 or so. */ for (Enumeration<LiteXMLElement> eachChild = getChildren(); eachChild.hasMoreElements();) { LiteXMLElement aChild = eachChild.nextElement(); charRange childsRange = new charRange(aChild.loc.startTag.start, aChild.loc.endTag.end); // find where to insert. for (int eachRange = 0; eachRange < ranges.size(); eachRange++) { charRange rangeChild = ranges.get(eachRange); if (1 == rangeChild.compareTo(childsRange)) { ranges.set(eachRange, childsRange); childsRange = rangeChild; } } ranges.add(childsRange); } int current = loc.body.start; // add all the text not part of some child for (charRange aRange : ranges) { building.append(getDocument().docContent.substring(current, aRange.start)); current = aRange.end + 1; } // Add the last bit. 
building.append(getDocument().docContent.substring(current, loc.endTag.start)); if (!getEncoded) { building = decodeEscaped(building); } // trim int firstNonWhiteSpace = 0; int lastNonWhiteSpace = building.length() - 1; if (trim) { while (firstNonWhiteSpace < building.length()) { char possibleSpace = building.charAt(firstNonWhiteSpace); if (!Character.isWhitespace(possibleSpace)) { break; } firstNonWhiteSpace++; } // did we find no non-whitespace? if (firstNonWhiteSpace >= building.length()) { return null; } while (lastNonWhiteSpace >= firstNonWhiteSpace) { char possibleSpace = building.charAt(lastNonWhiteSpace); if (!Character.isWhitespace(possibleSpace)) { break; } lastNonWhiteSpace--; } } String result = building.substring(firstNonWhiteSpace, lastNonWhiteSpace + 1); return result; } /** * Write the contents of this element and optionally its children. The * writing is done to a provided <code>java.io.Writer</code>. The writing * can optionally be indented. * * @param into The java.io.Writer that the output will be sent to. * @param indent the number of tabs which will be inserted before each * line. * @param recurse if true then also print the children of this element. * @throws java.io.IOException if an io error occurs */ protected void printNice(Writer into, int indent, boolean recurse) throws IOException { if (null != uninserted) { throw new IllegalStateException("This element has not been added."); } if (paranoidConsistencyChecking) { checkConsistency(); } // print start tag StringBuilder start = new StringBuilder(); if (-1 != indent) { // do indent for (int eachTab = 0; eachTab < indent; eachTab++) { start.append('\t'); } } start.append(getDocument().docContent.substring(loc.startTag.start, loc.startTag.end + 1)); if (-1 != indent) { start.append('\n'); } into.write(start.toString()); // print the rest if this was not an empty element. 
if (!loc.startTag.equals(loc.endTag)) { String itsValue = getTextValue(true, (-1 != indent)); // print node value if (null != itsValue) { if (-1 != indent) { // do indent for (int eachTab = 0; eachTab < indent + 1; eachTab++) { into.write("\t"); } } into.write(itsValue); if (-1 != indent) { into.write('\n'); } } // recurse as needed if (recurse) { int childIndent; Enumeration<LiteXMLElement> childrens = getChildren(); Attribute space = getAttribute("xml:space"); if (null != space) { if ("preserve".equals(space.getValue())) { childIndent = -1; } else { childIndent = indent + 1; } } else { if (-1 != indent) { childIndent = indent + 1; } else { childIndent = -1; } } while (childrens.hasMoreElements()) { LiteXMLElement aChild = childrens.nextElement(); aChild.printNice(into, childIndent, recurse); } } // print end tag StringBuilder end = new StringBuilder(); if (-1 != indent) { // do indent for (int eachTab = 0; eachTab < indent; eachTab++) { end.append('\t'); } } end.append(getDocument().docContent.substring(loc.endTag.start, loc.endTag.end + 1)); if (-1 != indent) { end.append('\n'); } into.write(end.toString()); } } /** * Given a source string, an optional tag and a range with in the source * find either the tag specified or the next tag. * <p/> * The search consists of 4 phases : * 0. If no tag was specified, determine if a tag can be found and * learn its name. * 1. Search for the start of the named tag. * 2. Search for the end tag. Each time we think we have found a tag * which might be the end tag we make sure it is not the end tag * of another element with the same name as our tag. * 3. Calculate the position of the body of the tag given the locations * of the start and end. * * @param source the string to search * @param tag the tag to search for in the source string. If this tag is * empty or null then we will search for the next tag. * @param range describes the range of character locations in the source * string to which the search will be limited. 
* @return tagRange containing the ranges of the found tag. When nothing is
     *         found the returned tagRange is not valid; when a start tag is
     *         found without a matching end tag only its start tag and body
     *         ranges are filled in.
     * @throws IllegalArgumentException if the start of the range is -1 or is
     *                                  not before the last character of the
     *                                  source.
     * @throws IllegalStateException    if this element has not yet been added
     *                                  to the document.
     */
    protected tagRange getTagRanges(final StringBuilder source, String tag, final charRange range) {

        // FIXME bondolo@jxta.org 20010327 Does not handle XML comments. ie. <!-- -->
        if (null != uninserted) {
            throw new IllegalStateException("This element has not been added to the document.");
        }

        tagRange result = new tagRange();
        int start = range.start;
        int end = source.length() - 1;
        int current;
        boolean foundStartTag = false;
        boolean foundEndTag = false;
        boolean emptyTag = (null == tag) || (0 == tag.length());

        // check for bogosity
        // (note: this compares against the end of the source, before the
        // end of the requested range is applied below.)
        if ((-1 == start) || (start >= end)) {
            throw new IllegalArgumentException("Illegal start value");
        }

        // adjust end of range
        if ((-1 != range.end) && (end > range.end)) {
            end = range.end;
        }

        // check for empty tag and assign empty string
        if (null == tag) {
            tag = "";
        }

        if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
            LOG.finer("Searching for \"" + tag + "\" in range [" + start + "," + end + "]");
        }

        current = start;

        // Begin Phase 0 : Search for any tag.

        if (emptyTag) {
            int foundTagText = source.indexOf("<", current);

            // was it not found? if not then quit
            if (-1 == foundTagText) {
                if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
                    LOG.finer("No Tags Found");
                }
                return result;
            }

            // this part is about setting the tag if necessary
            // The name runs from just after the '<' up to the first
            // whitespace, '/' or '>'.
            foundTagText++;

            int afterTagText = foundTagText;

            while (afterTagText <= end) {
                char inTagName = source.charAt(afterTagText);

                if (!Character.isWhitespace(inTagName) && ('/' != inTagName) && ('>' != inTagName)) {
                    afterTagText++;
                    continue;
                }

                tag = source.substring(foundTagText, afterTagText);
                emptyTag = (null == tag) || (0 == tag.length());

                break;
            }

            // it better not be still empty
            if (emptyTag) {
                if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
                    LOG.finer("No tag found");
                }
                return result;
            }
        }

        if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
            LOG.finer("Search for \"" + tag + "\" [" + start + "," + end + "]");
        }

        // Begin Phase 1: Search for the Start Tag

        while (!foundStartTag && (current < end)) {
            int foundTagText = source.indexOf(tag, current + 1); // first loc is one past current location
            int foundTagTerminator;
            int foundNextTagStart;
            int afterTagText = foundTagText + tag.length();

            // was it not found
            if ((-1 == foundTagText) || (afterTagText > end)) {
                if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
                    LOG.finer("Tag \"" + tag + "\" Not Found(1)");
                }
                return result;
            }

            char checkChar = source.charAt(afterTagText);

            // check to see if it is the start tag
            if (('<' != source.charAt(foundTagText - 1)) || // it has the open tag delimiter before it
                    (!Character.isWhitespace(checkChar) && ('/' != checkChar) && ('>' != checkChar))) { // is immediately followed by a delimiter
                current = afterTagText;
                continue;
            }

            foundTagTerminator = source.indexOf(">", afterTagText);
            foundNextTagStart = source.indexOf("<", afterTagText + 1);

            if ((-1 == foundTagTerminator) || // the tag has no terminator
                    (foundTagTerminator > end) || // it is past the valid range
                    ((-1 != foundNextTagStart) && // there is another tag start
                    (foundNextTagStart < foundTagTerminator))) { // and it is before the terminator we found. very bad
                current = afterTagText;
                continue;
            }

            foundStartTag = true;
            result.startTag.start = foundTagText - 1;
            result.startTag.end = foundTagTerminator;
        }

        if (!foundStartTag) {
            if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
                LOG.finer("Tag \"" + tag + "\" Not Found(2)");
            }
            return result;
        }

        // is this an empty element declaration?
        if ('/' == source.charAt(result.startTag.end - 1)) {
            // end is the start and there is no body
            result.body = new charRange(result.startTag.start, result.startTag.end);
            result.endTag = new charRange(result.startTag.start, result.startTag.end);
            if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
                LOG.finer("Empty Element \"" + tag + "\" Start : " + result.startTag);
            }
            return result;
        }

        current = result.startTag.end + 1;

        // if current is past the end then our end tag is not found.
        if (current >= end) {
            if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
                LOG.finer("End not found \"" + tag + "\" Start : " + result.startTag);
            }
            return result;
        }

        // Begin Phase 2 : Search for the end tag

        String endTag = "</" + tag + ">";
        // searchFrom marks where the scan for nested same-named elements
        // starts; it only advances past complete nested elements.
        int searchFrom = result.startTag.end + 1;

        while (!foundEndTag && (current < end) && (searchFrom < end)) {
            if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
                LOG.finer("Searching for \"" + endTag + "\" in range [" + current + "," + end + "]");
            }

            int foundTagText = source.indexOf(endTag, current);

            // was it not found or not in bounds?
            if ((-1 == foundTagText) || ((foundTagText + endTag.length() - 1) > end)) {
                break;
            } // it was not found

            if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
                LOG.finer(
                        "Prospective tag pair for \"" + tag + "\" " + result.startTag + ":[" + foundTagText + "," + (foundTagText + endTag.length() - 1) + "]");
            }

            // We recurse here in order to exclude the end tags of any sub elements with the same name
            charRange subRange = new charRange(searchFrom, foundTagText - 1);

            if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
                LOG.finer("Recursing to search for \"" + tag + "\" in " + subRange);
            }

            tagRange subElement = getTagRanges(source, tag, subRange);

            if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
                LOG.finer("Recursion result \"" + tag + "\" " + subElement);
            }

            // if there was an incomplete sub-tag with the same name, skip past it
            if (subElement.startTag.isValid()) {
                if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
                    LOG.finer("Found sub-tag \"" + tag + "\" at " + subElement + " within " + subRange);
                }

                if (subElement.endTag.isValid()) {
                    if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
                        LOG.finer("Complete sub-tag \"" + tag + "\" at " + subElement + " within " + subRange);
                    }
                    // the nested element is complete; resume after it.
                    current = subElement.endTag.end + 1;
                    searchFrom = subElement.endTag.end + 1;
                } else {
                    // the end tag just found belongs to the nested element;
                    // look for the next candidate end tag.
                    current = foundTagText + endTag.length();
                }

                continue;
            }

            foundEndTag = true;
            result.endTag.start = foundTagText;
            result.endTag.end = foundTagText + endTag.length() - 1;

            if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
                LOG.finer("Prospective tag \"" + tag + "\" " + result.endTag + " is confirmed.");
            }
        }

        // Begin Phase 3 : Calculate the location of the body.

        result.body.start = result.startTag.end + 1;

        if (foundEndTag) {
            result.body.end = result.endTag.start - 1;
        } else {
            // no end tag: the body extends to the end of the search range
            // and the end tag range is left as a null range.
            result.body.end = end;
        }

        if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
            LOG.finer("Found element : \"" + tag + "\" " + result);
        }

        return result;
    }

    /**
     * Parse a charRange and add any tags found as content as children of a
     * specified element. This process is repeated recursively.
     *
     * @param scanRange the range to be parsed for sub-tags
     * @param addTo     the element to add any discovered children to.
     * @throws IllegalStateException if this element has not yet been added
     *                               to the document.
     */
    protected void addChildTags(final charRange scanRange, LiteXMLElement addTo) {
        if (null != uninserted) {
            throw new IllegalStateException("This element has not been added to the document.");
        }

        int current = scanRange.start;

        if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
            LOG.finer("Scanning for children in range " + scanRange);
        }

        do {
            // scan for any tag.
            tagRange aSubtag = getTagRanges(getDocument().docContent, null, new charRange(current, scanRange.end));

            // did we find one?
            if (aSubtag.isValid()) {
                LiteXMLElement newChild = getDocument().createElement(aSubtag);

                if (Logging.SHOW_FINER && LOG.isLoggable(Level.FINER)) {
                    // the name is taken from the end tag text, skipping
                    // its first two characters and its last character.
                    LOG.finer(
                            "Adding child tag \"" + getDocument().docContent.substring(aSubtag.endTag.start + 2, aSubtag.endTag.end) + "\" " + aSubtag);
                }

                addTo.appendChild(newChild);

                if (paranoidConsistencyChecking) {
                    checkConsistency();
                }

                // empty-element tags have no body to scan.
                if (!aSubtag.startTag.equals(aSubtag.endTag)) {
                    addChildTags(aSubtag.body, newChild); // recurse into the new tag
                }

                // all done this tag, move on
                current = aSubtag.endTag.end + 1;
            } else {
                current = -1; // all done!
            }
        } while ((-1 != current) && (current < scanRange.end));

        if (paranoidConsistencyChecking) {
            checkConsistency();
        }
    }

    /**
     * For this element and all its children adjust the location of its ranges
     * by the amount specified.
     *
     * @param beginningAt adjust all locations which are at or past this
     *                    location.
     * @param by          amount to adjust all matching locations.
*/ protected void adjustLocations(final int beginningAt, final int by) { if (null != uninserted) { throw new IllegalStateException("This element has not been added."); } // Check that this element is not entirely to the left of the shift // zone. NB: end can be < start if len is 0. if (loc.endTag.end < beginningAt && loc.endTag.start < beginningAt) { return; } if ((loc.startTag.end >= beginningAt) || ((loc.startTag.start >= beginningAt) && ((loc.startTag.end + 1) == loc.startTag.start))) { loc.startTag.end += by; } if (loc.startTag.start >= beginningAt) { loc.startTag.start += by; } if ((loc.body.end >= beginningAt) || ((loc.body.start >= beginningAt) && ((loc.body.end + 1) == loc.body.start))) { loc.body.end += by; } if (loc.body.start >= beginningAt) { loc.body.start += by; } if ((loc.endTag.end >= beginningAt) || ((loc.endTag.start >= beginningAt) && ((loc.endTag.end + 1) == loc.endTag.start))) { loc.endTag.end += by; } if (loc.endTag.start >= beginningAt) { loc.endTag.start += by; } for (Enumeration<LiteXMLElement> eachChild = getChildren(); eachChild.hasMoreElements();) { LiteXMLElement aChild = eachChild.nextElement(); aChild.adjustLocations(beginningAt, by); } if (paranoidConsistencyChecking) { checkConsistency(); } } /** * Given a StringBuilder find all occurrences of escaped characters which * must be decoded and convert them back to their non-escaped equivalents. * <p/> * <p/>Also does end of line folding per: <a href="http://www.w3.org/TR/REC-xml#sec-line-ends"/> * * @param target The StringBuilder which will be decoded. * @return The decoded version of the StringBuilder. */ protected StringBuilder decodeEscaped(StringBuilder target) { int current = 0; StringBuilder result = new StringBuilder(target.length()); while (current < target.length()) { // FIXME bondolo@jxta.org 20010422 Should process xml comments out here. 
// fold 0x0D and 0x0D 0x0A to 0x0A if ('\r' == target.charAt(current)) { result.append('\n'); current++; if ((current < target.length()) && ('\n' == target.charAt(current))) { current++; } continue; } if ('&' != target.charAt(current)) { result.append(target.charAt(current)); current++; continue; } int terminusAt = current + 1; while ((terminusAt < target.length()) && // dont go past end ((terminusAt - current) < 6) && // only look 6 chars away. (';' != target.charAt(terminusAt))) { // must be a ; terminusAt++; } if ((terminusAt >= target.length()) || (';' != target.charAt(terminusAt))) { // if we get here then we didnt find the terminal we needed // so we just leave ampersand as it was, the document is // ill-formed but why make things worse? result.append(target.charAt(current)); current++; continue; } char[] sub = new char[terminusAt - current + 1]; target.getChars(current, terminusAt + 1, sub, 0); String escaped = new String(sub); if ("&".equals(escaped)) { result.append('&'); current += 4; } else if ("<".equals(escaped)) { result.append('<'); current += 3; } else if (">".equals(escaped)) { // for compatibility with SGML. We dont encode these result.append('>'); current += 3; } else if (escaped.startsWith("&#")) { String numericChar = escaped.substring(2, escaped.length() - 1); // is it &#; ? if (numericChar.length() < 1) { result.append(target.charAt(current)); current++; continue; } // is it hex numeric if (numericChar.charAt(0) == 'x') { numericChar = numericChar.substring(1); // is it &#x; ? if (numericChar.length() < 1) { result.append(target.charAt(current)); current++; continue; } try { char asChar = (char) Integer.parseInt(numericChar.toLowerCase(), 16); result.append(asChar); current += escaped.length(); } catch (NumberFormatException badref) { // it was bad, we will just skip it. 
result.append(target.charAt(current)); current++; } continue; } // its base 10 try { char asChar = (char) Integer.parseInt(numericChar, 10); result.append(asChar); current += escaped.length(); } catch (NumberFormatException badref) { // it was bad, we will just skip it. result.append(target.charAt(current)); current++; } continue; } else { // if we get here then we didn't know what to do with the // entity. so we just send it unchanged. result.append(target.charAt(current)); current++; continue; } current++; } return result; } /** * Given a StringBuilder find all occurrences of characters which must be * escaped and convert them to their escaped equivalents. * * @param target The StringBuilder which will be encoded in place. */ protected void encodeEscaped(StringBuilder target) { int current = 0; while (current < target.length()) { if ('&' == target.charAt(current)) { target.insert(current + 1, "amp;"); current += 5; } else if ('<' == target.charAt(current)) { target.setCharAt(current, '&'); target.insert(current + 1, "lt;"); current += 4; } else { current++; } } } /** * Returns an enumerations of the attributes associated with this object. * Each element is of type Attribute. * * @return Enumeration the attributes associated with this object. 
*/ public Enumeration<Attribute> getAttributes() { List<Attribute> results = new ArrayList<Attribute>(); if (null != uninserted) { throw new IllegalStateException("This element has not been added."); } if (paranoidConsistencyChecking) { checkConsistency(); } // find the start of the first attribute int current = loc.startTag.start + 1; while (current <= loc.startTag.end) { char inTagName = getDocument().docContent.charAt(current); if (Character.isWhitespace(inTagName) || ('/' == inTagName) || ('>' == inTagName)) { break; } current++; } // loop and add attributes to the vector while (current < loc.startTag.end) { tagRange nextAttr = getAttributeLoc(null, new charRange(current, loc.startTag.end)); if (!nextAttr.isValid()) { break; } results.add( new Attribute(this, getDocument().docContent.substring(nextAttr.startTag.start, nextAttr.startTag.end + 1) , getDocument().docContent.substring(nextAttr.body.start, nextAttr.body.end + 1))); current = nextAttr.endTag.end + 1; } return Collections.enumeration(results); } /** * Returns the tagRange of the next attribute contained in the range * provided. The tag range returned consists of the startTag indicating * the location of the name, body indicating the location of the value and * endTag indicating the location of the final quote delimiter. * * @param name Name to match. null means match any name. * @param inRange the limits of the locations to scan. 
* @return tagRange containing the location of the next attribute
     */
    protected tagRange getAttributeLoc(String name, charRange inRange) {
        final LiteXMLDocument document = getDocument();
        tagRange result = new tagRange();
        int current = inRange.start;

        do {
            // skip whitespace and tag punctuation in front of the name
            while (current <= inRange.end) {
                char inTagName = document.docContent.charAt(current);

                if (!Character.isWhitespace(inTagName) && ('/' != inTagName) && ('>' != inTagName)) {
                    break;
                }
                current++;
            }

            int equalsAt = document.docContent.indexOf("=", current);

            // make sure there is an equals
            if ((-1 == equalsAt) || (equalsAt >= inRange.end)) {
                return result;
            }

            // the name is everything between the cursor and the equals
            result.startTag.start = current;
            result.startTag.end = equalsAt - 1;

            // get the quote char we must match
            String requiredQuote = document.docContent.substring(equalsAt + 1, equalsAt + 2);

            // make sure its a valid quote
            if (('\'' != requiredQuote.charAt(0)) && ('\"' != requiredQuote.charAt(0))) {
                return result;
            }

            // find the next occurrence of this quote
            int nextQuote = document.docContent.indexOf(requiredQuote, equalsAt + 2);

            // make sure the quote is in a good spot.
            if ((-1 == nextQuote) || (nextQuote >= inRange.end)) {
                return result;
            }

            // the value sits between the two quotes
            result.body.start = equalsAt + 2;
            result.body.end = nextQuote - 1;

            result.endTag.start = nextQuote;
            result.endTag.end = nextQuote;

            // check if the name matches; if not, flag the name range so the
            // scan moves on to the next attribute.
            if ((null != name) && !name.equals(document.docContent.substring(result.startTag.start, result.startTag.end + 1))) {
                result.startTag.start = -1;
            }

            current = nextQuote + 1;
        } while ((current < inRange.end) && (!result.isValid()));

        return result;
    }

    /**
     * Finds the position just past the name in this element's start tag,
     * which is where the scan for attributes begins.
     *
     * @return index of the first whitespace, '/' or '>' after the opening
     *         character of the start tag, or one past the end of the start
     *         tag if there is none.
     */
    private int findAttributesStart() {
        int current = loc.startTag.start + 1;

        while (current <= loc.startTag.end) {
            char inTagName = getDocument().docContent.charAt(current);

            if (Character.isWhitespace(inTagName) || ('/' == inTagName) || ('>' == inTagName)) {
                break;
            }
            current++;
        }

        return current;
    }

    /**
     * {@inheritDoc}
     *
     * <p/>The value is delimited with double quotes unless it contains a
     * double quote, in which case single quotes are used.
     *
     * @throws IllegalStateException    if this element has not been added.
     * @throws IllegalArgumentException if name or value is null, if name
     *                                  contains whitespace, or if value
     *                                  contains both kinds of quote.
     */
    public String addAttribute(String name, String value) {
        if (null != uninserted) {
            throw new IllegalStateException("This element has not been added.");
        }

        if (null == name) {
            throw new IllegalArgumentException("name must not be null");
        }

        if (null == value) {
            throw new IllegalArgumentException("value must not be null");
        }

        for (int eachChar = name.length() - 1; eachChar >= 0; eachChar--) {
            if (Character.isWhitespace(name.charAt(eachChar))) {
                throw new IllegalArgumentException("Attribute names may not contain spaces.");
            }
        }

        if (paranoidConsistencyChecking) {
            checkConsistency();
        }

        // skip past the name portion
        int current = findAttributesStart();

        // find out if there was a previous value for this name
        String oldValue = null;
        tagRange oldAttr = getAttributeLoc(name, new charRange(current, loc.startTag.end));

        // choose which kind of quote to use
        char usingQuote = (-1 != value.indexOf('"')) ? '\'' : '\"';

        // make sure we can use it.
        if (('\'' == usingQuote) && (-1 != value.indexOf('\''))) {
            throw new IllegalArgumentException("Value contains both \" and \'");
        }

        if (!oldAttr.isValid()) {
            // we aren't replacing an existing value, build the new attribute
            // string and put it right after the tag name.
            StringBuilder newStuff = new StringBuilder(" ");

            newStuff.append(name);
            newStuff.append("=");
            newStuff.append(usingQuote);
            newStuff.append(value);
            newStuff.append(usingQuote);

            getDocument().docContent.insert(current, newStuff.toString());

            // move all doc locations which follow this one based on how much we
            // inserted.
            getDocument().adjustLocations(current, newStuff.length());
        } else {
            // we are replacing an existing value
            oldValue = getDocument().docContent.substring(oldAttr.body.start, oldAttr.body.end + 1);

            // Replace the value together with both of its delimiters. The
            // old delimiters may be the very quote the new value contains,
            // so they have to be rewritten with the quote chosen above.
            int openQuoteAt = oldAttr.body.start - 1;
            String quotedValue = String.valueOf(usingQuote) + value + usingQuote;

            getDocument().docContent.delete(openQuoteAt, oldAttr.endTag.end + 1);
            getDocument().docContent.insert(openQuoteAt, quotedValue);

            // The two delimiters are swapped one for one, so only the value
            // itself changes the length of the document.
            int delta = value.length() - (oldAttr.body.end - oldAttr.body.start + 1);

            // move all doc locations which follow this one based on how much we
            // inserted or deleted.
            getDocument().adjustLocations(loc.startTag.start + 1, delta);
        }

        if (paranoidConsistencyChecking) {
            checkConsistency();
        }

        return oldValue;
    }

    /**
     * {@inheritDoc}
     */
    public String addAttribute(Attribute newAttrib) {
        return addAttribute(newAttrib.getName(), newAttrib.getValue());
    }

    /**
     * {@inheritDoc}
     *
     * @throws IllegalStateException if this element has not been added.
     */
    public Attribute getAttribute(String name) {
        if (null != uninserted) {
            throw new IllegalStateException("This element has not been added.");
        }

        if (paranoidConsistencyChecking) {
            checkConsistency();
        }

        // skip past the name portion
        int current = findAttributesStart();

        // find the attribute matching this name
        tagRange attr = getAttributeLoc(name, new charRange(current, loc.startTag.end));

        if (!attr.isValid()) {
            return null;
        }

        // build the object
        String attrName = getDocument().docContent.substring(attr.startTag.start, attr.startTag.end + 1);
        String attrValue = getDocument().docContent.substring(attr.body.start, attr.body.end + 1);

        return new Attribute(this, attrName, attrValue);
    }

    /**
     * Verifies, by means of {@code assert} statements, that the ranges of
     * this element are valid and lie within the element, that every child
     * lies within this element, and that each child is ordered before the
     * child which follows it. The same checks are then applied to every
     * child.
     *
     * @return always {@code true}; a failed check surfaces as a failed
     *         assertion rather than as a return value.
     */
    protected boolean checkConsistency() {
        assert loc.isValid();

        charRange elementRange = new charRange(loc.startTag.start, loc.endTag.end);

        assert elementRange.contains(loc.startTag);
        assert elementRange.contains(loc.body);
        assert elementRange.contains(loc.endTag);

        if (null != children) {
            // Two iterators walk the same list, the second one running one
            // position ahead so that each child can be compared with its
            // successor.
            Iterator<LiteXMLElement> eachChild = children.iterator();
            Iterator<LiteXMLElement> nextChilds = children.iterator();

            if (nextChilds.hasNext()) {
                nextChilds.next();
            }

            while (eachChild.hasNext()) {
                LiteXMLElement aChild = eachChild.next();

                assert loc.contains(aChild.loc);

                if (nextChilds.hasNext()) {
                    LiteXMLElement nextChild = nextChilds.next();

                    assert aChild.loc.compareTo(nextChild.loc) < 0;
                } else {
                    // the look-ahead ran out, so this must be the last child
                    assert !eachChild.hasNext();
                }

                aChild.checkConsistency();
            }
        }

        return true;
    }

    /**
     * The document we are a part of.
     *
     * @return The document we are a part of.
     */
    LiteXMLDocument getDocument() {
        return doc;
    }
}