package folioxml.xml; import folioxml.core.InvalidMarkupException; import folioxml.core.TokenUtils; import java.io.IOException; import java.io.StringReader; import java.util.ArrayList; import java.util.Collection; import java.util.List; public class NodeList { private ArrayList<Node> a = null; private Node parent = null; public NodeList() { a = new ArrayList<Node>(); } public NodeList(int startingCapacity) { a = new ArrayList<Node>(startingCapacity); } /** * Wraps the specified node list... * * @param data */ public NodeList(ArrayList<Node> data, boolean useListDirectly) { if (useListDirectly) { a = data; } else { a = new ArrayList<Node>(data.size()); a.addAll(data); } } public NodeList(List<Node> data) { a = new ArrayList<Node>(data.size()); a.addAll(data); } public NodeList(Node aChild) { a = new ArrayList<Node>(); a.add(aChild); } public NodeList(String xml) throws IOException, InvalidMarkupException { a = new ArrayList<Node>(); //There may only be one root node - in fact, this is likely. No need for guesses. addAll(new XmlTokenReader(new StringReader(xml))); } /** * Reads tokens from the reader into this list until the reader is empty. Constructs the XML tree properly. * Remember this will build a DOM - only use for small bits that can fit into memory. * * @param reader * @throws IOException * @throws InvalidMarkupException */ public NodeList(IXmlTokenReader reader) throws IOException, InvalidMarkupException { this(); addAll(reader); } /** * Always call this when NodeList is a child of a Node. * Doesn't modify the .parent propery of children. * * @param parent * @return */ public NodeList setParent(Node parent) { this.parent = parent; return this; } public NodeList addAll(IXmlTokenReader reader) throws IOException, InvalidMarkupException { addUntil(reader, null); return this; } /** * Dangerous - keeps contents in memory. * * @param reader * @param closingTag * @return * @throws IOException * @throws InvalidMarkupException */ public NodeList addUntil(IXmlTokenReader reader, String closingTag) throws IOException, InvalidMarkupException { while (reader.canRead()) { XmlToken r = reader.read(); if (r == null) break; //Closing tags should not be present at this level. Node.ctor should be handling them, if they are paired properly. if (r.isTag() && r.isClosing()) { if (closingTag == null) throw new InvalidMarkupException("Unexpected closing tag encountered :" + r.toString(), r); if (closingTag.equalsIgnoreCase(r.getTagName())) return this; //Exit this level else throw new InvalidMarkupException("Unexpected closing tag encountered :" + r.toString() + ". Expected </" + closingTag + ">.", r); //We hit a unpaired closing tag. } Node n = new Node(r, reader, false); //If this is an opening tag, the constructor will read the contents and matching closing tag before returning. a.add(n); //This doesn't give us great debugging info.. A stack would be a better way .. n.parent = this.parent; } return this; } public Collection<Node> getCollection() { return a; } public List<Node> list() { return a; } /** * Removes all these items from the tree. The NodeList returned will contain a copy of the deleted items. * * @return */ public NodeList remove() { return this.remove(true); } /** * Removes these items and their children from the tree. The NodeList returned may or may not be the same instance, but will contain the deleted items. * * @return */ @SuppressWarnings("unchecked") public NodeList remove(boolean markDeleted) { if (parent != null && parent.children == this) { NodeList ret = new NodeList((ArrayList<Node>) a.clone()); for (Node n : a) { assert (n.parent == parent); n.parent = null; if (markDeleted) n.markDeleted(); } a.clear(); return ret; } else { for (Node n : a) n.remove(markDeleted); } return this; } /** * Removes the specified item from the collection. * * @param n * @return */ public NodeList remove(Node n) { a.remove(n); return this; } /** * Pulls the opening-closing pairs, but leaves the children. Note: If performed on a <td>, <th>, <tr>, <table>, <record>, or <infobase-meta>, invalid markup will result. * * @return */ public NodeList pull() { if (parent != null && parent.children == this) { Node[] items = a.toArray(new Node[]{}); a.clear(); //We clear children. We have a copy for (Node n : items) { assert (n.parent == parent); //Copy grandchildren. if (n.children != null) { Collection<Node> grandchildren = n.children.getCollection(); parent.addChildren(grandchildren); } //Delete backreferences n.parent = null; n.markDeleted(); } } else { for (Node n : a) n.pull(); //One-by-one } return this; } /** * Sets the specified attribute value on all tags in the collection. * * @param attrName * @param value * @return * @throws InvalidMarkupException */ public NodeList set(String attrName, String value) throws InvalidMarkupException { for (Node n : a) if (n.isTag()) n.set(attrName, value); return this; } /** * Removes the specified attribute on all tags in the collection. * * @param attrName * @return * @throws InvalidMarkupException */ public NodeList removeAttr(String attrName) throws InvalidMarkupException { for (Node n : a) if (n.isTag()) n.removeAttr(attrName); return this; } /** * Sets the tag name for all tags in the collection * * @param newName * @return * @throws InvalidMarkupException */ public NodeList setTagName(String newName) throws InvalidMarkupException { for (Node n : a) if (n.isTag()) n.setTagName(newName, true); return this; } /** * Returns a new list of all the children of the items in this collection. * * @return */ public NodeList allChildren() { //calculate the list size int sizeNeeded = 0; for (Node n : a) if (n.children != null) sizeNeeded += n.children.count(); NodeList nl = new NodeList(sizeNeeded); //Build the list for (Node n : a) if (n.children != null) nl.list().addAll(n.children.list()); return nl; } public NodeList flattenRecursive() { //calculate the list size int sizeNeeded = a.size(); for (Node n : a) if (n.children != null) sizeNeeded += n.children.count(); NodeList nl = new NodeList(sizeNeeded); //Build the list for (Node n : a) { nl.list().add(n); if (n.children != null) nl.list().addAll(n.children.flattenRecursive().list()); } return nl; } //@Override public String toString() { StringBuilder sb = new StringBuilder(); writeTo(sb); return sb.toString(); } public void writeTo(StringBuilder sb) { for (Node n : a) n.writeXmlTo(sb); } public String toXmlString(boolean autoIndent) { if (autoIndent) return new XmlFormatter(0).format(this); else { return toString(); } } /** * Replaces each child with a copy of 'n' * * @param n * @return */ public NodeList replaceEach(Node newNode) { for (Node n : a) { Node c = newNode.deepCopy(); c.parent = n.parent; n.parent.children.replace(n, c); n.markDeleted(); n.parent = null; } return this; } /** * Replaces instances of node A in the collection with node B. Dosen't modify parent properties * * @param a * @param b * @return */ public NodeList replace(Node A, Node B) { for (int i = 0; i < count(); i++) { if (a.get(i) == A) a.set(i, B); } return this; } public Node first() { return count() > 0 ? a.get(0) : null; } /** * Returns a NodeList containing the first node in this list. Never returns null. * * @return */ public NodeList firstList() { return count() > 0 ? new NodeList(a.get(0)) : new NodeList(); } public Node first(boolean assertCountEquals1) { assert (!assertCountEquals1 || count() == 1) : "Only one element is expected here. " + count() + " were found."; return first(); } /** * Returns the plaintext contents of the node and its descendants. * TODO: Entity decoding? * * @return */ public String getTextContents() { return writeTextContentsTo(null).toString(); } /** * Writes the text contents of the array to the specified stringbuilder * * @param sb * @return */ public StringBuilder writeTextContentsTo(StringBuilder sb) { if (sb == null) sb = new StringBuilder(estimateTextLength()); for (Node n : a) { if (n.isTextOrEntity()) n.writeTokenTo(sb); else if (n.children != null) n.children.writeTextContentsTo(sb); } return sb; } /** * Not exact. Performs no entity decoding. * * @return */ protected int estimateTextLength() { int size = 0; for (Node n : a) { if (n.isTextOrEntity()) size += n.markup.length(); else if (n.children != null) size += n.children.estimateTextLength(); } return size; } /** * Returns the text contents between the two nodes (they can be descendants). A and B are not included, neither are their children. * You can pass null to A to search for all nodes prior to B, or pass null to B to search all nodes after A. * <p> * TODO: Entity decoding? * * @param a * @param b * @return */ public String getTextContentsBetween(Node a, Node b) { StringBuilder sb = new StringBuilder(); writeTextContentsBetweenCore(sb, a, b); return sb.toString(); } /** * 1 means B was found. We are done. * 0 means A was found, but B wasn't. * -1 means neither was found. * <p> * You must pass a non-null value to B to get a result of 1... otherwise you will get 0. * * @param sb * @param a * @param b * @return */ private int writeTextContentsBetweenCore(StringBuilder sb, Node a, Node b) { boolean afound = (a == null); //If it is null, A has already been found boolean bfound = false; //If b is null, we are searching to the end. for (Node n : this.a) { if (n == b && b != null) { bfound = true; //We hit the termination node return 1; //We are done. Stop. } if (afound && !bfound) { if (n.isTextOrEntity()) n.writeTokenTo(sb); //We are between A and B. Write text } //Process children. if (n.children != null) { int result = n.children.writeTextContentsBetweenCore(sb, afound ? null : a, b); if (result == 1) return 1; //We're done. We found B. if (result == 0) afound = true; //We found A... we can start writing now. //Nothing to do for -1 } //Look for A last, since we don't want to write the contents of A... if (a != null && a == n) afound = true; } if (afound) return 0; //We found A, and may have written content. else return -1; //A hasn't been found yet. } /** * Returns true if all elements in the collection are tags, and the tag name for every one matches 'regex'. Returns true if the collection is empty. * * @param regex * @return * @throws InvalidMarkupException */ public boolean matches(String regex) throws InvalidMarkupException { for (Node n : a) if (!n.isTag() || !n.matches(regex)) return false; return true; } /** * The number of items in the collection * * @return */ public int count() { return a.size(); } /** * Returns a new NodeList containing tags matching the specified tagname. Use recursive to get results deeper than 'children' * * @param regex * @return * @throws InvalidMarkupException */ public NodeList filterByTagName(String regex, boolean recursive) throws InvalidMarkupException { NodeList nl = new NodeList(count()); filterByTagName(regex, recursive, nl); return nl; } private void filterByTagName(String regex, boolean recursive, NodeList addTo) throws InvalidMarkupException { for (Node n : list()) { if (n.matches(regex)) addTo.list().add(n); if (recursive && n.children != null) { n.children.filterByTagName(regex, recursive, addTo); } } } public NodeList sublist(Node from, Node endBefore) { int fromIx = from == null ? 0 : list().indexOf(from); int toIx = endBefore == null ? list().size() : list().indexOf(endBefore); //if (fromIx == -1 || toIx == -1) throw new IndexOutOfBoundsException(); return sublist(fromIx, toIx); } public NodeList sublist(int fromIndex, int toIndex) { return new NodeList(list().subList(fromIndex, toIndex)); } /** * Retuns all nodes (recursively) that match the given set of filters. If no filters are specified, all nodes are returned. * Some results may be children of other results * * @param filters * @return * @throws InvalidMarkupException */ public NodeList search(IFilter... filters) throws InvalidMarkupException { NodeList nl = new NodeList(count()); searchNodes(nl, new And(filters), false, true); return nl; } /** * Retuns all nodes (recursively) that match the given set of filters. If no filters are specified, all nodes are returned. * If a node matches, child nodes are not searched * * @param filters * @return * @throws InvalidMarkupException */ public NodeList searchOuter(IFilter... filters) throws InvalidMarkupException { NodeList nl = new NodeList(count()); searchNodes(nl, new And(filters), true, true); return nl; } public NodeList search(String query) throws InvalidMarkupException { NodeList nl = new NodeList(count()); searchNodes(nl, new FilterQueryParser().parse(query), false, true); return nl; } public XmlToStringWrapper getStringWrapper() throws InvalidMarkupException { return new XmlToStringWrapper(this); } public XmlToStringWrapper getStringWrapper(boolean entityDecode) throws InvalidMarkupException { return new XmlToStringWrapper(this, entityDecode); } private void searchNodes(NodeList addTo, IFilter filter, boolean outermostOnly, boolean recursive) throws InvalidMarkupException { for (Node n : a) { boolean matches = (filter.matches(n)); if (matches) { addTo.list().add(n); if (outermostOnly) continue; //Don't process children in outermostOnly mode } if (n.children != null && recursive) { n.children.searchNodes(addTo, filter, outermostOnly, recursive); } } } public NodeList searchBetween(Node after, Node before, IFilter... filters) throws InvalidMarkupException { NodeList nl = new NodeList(count()); searchNodesBetween(after, before, nl, new And(filters), null, false, true); return nl; } public NodeList textEntityNodesBetween(Node after, Node before, IFilter exclude) throws InvalidMarkupException { NodeList nl = new NodeList(countMatchingNodes(new TextEntityFilter(1))); searchNodesBetween(after, before, nl, new TextEntityFilter(1), exclude, false, true); return nl; } ///Recursive public int countMatchingNodes(IFilter filter) throws InvalidMarkupException { int len = 0; for (Node n : this.a) { //Local length if (filter.matches(n)) { len++; } else if (n.children != null) { len += n.children.countMatchingNodes(filter); } } return len; } /** * 1 means B was found. We are done. * 0 means A was found, but B wasn't. * -1 means neither was found. * <p> * You must pass a non-null value to B to get a result of 1... otherwise you will get 0. * * @param a * @param b * @return */ private int searchNodesBetween(Node a, Node b, NodeList addTo, IFilter filter, IFilter exclude, boolean outermostOnly, boolean recursive) throws InvalidMarkupException { boolean afound = (a == null); //If it is null, A has already been found boolean bfound = false; //If b is null, we are searching to the end. for (Node n : this.a) { if (n == b && b != null) { bfound = true; //We hit the termination node return 1; //We are done. Stop. } boolean matches = false; boolean excluded = exclude != null && exclude.matches(n); boolean searchChildren = recursive && n.children != null && !excluded; if (afound && !bfound && !excluded) { matches = (filter.matches(n)); } if (matches) { addTo.list().add(n); if (outermostOnly) searchChildren = false; //Don't process children in outermostOnly mode } //Process children. if (searchChildren) { int result = n.children.searchNodesBetween(afound ? null : a, b, addTo, filter, exclude, outermostOnly, recursive); if (result == 1) return 1; //We're done. We found B. if (result == 0) afound = true; //We found A... we can start writing now. //Nothing to do for -1 } //Look for A last, since we don't want to write the contents of A... if (a != null && a == n) afound = true; } if (afound) return 0; //We found A, and may have written content. else return -1; //A hasn't been found yet. } /** * Returns only text nodes. * * @return * @throws InvalidMarkupException */ public NodeList textNodes() throws InvalidMarkupException { return search(new NodeFilter() { public boolean matches(Node n) { return n.isTextOrEntity(); } }); } /** * Filters the list to matching items. Not recursive * * @param filters * @return * @throws InvalidMarkupException */ public NodeList filter(IFilter... filters) throws InvalidMarkupException { NodeList nl = new NodeList(count()); searchNodes(nl, new And(filters), false, false); return nl; } public NodeList filterRecursive(IFilter... filters) throws InvalidMarkupException { NodeList nl = new NodeList(countMatchingNodes(new And(filters))); searchNodes(nl, new And(filters), false, true); return nl; } /** * Fails assertion if regex doesn't match the contents of each translator link * * @param regex */ public NodeList assertEachContentMatches(String regex) { for (Node n : a) { if (!TokenUtils.fastMatchesNonCached(regex, new NodeList(n).getTextContents())) { //assert false: "Content of node doesn't match expectations: " + regex + " != " + new NodeList(n).getTextContents(); throw new RuntimeException("Content of node doesn't match expectations: " + regex + " != " + new NodeList(n).getTextContents()); } } return this; } public boolean eachContentMatches(String regex) { for (Node n : a) { if (!TokenUtils.fastMatchesNonCached(regex, new NodeList(n).getTextContents())) { return false; } } return true; } /** * If there are any elements, mustBeTrue must be true or an exception is thrown * * @param mustBeTrue * @return */ public NodeList assertTrue(boolean mustBeTrue) { if (count() > 0) assert (mustBeTrue); return this; } public Node last() { if (count() > 0) return a.get(a.size() - 1); else return null; } /** * Creates a recursive, deep copy of the list of nodes. * * @return */ public NodeList deepCopy() { NodeList nl = new NodeList(count()); nl.setParent(this.parent); for (Node c : a) { nl.list().add(c.deepCopy()); } return nl; } public NodeList assertCombinedContentMatches(String regex) { if (!TokenUtils.fastMatchesNonCached(regex, this.getTextContents())) { throw new RuntimeException("Content of node doesn't match expectations: " + regex + " != " + this.getTextContents()); //assert false: "Content of node doesn't match expectations: " + regex + " != " + this.getTextContents(); } return this; } /** * Sets the parent property of every child to null. Use carefully - you must make sure the parent object also deletes its reference, and/or is deleted itself. * * @return */ public NodeList nullParentRefs() { for (Node n : a) n.parent = null; return this; } /** * Searches recursively for the specified Node instance. Returns false if not found. * * @param firstNumber * @return */ public boolean has(Node firstNumber) { //Search shallow if (a.contains(firstNumber)) return true; for (Node n : a) { if (n.children != null) {// added null check djl 08-26-2010 if (n.children.has(firstNumber)) return true; } } return false; } ///False if the list contains any nodes (recursively) that are not valid phrasing content per HTML5 public boolean phrasingContentOnly() { //https://developer.mozilla.org/en-US/docs/Web/Guide/HTML/Content_categories#Phrasing_content for (Node every : flattenRecursive().list()) { if (every.isTag()) { if (!every.matches("abbr|audio|b|bdo|br|button|canvas|cite|code|command|datalist|dfn|em|embded|i|iframe|img|input|kbd|keygen|label|mark|math|meter|noscript|object|output|progress|q|ruby|samp|script|select|small|span|strong|sub|sup|svg|textarea|time|var|video|wbr|") && !every.matches("a|area|del|ins|map|link|meta|bookmark")) { return false; } } } return true; } } /* class NodeListXmlTokenReader implements IXmlTokenReader{ private NodeList nl = null; public NodeListXmlTokenReader(NodeList nl){ this.nl = nl; current = nl.first(); indexes.push(0); childrenFinished.push(false); } private Node current = null; private Stack<Integer> indexes = new Stack<Integer>(); private Stack<Boolean> childrenFinished = new Stack<Boolean>(); public XmlToken read() throws IOException, InvalidMarkupException { //current can point to a node that has been processed but hasn't had the closing tag done yet. SlxToken t = current.toToken(); if (current.children != null && current.count) }8 public boolean canRead() { return current != null; } public void close() {} }*/