NodeList.java example

Explorer
folioxml-master
- commandline
  - src
    - folioxml
      - command
        Main.java
      - export
        ExportRunner.java
  - testsrc
    - folioxml
      - export
        TestExportRunner.java
- contrib
  - folioxml-lucene
    - src
      - folioxml
        export
        plugins
        ResolveHyperlinks.java
        lucene
        FieldCollector.java
        IndexFieldOpts.java
        IndexFieldOptsProvider.java
        InfobaseFieldOptsSet.java
        InfobaseSetIndexer.java
        analysis
        AnalyzerPicker.java
        DynamicAnalyzer.java
        ListAnalyzer.java
        ListTokenizer.java
        LowercaseKeywordAnalyzer.java
        folio
        FolioEnuAnalyzer.java
        FolioEnuPhraseAnalyzer.java
        FolioEnuTokenizer.java
        LookAroundCharTokenizer.java
        TokenCombiner.java
        folioQueryParser
        QueryParser.java
        QueryToken.java
        QueryTokenReader.java
    - testsrc
      - apache
        lucene
        CharTokenizer.java
      - folioxml
        directexport
        SimultaneousTest.java
        lucene
        analysis
        folio
        TokenCombinerTest.java
        folioQueryParser
        QueryParserTest.java
        tests
        Indexer.java
- core
  - folioxml
- diff_match_patch
  - oldtest
    - name
      - fraser
        neil
        plaintext
        diff_match_patch_test.java
  - src
    - name
      - fraser
        neil
        plaintext
        diff_match_patch.java
package folioxml.xml;

import folioxml.core.InvalidMarkupException;
import folioxml.core.TokenUtils;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

/**
 * An ordered, mutable collection of {@link Node} objects backed by an {@link ArrayList}.
 * <p>
 * A list is either the child list of a node (see {@link #setParent(Node)}) or a free-standing
 * set of nodes, such as the result of {@link #search(IFilter...)}. Some operations behave
 * differently in the two cases; see {@link #remove(boolean)} and {@link #pull()}.
 * Most mutating methods return a NodeList so that calls can be chained.
 */
public class NodeList {

    /**
     * Tag-name alternation for elements that are always phrasing content.
     * See https://developer.mozilla.org/en-US/docs/Web/Guide/HTML/Content_categories#Phrasing_content
     */
    private static final String PHRASING_TAGS =
            "abbr|audio|b|bdo|br|button|canvas|cite|code|command|datalist|dfn|em|embed|i|iframe|img|input|kbd|keygen|label|mark|math|meter|noscript|object|output|progress|q|ruby|samp|script|select|small|span|strong|sub|sup|svg|textarea|time|var|video|wbr";

    /**
     * Tag-name alternation for elements that are accepted as phrasing content without checking
     * the extra conditions HTML5 places on some of them. 'bookmark' is not an HTML element; it is
     * presumably a tag specific to this project.
     */
    private static final String CONDITIONAL_PHRASING_TAGS = "a|area|del|ins|map|link|meta|bookmark";

    /** Backing storage. Exposed directly (not copied) by {@link #list()} and {@link #getCollection()}. */
    private final ArrayList<Node> a;

    /** The node whose child list this is, or null when this list is free-standing. */
    private Node parent = null;


    /**
     * Creates an empty list.
     */
    public NodeList() {
        a = new ArrayList<Node>();
    }

    /**
     * Creates an empty list with the given initial capacity.
     *
     * @param startingCapacity initial capacity of the backing ArrayList
     */
    public NodeList(int startingCapacity) {
        a = new ArrayList<Node>(startingCapacity);
    }

    /**
     * Wraps or copies the specified list of nodes.
     *
     * @param data            the nodes to hold
     * @param useListDirectly if true, 'data' itself becomes the backing list (later changes to either are
     *                        shared); if false, the elements are copied into a new list
     */
    public NodeList(ArrayList<Node> data, boolean useListDirectly) {
        if (useListDirectly) {
            a = data;
        } else {
            a = new ArrayList<Node>(data.size());
            a.addAll(data);
        }
    }

    /**
     * Creates a list holding a copy of the elements of 'data'. The nodes themselves are not copied.
     *
     * @param data the nodes to hold
     */
    public NodeList(List<Node> data) {
        a = new ArrayList<Node>(data.size());
        a.addAll(data);
    }

    /**
     * Creates a list containing a single node.
     *
     * @param aChild the only element of the new list
     */
    public NodeList(Node aChild) {
        a = new ArrayList<Node>();
        a.add(aChild);
    }

    /**
     * Parses the given XML text and fills this list with the resulting top-level nodes.
     *
     * @param xml the markup to parse
     * @throws IOException
     * @throws InvalidMarkupException
     */
    public NodeList(String xml) throws IOException, InvalidMarkupException {
        a = new ArrayList<Node>(); //There may only be one root node - in fact, this is likely. No need for guesses.
        addAll(new XmlTokenReader(new StringReader(xml)));
    }

    /**
     * Reads tokens from the reader into this list until the reader is empty. Constructs the XML tree properly.
     * Remember this will build a DOM - only use for small bits that can fit into memory.
     *
     * @param reader
     * @throws IOException
     * @throws InvalidMarkupException
     */
    public NodeList(IXmlTokenReader reader) throws IOException, InvalidMarkupException {
        this();
        addAll(reader);
    }


    /**
     * Always call this when NodeList is a child of a Node.
     * Doesn't modify the .parent property of children.
     *
     * @param parent the node that owns this list
     * @return this list
     */
    public NodeList setParent(Node parent) {
        this.parent = parent;
        return this;
    }


    /**
     * Reads every remaining token from the reader and appends the resulting nodes to this list.
     * A closing tag at this level causes an InvalidMarkupException.
     *
     * @param reader
     * @return this list
     * @throws IOException
     * @throws InvalidMarkupException
     */
    public NodeList addAll(IXmlTokenReader reader) throws IOException, InvalidMarkupException {
        addUntil(reader, null);
        return this;
    }

    /**
     * Appends nodes read from the reader until the given closing tag is read or the reader runs out.
     * Dangerous - keeps contents in memory.
     * <p>
     * Each appended node gets its parent set to this list's parent. If the reader is exhausted before
     * the closing tag is seen, the method returns normally.
     *
     * @param reader
     * @param closingTag tag name (compared case-insensitively) that ends this level, or null if no closing tag is allowed here
     * @return this list
     * @throws IOException
     * @throws InvalidMarkupException if a closing tag other than 'closingTag' is encountered
     */
    public NodeList addUntil(IXmlTokenReader reader, String closingTag) throws IOException, InvalidMarkupException {
        while (reader.canRead()) {
            XmlToken r = reader.read();
            if (r == null) break;
            //Closing tags should not be present at this level. Node.ctor should be handling them, if they are paired properly.
            if (r.isTag() && r.isClosing()) {
                if (closingTag == null)
                    throw new InvalidMarkupException("Unexpected closing tag encountered :" + r.toString(), r);

                if (closingTag.equalsIgnoreCase(r.getTagName()))
                    return this; //Exit this level
                else
                    throw new InvalidMarkupException("Unexpected closing tag encountered :" + r.toString() + ". Expected </" + closingTag + ">.", r); //We hit a unpaired closing tag.
            }
            Node n = new Node(r, reader, false); //If this is an opening tag, the constructor will read the contents and matching closing tag before returning.
            a.add(n); //This doesn't give us great debugging info.. A stack would be a better way ..
            n.parent = this.parent;
        }
        return this;
    }


    /**
     * Returns the backing list itself (not a copy); changes made through it change this NodeList.
     *
     * @return the live backing collection
     */
    public Collection<Node> getCollection() {
        return a;
    }

    /**
     * Returns the backing list itself (not a copy); changes made through it change this NodeList.
     *
     * @return the live backing list
     */
    public List<Node> list() {
        return a;
    }

    /**
     * Removes all these items from the tree and marks them deleted. The NodeList returned will contain the deleted items.
     *
     * @return see {@link #remove(boolean)}
     */
    public NodeList remove() {
        return this.remove(true);
    }

    /**
     * Removes these items and their children from the tree. The NodeList returned may or may not be the same instance, but will contain the deleted items.
     * <p>
     * When this list is its parent's child list, it is emptied and a new list holding the removed nodes
     * is returned. Otherwise each node is removed individually and this list is returned.
     *
     * @param markDeleted whether the removed nodes are also marked deleted
     * @return a list containing the removed nodes
     */
    public NodeList remove(boolean markDeleted) {
        if (parent != null && parent.children == this) {
            //Snapshot the contents before clearing; the constructor copies the elements.
            NodeList ret = new NodeList(a);
            for (Node n : a) {
                assert (n.parent == parent);
                n.parent = null;
                if (markDeleted) n.markDeleted();
            }
            a.clear();
            return ret;
        } else {
            for (Node n : a) n.remove(markDeleted);
        }
        return this;
    }


    /**
     * Removes the specified item from the collection. The node itself is not modified.
     *
     * @param n the node to drop from this list
     * @return this list
     */
    public NodeList remove(Node n) {
        a.remove(n);
        return this;
    }

    /**
     * Pulls the opening-closing pairs, but leaves the children. Note: If performed on a {@code <td>}, {@code <th>},
     * {@code <tr>}, {@code <table>}, {@code <record>}, or {@code <infobase-meta>}, invalid markup will result.
     * <p>
     * When this list is its parent's child list, every node in it is detached and marked deleted, and
     * its children are handed to the parent via addChildren. Otherwise each node is pulled individually.
     *
     * @return this list
     */
    public NodeList pull() {
        if (parent != null && parent.children == this) {
            Node[] items = a.toArray(new Node[0]);
            a.clear(); //We clear children. We have a copy
            for (Node n : items) {
                assert (n.parent == parent);
                //Copy grandchildren.
                if (n.children != null) {
                    Collection<Node> grandchildren = n.children.getCollection();
                    parent.addChildren(grandchildren);
                }
                //Delete backreferences
                n.parent = null;
                n.markDeleted();
            }
        } else {
            for (Node n : a) n.pull(); //One-by-one
        }
        return this;
    }

    /**
     * Sets the specified attribute value on all tags in the collection. Non-tag nodes are skipped.
     *
     * @param attrName
     * @param value
     * @return this list
     * @throws InvalidMarkupException
     */
    public NodeList set(String attrName, String value) throws InvalidMarkupException {
        for (Node n : a) {
            if (n.isTag()) n.set(attrName, value);
        }
        return this;
    }

    /**
     * Removes the specified attribute on all tags in the collection. Non-tag nodes are skipped.
     *
     * @param attrName
     * @return this list
     * @throws InvalidMarkupException
     */
    public NodeList removeAttr(String attrName) throws InvalidMarkupException {
        for (Node n : a) {
            if (n.isTag()) n.removeAttr(attrName);
        }
        return this;
    }

    /**
     * Sets the tag name for all tags in the collection. Non-tag nodes are skipped.
     *
     * @param newName
     * @return this list
     * @throws InvalidMarkupException
     */
    public NodeList setTagName(String newName) throws InvalidMarkupException {
        for (Node n : a) {
            if (n.isTag()) n.setTagName(newName, true);
        }
        return this;
    }


    /**
     * Returns a new list of all the direct children of the items in this collection.
     *
     * @return a new list; the nodes themselves are not copied
     */
    public NodeList allChildren() {
        //calculate the list size
        int sizeNeeded = 0;
        for (Node n : a) if (n.children != null) sizeNeeded += n.children.count();

        NodeList nl = new NodeList(sizeNeeded);
        //Build the list
        for (Node n : a) if (n.children != null) nl.list().addAll(n.children.list());

        return nl;
    }


    /**
     * Returns a new list containing every node in this list and all of their descendants.
     * Each node is immediately followed by its own descendants (pre-order).
     *
     * @return a new list; the nodes themselves are not copied
     */
    public NodeList flattenRecursive() {
        //Initial capacity only: counts this level plus direct children, deeper levels grow the list as needed.
        int sizeNeeded = a.size();
        for (Node n : a) if (n.children != null) sizeNeeded += n.children.count();

        NodeList nl = new NodeList(sizeNeeded);
        flattenInto(nl.a);
        return nl;
    }

    /**
     * Appends every node of this list, each followed by its descendants, to 'out'.
     */
    private void flattenInto(List<Node> out) {
        for (Node n : a) {
            out.add(n);
            if (n.children != null) n.children.flattenInto(out);
        }
    }


    /**
     * Returns the XML of every node in the list, concatenated without extra formatting.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        writeTo(sb);
        return sb.toString();
    }

    /**
     * Appends the XML of every node in the list to the given builder.
     *
     * @param sb destination
     */
    public void writeTo(StringBuilder sb) {
        for (Node n : a) n.writeXmlTo(sb);
    }

    /**
     * Returns the XML for this list.
     *
     * @param autoIndent if true the output is produced by an XmlFormatter; otherwise it is the same as {@link #toString()}
     * @return the XML text
     */
    public String toXmlString(boolean autoIndent) {
        if (autoIndent)
            return new XmlFormatter(0).format(this);
        else {
            return toString();
        }
    }

    /**
     * Replaces each node in this list with its own deep copy of 'newNode'.
     * <p>
     * A node that has a parent is replaced in that parent's child list. A node without a parent
     * (or whose parent has no child list) is replaced in this list instead. Every replaced node is
     * marked deleted and detached from its parent.
     *
     * @param newNode the template node; it is deep-copied once per replaced node
     * @return this list
     */
    public NodeList replaceEach(Node newNode) {
        for (int i = 0; i < a.size(); i++) {
            Node n = a.get(i);
            Node c = newNode.deepCopy();
            Node owner = n.parent;
            c.parent = owner;
            if (owner != null && owner.children != null) {
                owner.children.replace(n, c);
            } else {
                //No owning child list to update, so swap the node where we hold it.
                a.set(i, c);
            }
            n.markDeleted();
            n.parent = null;
        }
        return this;
    }

    /**
     * Replaces instances of node A in the collection with node B. Doesn't modify parent properties.
     * Nodes are compared by identity.
     *
     * @param A the node to look for
     * @param B the node to put in its place
     * @return this list
     */
    public NodeList replace(Node A, Node B) {
        for (int i = 0; i < count(); i++) {
            if (a.get(i) == A) a.set(i, B);
        }
        return this;
    }

    /**
     * Returns the first node, or null if the list is empty.
     */
    public Node first() {
        return count() > 0 ? a.get(0) : null;
    }

    /**
     * Returns a NodeList containing the first node in this list. Never returns null.
     *
     * @return a new list with zero or one element
     */
    public NodeList firstList() {
        return count() > 0 ? new NodeList(a.get(0)) : new NodeList();
    }

    /**
     * Returns the first node, or null if the list is empty.
     *
     * @param assertCountEquals1 if true, a Java assertion (only active when assertions are enabled)
     *                           checks that the list holds exactly one element
     * @return the first node or null
     */
    public Node first(boolean assertCountEquals1) {
        assert (!assertCountEquals1 || count() == 1) : "Only one element is expected here. " + count() + " were found.";
        return first();
    }

    /**
     * Returns the plaintext contents of the nodes and their descendants.
     * TODO: Entity decoding?
     *
     * @return the concatenated text
     */
    public String getTextContents() {
        return writeTextContentsTo(null).toString();
    }

    /**
     * Writes the text contents of the list (text/entity nodes, recursively) to the specified StringBuilder.
     *
     * @param sb destination; if null a new builder is created
     * @return the builder that was written to
     */
    public StringBuilder writeTextContentsTo(StringBuilder sb) {
        if (sb == null) sb = new StringBuilder(estimateTextLength());

        for (Node n : a) {
            if (n.isTextOrEntity()) n.writeTokenTo(sb);
            else if (n.children != null) n.children.writeTextContentsTo(sb);
        }
        return sb;
    }

    /**
     * Estimates the length of the text contents by summing the markup length of text/entity nodes.
     * Not exact. Performs no entity decoding.
     *
     * @return the estimated number of characters
     */
    protected int estimateTextLength() {
        int size = 0;
        for (Node n : a) {
            if (n.isTextOrEntity()) size += n.markup.length();
            else if (n.children != null) size += n.children.estimateTextLength();
        }
        return size;
    }

    /**
     * Returns the text contents between the two nodes (they can be descendants). A and B are not included, neither are their children.
     * You can pass null to A to search for all nodes prior to B, or pass null to B to search all nodes after A.
     * <p>
     * TODO: Entity decoding?
     *
     * @param a the node after which text is collected, or null to start at the beginning
     * @param b the node at which collection stops, or null to continue to the end
     * @return the collected text
     */
    public String getTextContentsBetween(Node a, Node b) {
        StringBuilder sb = new StringBuilder();
        writeTextContentsBetweenCore(sb, a, b);
        return sb.toString();
    }

    /**
     * Recursive worker for {@link #getTextContentsBetween(Node, Node)}.
     * <p>
     * 1 means B was found. We are done.
     * 0 means A was found (or was null), but B wasn't.
     * -1 means neither was found.
     * <p>
     * You must pass a non-null value to B to get a result of 1... otherwise you will get 0.
     *
     * @param sb destination for the text
     * @param a  start marker, or null if collection has already started
     * @param b  end marker, or null to run to the end
     * @return 1, 0 or -1 as described above
     */
    private int writeTextContentsBetweenCore(StringBuilder sb, Node a, Node b) {
        boolean afound = (a == null); //If it is null, A has already been found
        for (Node n : this.a) {
            if (b != null && n == b) {
                return 1; //We hit the termination node. Stop.
            }
            if (afound && n.isTextOrEntity()) {
                n.writeTokenTo(sb); //We are between A and B. Write text
            }

            //Process children.
            if (n.children != null) {
                int result = n.children.writeTextContentsBetweenCore(sb, afound ? null : a, b);
                if (result == 1) return 1; //We're done. We found B.
                if (result == 0) afound = true; //We found A... we can start writing now.
                //Nothing to do for -1
            }
            //Look for A last, since we don't want to write the contents of A...
            if (a != null && a == n) afound = true;
        }
        return afound ? 0 : -1;
    }


    /**
     * Returns true if all elements in the collection are tags, and the tag name for every one matches 'regex'. Returns true if the collection is empty.
     *
     * @param regex
     * @return whether every element is a matching tag
     * @throws InvalidMarkupException
     */
    public boolean matches(String regex) throws InvalidMarkupException {
        for (Node n : a) {
            if (!n.isTag() || !n.matches(regex)) return false;
        }
        return true;
    }

    /**
     * The number of items in the collection
     *
     * @return the element count
     */
    public int count() {
        return a.size();
    }

    /**
     * Returns a new NodeList containing nodes matching the specified tagname. Use recursive to get results deeper than this level.
     *
     * @param regex
     * @param recursive whether descendants are examined too
     * @return a new list of matching nodes
     * @throws InvalidMarkupException
     */
    public NodeList filterByTagName(String regex, boolean recursive) throws InvalidMarkupException {
        NodeList nl = new NodeList(count());
        filterByTagName(regex, recursive, nl);
        return nl;
    }

    /**
     * Appends to 'addTo' every node in this list (and, if recursive, every descendant) for which Node.matches(regex) is true.
     */
    private void filterByTagName(String regex, boolean recursive, NodeList addTo) throws InvalidMarkupException {
        for (Node n : list()) {
            if (n.matches(regex)) addTo.list().add(n);
            if (recursive && n.children != null) {
                n.children.filterByTagName(regex, recursive, addTo);
            }
        }
    }

    /**
     * Returns a copy of the portion of this list starting at 'from' and ending just before 'endBefore'.
     * <p>
     * If either node is given but not present in this list, or 'endBefore' precedes 'from', the underlying
     * List.subList call throws an unchecked exception (IndexOutOfBoundsException or IllegalArgumentException).
     *
     * @param from      first node to include, or null to start at the beginning
     * @param endBefore node to stop before, or null to run to the end
     * @return a new list holding the selected nodes
     */
    public NodeList sublist(Node from, Node endBefore) {
        int fromIx = from == null ? 0 : list().indexOf(from);
        int toIx = endBefore == null ? list().size() : list().indexOf(endBefore);
        //if (fromIx == -1 || toIx == -1) throw new IndexOutOfBoundsException();
        return sublist(fromIx, toIx);
    }

    /**
     * Returns a copy of the portion of this list between the given indexes.
     *
     * @param fromIndex first index, inclusive
     * @param toIndex   last index, exclusive
     * @return a new list holding the selected nodes
     */
    public NodeList sublist(int fromIndex, int toIndex) {
        return new NodeList(list().subList(fromIndex, toIndex));
    }

    /**
     * Returns all nodes (recursively) that match the given set of filters, combined with And.
     * Some results may be children of other results
     *
     * @param filters
     * @return a new list of matching nodes
     * @throws InvalidMarkupException
     */
    public NodeList search(IFilter... filters) throws InvalidMarkupException {
        NodeList nl = new NodeList(count());
        searchNodes(nl, new And(filters), false, true);
        return nl;
    }

    /**
     * Returns all nodes (recursively) that match the given set of filters, combined with And.
     * If a node matches, child nodes are not searched
     *
     * @param filters
     * @return a new list of matching nodes
     * @throws InvalidMarkupException
     */
    public NodeList searchOuter(IFilter... filters) throws InvalidMarkupException {
        NodeList nl = new NodeList(count());
        searchNodes(nl, new And(filters), true, true);
        return nl;
    }

    /**
     * Returns all nodes (recursively) that match the filter produced by parsing 'query' with FilterQueryParser.
     *
     * @param query the filter query text
     * @return a new list of matching nodes
     * @throws InvalidMarkupException
     */
    public NodeList search(String query) throws InvalidMarkupException {
        NodeList nl = new NodeList(count());

        searchNodes(nl, new FilterQueryParser().parse(query), false, true);
        return nl;
    }

    /**
     * Creates an XmlToStringWrapper over this list.
     *
     * @throws InvalidMarkupException
     */
    public XmlToStringWrapper getStringWrapper() throws InvalidMarkupException {
        return new XmlToStringWrapper(this);
    }

    /**
     * Creates an XmlToStringWrapper over this list.
     *
     * @param entityDecode passed through to the XmlToStringWrapper constructor
     * @throws InvalidMarkupException
     */
    public XmlToStringWrapper getStringWrapper(boolean entityDecode) throws InvalidMarkupException {
        return new XmlToStringWrapper(this, entityDecode);
    }

    /**
     * Appends every node accepted by 'filter' to 'addTo', in document order.
     *
     * @param addTo         destination list
     * @param filter        the test applied to each node
     * @param outermostOnly if true, the children of a matching node are not searched
     * @param recursive     if false, only the nodes directly in this list are tested
     */
    private void searchNodes(NodeList addTo, IFilter filter, boolean outermostOnly, boolean recursive) throws InvalidMarkupException {
        for (Node n : a) {
            boolean matches = (filter.matches(n));
            if (matches) {
                addTo.list().add(n);
                if (outermostOnly) continue; //Don't process children in outermostOnly mode
            }
            if (n.children != null && recursive) {
                n.children.searchNodes(addTo, filter, outermostOnly, recursive);
            }
        }
    }

    /**
     * Returns all nodes (recursively) located after 'after' and before 'before' that match the given filters.
     *
     * @param after   the node after which matching starts, or null to start at the beginning
     * @param before  the node at which the search stops, or null to search to the end
     * @param filters combined with And
     * @return a new list of matching nodes
     * @throws InvalidMarkupException
     */
    public NodeList searchBetween(Node after, Node before, IFilter... filters) throws InvalidMarkupException {
        NodeList nl = new NodeList(count());
        searchNodesBetween(after, before, nl, new And(filters), null, false, true);
        return nl;
    }


    /**
     * Returns the nodes accepted by TextEntityFilter(1) that lie after 'after' and before 'before'.
     * Nodes accepted by 'exclude' are skipped together with their descendants.
     *
     * @param after   the node after which matching starts, or null to start at the beginning
     * @param before  the node at which the search stops, or null to search to the end
     * @param exclude filter for subtrees to skip, or null
     * @return a new list of matching nodes
     * @throws InvalidMarkupException
     */
    public NodeList textEntityNodesBetween(Node after, Node before, IFilter exclude) throws InvalidMarkupException {
        NodeList nl = new NodeList(countMatchingNodes(new TextEntityFilter(1)));
        searchNodesBetween(after, before, nl, new TextEntityFilter(1), exclude, false, true);
        return nl;
    }

    /**
     * Counts the nodes accepted by 'filter', recursively. The descendants of a matching node are not examined.
     *
     * @param filter
     * @return the number of matches
     * @throws InvalidMarkupException
     */
    public int countMatchingNodes(IFilter filter) throws InvalidMarkupException {
        int len = 0;
        for (Node n : this.a) {
            if (filter.matches(n)) {
                len++;
            } else if (n.children != null) {
                len += n.children.countMatchingNodes(filter);
            }
        }
        return len;
    }


    /**
     * Recursive worker for the "between" searches.
     * <p>
     * 1 means B was found. We are done.
     * 0 means A was found (or was null), but B wasn't.
     * -1 means neither was found.
     * <p>
     * You must pass a non-null value to B to get a result of 1... otherwise you will get 0.
     *
     * @param a             start marker, or null if matching has already started
     * @param b             end marker, or null to run to the end
     * @param addTo         destination list
     * @param filter        the test applied to nodes between A and B
     * @param exclude       nodes accepted by this filter are neither matched nor descended into; may be null
     * @param outermostOnly if true, the children of a matching node are not searched
     * @param recursive     if false, children are never searched
     * @return 1, 0 or -1 as described above
     */
    private int searchNodesBetween(Node a, Node b, NodeList addTo, IFilter filter, IFilter exclude, boolean outermostOnly, boolean recursive) throws InvalidMarkupException {
        boolean afound = (a == null); //If it is null, A has already been found
        for (Node n : this.a) {
            if (b != null && n == b) {
                return 1; //We hit the termination node. Stop.
            }
            boolean matches = false;
            boolean excluded = exclude != null && exclude.matches(n);
            boolean searchChildren = recursive && n.children != null && !excluded;

            if (afound && !excluded) {
                matches = (filter.matches(n));
            }

            if (matches) {
                addTo.list().add(n);
                if (outermostOnly) searchChildren = false; //Don't process children in outermostOnly mode
            }

            //Process children.
            if (searchChildren) {
                int result = n.children.searchNodesBetween(afound ? null : a, b, addTo, filter, exclude, outermostOnly, recursive);
                if (result == 1) return 1; //We're done. We found B.
                if (result == 0) afound = true; //We found A... we can start matching now.
                //Nothing to do for -1
            }
            //Look for A last, since we don't want to match the contents of A...
            if (a != null && a == n) afound = true;
        }
        return afound ? 0 : -1;
    }


    /**
     * Returns only text and entity nodes, searched recursively.
     *
     * @return a new list of matching nodes
     * @throws InvalidMarkupException
     */
    public NodeList textNodes() throws InvalidMarkupException {
        return search(new NodeFilter() {
            public boolean matches(Node n) {
                return n.isTextOrEntity();
            }
        });

    }


    /**
     * Filters the list to matching items. Not recursive
     *
     * @param filters combined with And
     * @return a new list of matching nodes
     * @throws InvalidMarkupException
     */
    public NodeList filter(IFilter... filters) throws InvalidMarkupException {
        NodeList nl = new NodeList(count());
        searchNodes(nl, new And(filters), false, false);
        return nl;
    }

    /**
     * Returns all nodes (recursively) that match the given filters. Same search as {@link #search(IFilter...)};
     * only the initial capacity of the result list differs.
     *
     * @param filters combined with And
     * @return a new list of matching nodes
     * @throws InvalidMarkupException
     */
    public NodeList filterRecursive(IFilter... filters) throws InvalidMarkupException {
        NodeList nl = new NodeList(countMatchingNodes(new And(filters)));
        searchNodes(nl, new And(filters), false, true);
        return nl;
    }

    /**
     * Throws a RuntimeException if regex doesn't match the text contents of each node in the list.
     *
     * @param regex
     * @return this list
     */
    public NodeList assertEachContentMatches(String regex) {
        for (Node n : a) {
            String contents = new NodeList(n).getTextContents();
            if (!TokenUtils.fastMatchesNonCached(regex, contents)) {
                throw new RuntimeException("Content of node doesn't match expectations: " + regex + " != " + contents);
            }
        }
        return this;
    }

    /**
     * Returns true if regex matches the text contents of every node in the list (true for an empty list).
     *
     * @param regex
     * @return whether every node's text matches
     */
    public boolean eachContentMatches(String regex) {
        for (Node n : a) {
            if (!TokenUtils.fastMatchesNonCached(regex, new NodeList(n).getTextContents())) {
                return false;
            }
        }
        return true;
    }

    /**
     * If there are any elements, mustBeTrue must be true. This is checked with a Java assertion,
     * so it only has an effect when assertions are enabled.
     *
     * @param mustBeTrue
     * @return this list
     */
    public NodeList assertTrue(boolean mustBeTrue) {
        if (count() > 0) assert (mustBeTrue);
        return this;
    }

    /**
     * Returns the last node, or null if the list is empty.
     */
    public Node last() {
        return count() > 0 ? a.get(a.size() - 1) : null;
    }

    /**
     * Creates a recursive, deep copy of the list of nodes. The copy has the same parent reference as this list.
     *
     * @return the new list
     */
    public NodeList deepCopy() {
        NodeList nl = new NodeList(count());
        nl.setParent(this.parent);
        for (Node c : a) {
            nl.list().add(c.deepCopy());
        }
        return nl;
    }

    /**
     * Throws a RuntimeException if regex doesn't match the combined text contents of the whole list.
     *
     * @param regex
     * @return this list
     */
    public NodeList assertCombinedContentMatches(String regex) {
        if (!TokenUtils.fastMatchesNonCached(regex, this.getTextContents())) {
            throw new RuntimeException("Content of node doesn't match expectations: " + regex + " != " + this.getTextContents());
        }
        return this;
    }

    /**
     * Sets the parent property of every child to null. Use carefully - you must make sure the parent object also deletes its reference, and/or is deleted itself.
     *
     * @return this list
     */
    public NodeList nullParentRefs() {
        for (Node n : a) n.parent = null;
        return this;
    }

    /**
     * Searches recursively for the specified Node instance. Returns false if not found.
     *
     * @param firstNumber the node to look for
     * @return true if the node is in this list or in any descendant list
     */
    public boolean has(Node firstNumber) {
        //Search shallow
        if (a.contains(firstNumber)) return true;

        for (Node n : a) {
            if (n.children != null) {// added null check djl 08-26-2010
                if (n.children.has(firstNumber)) return true;
            }
        }

        return false;
    }

    /**
     * False if the list contains any tags (recursively) that are not valid phrasing content per HTML5.
     * Non-tag nodes are always accepted.
     *
     * @return whether every tag in the tree is on the phrasing-content lists
     */
    public boolean phrasingContentOnly() {
        for (Node every : flattenRecursive().list()) {
            if (every.isTag()
                    && !every.matches(PHRASING_TAGS)
                    && !every.matches(CONDITIONAL_PHRASING_TAGS)) {
                return false;
            }
        }
        return true;
    }

}
/*
 class NodeListXmlTokenReader implements IXmlTokenReader{
	private NodeList nl = null;
	public NodeListXmlTokenReader(NodeList nl){
		this.nl = nl;
		current = nl.first();
		indexes.push(0);
		childrenFinished.push(false);
	}
	private Node current = null;
	private Stack<Integer> indexes = new Stack<Integer>();
	private Stack<Boolean> childrenFinished = new Stack<Boolean>();
	
	public XmlToken read() throws IOException, InvalidMarkupException {
		//current can point to a node that has been processed but hasn't had the closing tag done yet.
		
		SlxToken t = current.toToken();
		
		
		if (current.children != null && current.count)
		
	}
	
	
	public boolean canRead() {
		return current != null;
	}
	public void close()  {}
	

	
}*/