/* * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * Trie.java * Copyright (C) 2007-2012 University of Waikato, Hamilton, New Zealand package weka.core; import java.io.Serializable; import java.lang.reflect.Array; import java.util.Collection; import java.util.Enumeration; import java.util.Hashtable; import java.util.Iterator; import java.util.Vector; *//** * A class representing a Trie data structure for strings. See also <a href="http://en.wikipedia.org/wiki/Trie" target="_blank">Trie</a> on WikiPedia. * * @author fracpete (fracpete at waikato dot ac dot nz) * @version $Revision: 8034 $ */ /* public class Trie implements Serializable, Cloneable, Collection<String>, RevisionHandler { *//** for serialization */ /* private static final long serialVersionUID = -5897980928817779048L; *//** * Represents a node in the trie. 
* * @author fracpete (fracpete at waikato dot ac dot nz) * @version $Revision: 8034 $ */ /* public static class TrieNode extends DefaultMutableTreeNode implements RevisionHandler { *//** for serialization */ /* private static final long serialVersionUID = -2252907099391881148L; *//** the stop character */ /* public final static Character STOP = '\0'; *//** for fast access to the children */ /* protected Hashtable<Character, TrieNode> m_Children; *//** * initializes the node * * @param c * the value of this node */ /* public TrieNode(char c) { this(new Character(c)); } *//** * initializes the node * * @param c * the value of this node */ /* public TrieNode(Character c) { super(c); m_Children = new Hashtable<Character, TrieNode>(100); } *//** * returns the stored character * * @return the stored character */ /* public Character getChar() { return (Character) getUserObject(); } *//** * sets the character this node represents * * @param value * the character to store */ /* public void setChar(Character value) { setUserObject(value); } *//** * adds the given string to its children (creates children if necessary) * * @param suffix * the suffix to add to its children * @return true if the add operation changed the structure */ /* public boolean add(String suffix) { boolean result; Character c; String newSuffix; TrieNode child; result = false; c = suffix.charAt(0); newSuffix = suffix.substring(1); // find child and add if necessary child = m_Children.get(c); if (child == null) { result = true; child = add(c); } // propagate remaining suffix if (newSuffix.length() > 0) result = child.add(newSuffix) || result; return result; } *//** * adds the given character to its children * * @param c * the character to add * @return the generated child node */ /* protected TrieNode add(Character c) { TrieNode child; child = new TrieNode(c); add(child); m_Children.put(c, child); return child; } *//** * removes the given character from its children * * @param c * the character to remove */ 
/* protected void remove(Character c) { TrieNode child; child = m_Children.get(c); remove(child); m_Children.remove(c); } *//** * Removes a suffix from the trie. * * @param suffix * the suffix to remove * @return true if this trie changed as a result of the call */ /* public boolean remove(String suffix) { boolean result; Character c; String newSuffix; TrieNode child; c = suffix.charAt(0); newSuffix = suffix.substring(1); child = m_Children.get(c); if (child == null) { result = false; } else if (newSuffix.length() == 0) { remove(c); result = true; } else { result = child.remove(newSuffix); if (child.getChildCount() == 0) remove(child.getChar()); } return result; } *//** * checks whether a suffix can be found in its children * * @param suffix * the suffix to look for * @return true if suffix was found */ /* public boolean contains(String suffix) { boolean result; Character c; String newSuffix; TrieNode child; c = suffix.charAt(0); newSuffix = suffix.substring(1); child = m_Children.get(c); if (child == null) result = false; else if (newSuffix.length() == 0) result = true; else result = child.contains(newSuffix); return result; } *//** * creates a deep copy of itself * * @return a deep copy of itself */ /* public Object clone() { TrieNode result; Enumeration<Character> keys; Character key; TrieNode child; result = new TrieNode(getChar()); keys = m_Children.keys(); while (keys.hasMoreElements()) { key = keys.nextElement(); child = (TrieNode) m_Children.get(key).clone(); result.add(child); result.m_Children.put(key, child); } return result; } *//** * Indicates whether some other object is "equal to" this one. * * @param obj * the object to check for equality * @return true if equal */ /* public boolean equals(Object obj) { boolean result; TrieNode node; Enumeration<Character> keys; Character key; node = (TrieNode) obj; // is payload the same? 
if (getChar() == null) result = (node.getChar() == null); else result = getChar().equals(node.getChar()); // check children if (result) { keys = m_Children.keys(); while (keys.hasMoreElements()) { key = keys.nextElement(); result = m_Children.get(key).equals(node.m_Children.get(key)); if (!result) break; } } return result; } *//** * returns the node with the given suffix * * @param suffix * the suffix to look for * @return null if unsuccessful otherwise the corresponding node */ /* public TrieNode find(String suffix) { TrieNode result; Character c; String newSuffix; TrieNode child; c = suffix.charAt(0); newSuffix = suffix.substring(1); child = m_Children.get(c); if (child == null) result = null; else if (newSuffix.length() == 0) result = child; else result = child.find(newSuffix); return result; } *//** * returns the common prefix for all the nodes starting with this node. The result includes this node, unless it's the root node or a STOP node. * * @return the result of the search */ /* public String getCommonPrefix() { return getCommonPrefix(""); } *//** * returns the common prefix for all the nodes starting with the node for the specified prefix. Can be null if initial prefix is not found. The result includes this node, unless it's the root node or a STOP node. Using the empty string means starting with this node. * * @param startPrefix * the prefix of the node to start the search from * @return the result of the search, null if startPrefix cannot be found */ /* public String getCommonPrefix(String startPrefix) { String result; TrieNode startNode; if (startPrefix.length() == 0) startNode = this; else startNode = find(startPrefix); if (startNode == null) result = null; else result = startPrefix + startNode.determineCommonPrefix(""); return result; } *//** * determines the common prefix of the nodes. 
* * @param currentPrefix * the common prefix found so far * @return the result of the search */ /* protected String determineCommonPrefix(String currentPrefix) { String result; String newPrefix; if (!isRoot() && (getChar() != STOP)) newPrefix = currentPrefix + getChar(); else newPrefix = currentPrefix; if (m_Children.size() == 1) result = ((TrieNode) getChildAt(0)).determineCommonPrefix(newPrefix); else result = newPrefix; return result; } *//** * returns the number of stored strings, i.e., leaves * * @return the number of stored strings */ /* public int size() { int result; TrieNode leaf; result = 0; leaf = (TrieNode) getFirstLeaf(); while (leaf != null) { if (leaf != getRoot()) result++; leaf = (TrieNode) leaf.getNextLeaf(); } return result; } *//** * returns the full string up to the root * * @return the full string to the root */ /* public String getString() { char[] result; TrieNode node; result = new char[this.getLevel()]; node = this; while (node.getParent() != null) { if (node.isRoot()) break; else result[node.getLevel() - 1] = node.getChar(); node = (TrieNode) node.getParent(); } return new String(result); } *//** * returns the node in a string representation * * @return the node as string */ /* public String toString() { return "" + getChar(); } *//** * Returns the revision string. 
* * @return the revision */ /* public String getRevision() { return RevisionUtils.extract("$Revision: 8034 $"); } } *//** * Represents an iterator over a trie * * @author fracpete (fracpete at waikato dot ac dot nz) * @version $Revision: 8034 $ */ /* public static class TrieIterator implements Iterator<String>, RevisionHandler { *//** the node to use as root */ /* protected TrieNode m_Root; *//** the last leaf for this root node */ /* protected TrieNode m_LastLeaf; *//** the current leaf node */ /* protected TrieNode m_CurrentLeaf; *//** * initializes the iterator * * @param node * the node to use as root */ /* public TrieIterator(TrieNode node) { super(); m_Root = node; m_CurrentLeaf = (TrieNode) m_Root.getFirstLeaf(); m_LastLeaf = (TrieNode) m_Root.getLastLeaf(); } *//** * Returns true if the iteration has more elements. * * @return true if there is at least one more element */ /* public boolean hasNext() { return (m_CurrentLeaf != null); } *//** * Returns the next element in the iteration. * * @return the next element */ /* public String next() { String result; result = m_CurrentLeaf.getString(); result = result.substring(0, result.length() - 1); // remove STOP if (m_CurrentLeaf != m_LastLeaf) m_CurrentLeaf = (TrieNode) m_CurrentLeaf.getNextLeaf(); else m_CurrentLeaf = null; return result; } *//** * ignored */ /* public void remove() { } *//** * Returns the revision string. * * @return the revision */ /* public String getRevision() { return RevisionUtils.extract("$Revision: 8034 $"); } } *//** the root node */ /* protected TrieNode m_Root; *//** the hash code */ /* protected int m_HashCode; *//** * whether the structure got modified and the hash code needs to be re-calculated */ /* protected boolean m_RecalcHashCode; *//** * initializes the data structure */ /* public Trie() { super(); m_Root = new TrieNode(null); m_RecalcHashCode = true; } *//** * Ensures that this collection contains the specified element. 
* * @param o * the string to add * @return true if the structure changed */ /* public boolean add(String o) { return m_Root.add(o + TrieNode.STOP); } *//** * Adds all of the elements in the specified collection to this collection * * @param c * the collection to add */ /* public boolean addAll(Collection<? extends String> c) { boolean result; Iterator<? extends String> iter; result = false; iter = c.iterator(); while (iter.hasNext()) result = add(iter.next()) || result; return result; } *//** * Removes all of the elements from this collection */ /* public void clear() { m_Root.removeAllChildren(); m_RecalcHashCode = true; } *//** * returns a deep copy of itself * * @return a copy of itself */ /* public Object clone() { Trie result; result = new Trie(); result.m_Root = (TrieNode) m_Root.clone(); return result; } *//** * Returns true if this collection contains the specified element. * * @param o * the object to check for in trie * @return true if found */ /* public boolean contains(Object o) { return m_Root.contains(((String) o) + TrieNode.STOP); } *//** * Returns true if this collection contains all of the elements in the specified collection. * * @param c * the collection to look for in the trie * @return true if all elements were found */ /* public boolean containsAll(Collection<?> c) { boolean result; Iterator iter; result = true; iter = c.iterator(); while (iter.hasNext()) { if (!contains(iter.next())) { result = false; break; } } return result; } *//** * checks whether the given prefix is stored in the trie * * @param prefix * the prefix to check * @return true if the prefix is part of the trie */ /* public boolean containsPrefix(String prefix) { return m_Root.contains(prefix); } *//** * Compares the specified object with this collection for equality. 
* * @param o * the object to check for equality */ /* public boolean equals(Object o) { return m_Root.equals(((Trie) o).getRoot()); } *//** * returns the common prefix for all the nodes * * @return the result of the search */ /* public String getCommonPrefix() { return m_Root.getCommonPrefix(); } *//** * returns the root node of the trie * * @return the root node */ /* public TrieNode getRoot() { return m_Root; } *//** * returns all stored strings that match the given prefix * * @param prefix * the prefix that all strings must have * @return all strings that match the prefix */ /* public Vector<String> getWithPrefix(String prefix) { Vector<String> result; TrieNode node; TrieIterator iter; result = new Vector<String>(); if (containsPrefix(prefix)) { node = m_Root.find(prefix); iter = new TrieIterator(node); while (iter.hasNext()) result.add(iter.next()); } return result; } *//** * Returns the hash code value for this collection. * * @return the hash code */ /* public int hashCode() { if (m_RecalcHashCode) { m_HashCode = toString().hashCode(); m_RecalcHashCode = false; } return m_HashCode; } *//** * Returns true if this collection contains no elements. * * @return true if empty */ /* public boolean isEmpty() { return (m_Root.getChildCount() == 0); } *//** * Returns an iterator over the elements in this collection. * * @return returns an iterator over all the stored strings */ /* public Iterator<String> iterator() { return new TrieIterator(m_Root); } *//** * Removes a single instance of the specified element from this collection, if it is present. 
* * @param o * the object to remove * @return true if this collection changed as a result of the call */ /* public boolean remove(Object o) { boolean result; result = m_Root.remove(((String) o) + TrieNode.STOP); m_RecalcHashCode = result; return result; } *//** * Removes all this collection's elements that are also contained in the specified collection * * @param c * the collection to remove * @return true if the collection changed */ /* public boolean removeAll(Collection<?> c) { boolean result; Iterator iter; result = false; iter = c.iterator(); while (iter.hasNext()) { result = remove(iter.next()) || result; } m_RecalcHashCode = result; return result; } *//** * Retains only the elements in this collection that are contained in the specified collection * * @param c * the collection to use as reference * @return true if this collection changed as a result of the call */ /* public boolean retainAll(Collection<?> c) { boolean result; Iterator iter; Object o; result = false; iter = iterator(); while (iter.hasNext()) { o = iter.next(); if (!c.contains(o)) result = remove(o) || result; } m_RecalcHashCode = result; return result; } *//** * Returns the number of elements in this collection. * * @return the number of nodes in the tree */ /* public int size() { return m_Root.size(); } *//** * Returns an array containing all of the elements in this collection. * * @return the stored strings as array */ /* public Object[] toArray() { return toArray(new String[0]); } *//** * Returns an array containing all of the elements in this collection; the runtime type of the returned array is that of the specified array. 
* * @param a * the array into which the elements of this collection are to be stored * @return an array containing the elements of this collection */ /* public <T> T[] toArray(T[] a) { T[] result; Iterator<T> iter; Vector<T> list; int i; list = new Vector<T>(); iter = Utils.<Iterator<T>> cast(iterator()); while (iter.hasNext()) list.add(iter.next()); if (Array.getLength(a) != list.size()) result = Utils.<T[]> cast(Array.newInstance(a.getClass().getComponentType(), list.size())); else result = a; for (i = 0; i < list.size(); i++) result[i] = list.get(i); return result; } *//** * returns the node as String * * @param node * the node to turn into a string * @return the node as string */ /* protected String toString(TrieNode node) { StringBuffer result; int i; StringBuffer indentation; result = new StringBuffer(); // indent the node indentation = new StringBuffer(); for (i = 0; i < node.getLevel(); i++) indentation.append(" | "); result.append(indentation.toString()); // add the node label if (node.getChar() == null) result.append("<root>"); else if (node.getChar() == TrieNode.STOP) result.append("STOP"); else result.append("'" + node.getChar() + "'"); result.append("\n"); // add the children for (i = 0; i < node.getChildCount(); i++) result.append(toString((TrieNode) node.getChildAt(i))); return result.toString(); } *//** * returns the trie in string representation * * @return the trie as string */ /* public String toString() { return toString(m_Root); } *//** * Returns the revision string. * * @return the revision */ /* public String getRevision() { return RevisionUtils.extract("$Revision: 8034 $"); } *//** * Only for testing (prints the built Trie). Arguments are added to the Trie. If no arguments are provided, a few default strings are used for building. 
* * @param args * commandline arguments */ /* public static void main(String[] args) { String[] data; if (args.length == 0) { data = new String[3]; data[0] = "this is a test"; data[1] = "this is another test"; data[2] = "and something else"; } else { data = args.clone(); } // build trie Trie t = new Trie(); for (int i = 0; i < data.length; i++) t.add(data[i]); System.out.println(t); } } */