package edu.hawaii.jmotif.sax.trie;
import java.util.List;
/**
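* Implements a complete, fixed-depth trie over the first letters of the lower-case English
* alphabet. Both the branching factor and the depth equal the alphabet size, and every leaf
* keeps a list of integer occurrence indexes for the word spelled by the path to it.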
*
* @author Pavel Senin.
*
*/
public class SAXTrieTree {

  /** Code of the first letter of the supported alphabet ({@code 'a'}). */
  private static final int A_START = 97;

  /** Code of the last letter of the supported alphabet ({@code 'z'}). */
  private static final int A_END = 122;

  /** The root of the tree. */
  private final TrieInnerNode root;

  /**
   * The alphabet size used for the building. It is both the number of children of every inner
   * node and the depth of the tree, i.e. the exact length of every word the tree accepts.
   */
  private final int alphabetSize;

  /**
   * Constructor. Eagerly builds the complete tree for the given alphabet size.
   *
   * @param alphabetSize The alphabet size.
   * @throws TrieException If wrong parameters specified: {@code null}, non-positive, or not
   *         smaller than the span between the first and the last alphabet letters.
   */
  public SAXTrieTree(Integer alphabetSize) throws TrieException {
    // NOTE(review): the upper bound is (A_END - A_START) == 25 exclusive, so sizes 25 and 26 are
    // rejected although 26 letters exist; kept as is to preserve the accepted range.
    if ((null == alphabetSize) || (alphabetSize <= 0) || (alphabetSize >= (A_END - A_START))) {
      throw new TrieException("Unable to create trie structure for the alphabet size of "
          + alphabetSize);
    }
    this.alphabetSize = alphabetSize;
    this.root = new TrieInnerNode("root");
    buildTrie(this.root, this.alphabetSize, this.alphabetSize);
  }

  /**
   * Traverse the tree following the string and get occurrences list.
   *
   * @param string The query string; must be non-null, exactly as long as the alphabet size and
   *        consist only of letters of the tree alphabet.
   * @return The occurrences list held by the leaf the string leads to, exactly as the leaf
   *         returns it (presumably the live list rather than a copy - verify against
   *         {@code TrieLeafNode}).
   * @throws TrieException If error occurs - null string, wrong string length provided, symbols
   *         outside of the alphabet etc.
   */
  public List<Integer> getOccurrences(String string) throws TrieException {
    // sanity check; a null string is reported the same way as any other unusable query
    //
    boolean usable = (null != string) && (this.root.getDescendants().size() > 0)
        && (string.length() == this.alphabetSize) && !containsWrongSymbols(string);
    if (!usable) {
      throw new TrieException("Unable to get occurences for the string \"" + string
          + "\" the tree depth (alphabet size) is " + this.alphabetSize);
    }
    return findLeaf(string).getOccurences();
  }

  /**
   * Add the occurrence into the table.
   *
   * @param str The string; must be non-null, exactly as long as the alphabet size and consist
   *        only of letters of the tree alphabet.
   * @param idx The occurrence index.
   * @return the full list of occurrences of the string, including the one just added.
   * @throws TrieException if goes wrong - the string is rejected or the traversal fails.
   */
  public List<Integer> addOccurence(String str, int idx) throws TrieException {
    // sanity check
    //
    if ((null == str) || (str.length() != this.alphabetSize) || (containsWrongSymbols(str))) {
      throw new TrieException("Cannot populate occurrence of \"" + str + "\"into the trie of size "
          + this.alphabetSize);
    }
    TrieLeafNode leaf = findLeaf(str);
    leaf.addOccurrence(idx);
    return leaf.getOccurences();
  }

  /**
   * Walks from the root down to the leaf addressed by the word, one symbol per tree level.
   * Shared by the lookup and the insertion so both follow exactly the same path and report
   * broken structure with an exception instead of a failed cast or a null dereference.
   *
   * @param word The already validated word to follow.
   * @return The leaf node the word leads to.
   * @throws TrieException If a node on the path is missing or is of an unexpected kind.
   */
  private TrieLeafNode findLeaf(String word) throws TrieException {
    TrieAbstractNode current = this.root;
    for (int i = 0; i < word.length(); i++) {
      // only inner nodes can be descended from; a leaf met before the word ends is an error
      if (!(current instanceof TrieInnerNode)) {
        throw new TrieException("Ubnormal condition passed into the method. Unable to proceed.");
      }
      current = ((TrieInnerNode) current).getDescendant(String.valueOf(word.charAt(i)));
      if (null == current) {
        throw new TrieException("Internal error: having a null node where it shouldn't be.");
      }
    }
    // the word is consumed - the node reached must be a leaf
    if (!(current instanceof TrieLeafNode)) {
      throw new TrieException("Ubnormal condition passed into the method. Unable to proceed.");
    }
    return (TrieLeafNode) current;
  }

  /**
   * Build the actual trie: recursively attaches one child per alphabet letter to the given node
   * until the requested depth is exhausted. The deepest level is made of leaf nodes.
   *
   * @param parent The node to attach the children to.
   * @param branching The alphabet size, i.e. the number of children per node.
   * @param depth2Go The depth of the tree left, counting the level about to be created.
   */
  private void buildTrie(TrieInnerNode parent, int branching, int depth2Go) {
    for (int i = 0; i < branching; i++) {
      // the node key is the i-th letter counting from the alphabet start
      String letter = String.valueOf((char) (A_START + i));
      if (depth2Go > 1) {
        // internal level - add the node and keep building beneath it
        TrieInnerNode inner = new TrieInnerNode(letter);
        parent.addNext(inner);
        buildTrie(inner, branching, depth2Go - 1);
      }
      else {
        // last level - nodes here must be leafs
        TrieLeafNode leaf = new TrieLeafNode(letter);
        parent.addNext(leaf);
      }
    }
  }

  /**
   * The string validator - check if string has symbols outside of the tree alphabet, which is
   * the first {@code alphabetSize} letters counting from {@code 'a'}.
   *
   * @param str The string to check, must not be null.
   * @return True if the string contains at least one unacceptable (invalid) symbol, false if
   *         all symbols are valid.
   */
  private boolean containsWrongSymbols(String str) {
    int maxSymbol = A_START + this.alphabetSize;
    for (int i = 0; i < str.length(); i++) {
      int symbol = str.charAt(i);
      // both bounds matter: anything below 'a' has no node in the tree either
      if ((symbol < A_START) || (symbol >= maxSymbol)) {
        return true;
      }
    }
    return false;
  }
}