package TaiGameCore;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectInputStream;
import java.io.ObjectOutput;
import java.io.ObjectOutputStream;
import java.io.Serializable;
/**
* My super fancy DAWG implementation that supports all sorts of crazy deserialization / serialization features.
*
* Yeah, I'm proud of it. The documentation would overwhelm this code, so please see the report instead.
*
* @author Benjamin
*/
public class TaiDAWG <E>{
public TaiDAWG(){
TaiTrees.SiblingNode<WordByRef<E>> createNullNode = (TaiTrees.SiblingNode<WordByRef<E>>)TaiTrees.SiblingNode.createNullNode();
SuperNode = new WordByRef<E>('0',null,0,createNullNode); //This is arbitrary. It won't ever matter.
}
//Null holder at the top of the tree
private WordByRef<E> SuperNode;
/**
 * A single node of the character tree: it stores one character, an optional value,
 * a link to its parent and a sibling tree holding its children keyed by character.
 *
 * Null values are allowed, so a null result from {@link #getContentData()} does not by
 * itself mean "not a word"; use {@link #isFlaggedNode()} for that.
 *
 * @author Benjamin
 */
public static class WordByRef<E> implements TaiTrees.TaiTreeOTreesNode<WordByRef<E>>, Serializable{
    /**
     * Rebuilds the key spelled by the path from the top of the tree down to {@code data}.
     * The string has {@code data.getNodeDepth()} characters and is filled back to front
     * while walking parent links upwards.
     *
     * @param data the node whose key should be reconstructed
     * @return the characters on the path ending at {@code data}
     * @throws NullPointerException if a parent link on the path is null before
     *         {@code depth} nodes have been visited
     */
    public static <F> String toString(TaiDAWG.WordByRef<F> data){
        int keyLength = data.getNodeDepth();
        char[] chars = new char[keyLength];
        TaiDAWG.WordByRef<F> cur = data;
        for (int i = keyLength - 1; i >= 0; i--) {
            chars[i] = cur.getData();
            cur = cur.getParent();
        }
        return new String(chars);
    }

    /** The character this node contributes to a key. */
    private char myChar;
    /** Incremented once per constructed node; never decremented in this class. */
    public static int REFCOUNT = 0;
    /** Value attached to this node; meaningful only while {@link #isWord} is true (may be null). */
    private E myValue;
    private WordByRef<E> parent;
    /** Number of characters on the path from the top of the tree to this node. */
    private int depth;
    private int maxWordLength = -1; //Height of an empty tree is -1
    private boolean isWord = false; //Flag to specify that this node of the tree can terminate a word
    /**
     * All other subtrees branching off from this parent node.
     */
    private TaiTrees.AATreeForSiblings<WordByRef<E>> levelSiblings;
    /** Stays -1 until the enclosing class assigns an ID while writing the tree out. */
    public int uniqueID = -1;

    /**
     * Creates a node with an empty child tree.
     *
     * @param theChar      character stored in this node
     * @param parent       parent node, or null for the top of the tree
     * @param depth        number of characters on the path down to this node
     * @param treeNullNode null node handed to the child sibling tree
     */
    public WordByRef(char theChar, WordByRef<E> parent, int depth, TaiTrees.SiblingNode<WordByRef<E>> treeNullNode){
        REFCOUNT++;
        myChar = theChar;
        this.parent = parent;
        this.depth = depth;
        levelSiblings = new TaiTrees.AATreeForSiblings<WordByRef<E>>(treeNullNode);
    }

    /**
     * Stores {@code value} in this node and flags the node as terminating a word.
     *
     * @param value the value to attach; may be null
     */
    public void setValue(E value){
        isWord = true; //This node ends a word! Yay!
        this.myValue = value;
    }

    /**
     * Recursively ensures that the last {@code length} characters of {@code word} are
     * represented below this node, creating child nodes where needed, and stores
     * {@code value} on the final node.
     *
     * @param word   the full key being inserted
     * @param length number of trailing characters of {@code word} still to be consumed
     * @param value  value to attach to the terminating node
     * @return the node that terminates the word
     */
    public WordByRef<E> insert(String word, int length, E value) {
        if (length==0){
            setValue(value);
            return this; //Base case. Done!
        }
        maxWordLength = Math.max(length,maxWordLength);
        //Now, maxWordLength >= 1. It is a valid height
        char nextLevel = word.charAt(word.length()-length);
        WordByRef<E> child = levelSiblings.get(nextLevel);
        if (child==null){
            child = new WordByRef<E>(nextLevel,this,depth+1,levelSiblings.getNullNode());
            levelSiblings.insert(nextLevel,child);
        }
        return child.insert(word,length-1,value);
    }

    /**
     * @return the value stored on this node (null if none was set, or if null was stored)
     */
    public E getContentData(){
        return myValue;
    }

    /**
     * Looks up the node terminating the last {@code length} characters of {@code word}.
     * Returns null if no mapping is present for this word (including when the characters
     * only form a prefix of longer words).
     *
     * @param word   the full key being looked up
     * @param length number of trailing characters of {@code word} still to be consumed
     */
    public WordByRef<E> get(String word, int length){
        if (length==0){
            // Only nodes flagged as words count; an unflagged node is merely a prefix.
            return isWord ? this : null;
        }
        char nextLevel = word.charAt(word.length()-length);
        WordByRef<E> child = levelSiblings.get(nextLevel);
        if (child==null){ //Definitely not a word
            return null;
        }
        return child.get(word,length-1);
    }

    /**
     * Checks whether {@code word} is "started" below this node, that is, whether every
     * character of {@code word} can be followed through child links. Prefixes of stored
     * words return true, and so does the empty string.
     *
     * @param word characters to follow, starting at this node's children
     * @return true if the whole of {@code word} is a path below this node
     */
    public boolean isPrefix(String word){
        // Walk by index rather than recursing on substring(1): same result, without
        // allocating a new string for every character.
        WordByRef<E> node = this;
        for (int i = 0; i < word.length(); i++){
            node = node.levelSiblings.get(word.charAt(i));
            if (node==null){
                return false;
            }
        }
        return true; //The empty set is a subset of every set.
    }

    /**
     * Checks whether {@code word} is a path below this node that ends on a node flagged
     * as a word. Only whole words return true; mere prefixes return false.
     *
     * @param word characters to follow, starting at this node's children
     * @return true if {@code word} ends at a word-node
     */
    public boolean isContained(String word){
        WordByRef<E> node = this;
        for (int i = 0; i < word.length(); i++){
            node = node.levelSiblings.get(word.charAt(i));
            if (node==null){ //Definitely not a word
                return false;
            }
        }
        // Reaching the end of the path is not enough: the final node must be flagged.
        return node.isWord;
    }

    /**
     * @return the sibling tree holding this node's children (the live instance, not a copy)
     */
    public TaiTrees.AATreeForSiblings<WordByRef<E>> getSubTree(){
        return levelSiblings;
    }

    /**
     * The height of a node is equal to the longest word remainder ever inserted "past" it
     * through {@link #insert}; it is -1 while nothing has been inserted below the node.
     */
    public int height() {
        return maxWordLength;
    }

    /** @return the character stored in this node */
    public char getData() {
        return myChar;
    }

    /** @return true if this node terminates a word */
    public boolean isFlaggedNode() {
        return isWord;
    }

    /** @return the parent node, or null if none is set */
    public WordByRef<E> getParent() {
        return parent;
    }

    /** @return the depth this node was created with */
    public int getNodeDepth() {
        return depth;
    }
}
/**
 * Returns a new string-tree iterator rooted at the sentinel top node of this DAWG.
 * NOTE(review): which nodes it visits and in what order is defined by
 * TaiTrees.StringTreeIterator, which is not visible from this file.
 */
public TaiTrees.StringTreeIterator<WordByRef<E>> iterator() {
    TaiTrees.StringTreeIterator<WordByRef<E>> fromTop =
            new TaiTrees.StringTreeIterator<WordByRef<E>>(SuperNode);
    return fromTop;
}
/** Running number of unique keys that currently carry a value. */
private int size = 0;
/**
 * Maps {@code key} to {@code value}. If the key is already present its value is
 * overwritten and the size is unchanged; otherwise the missing nodes are created and
 * the size grows by one.
 *
 * @param key   the word to store; must not be null
 * @param value the value to attach (null is permitted)
 * @return the node that terminates {@code key}
 */
public TaiDAWG.WordByRef<E> insert(String key, E value) {
    final int keyLength = key.length();
    TaiDAWG.WordByRef<E> existing = SuperNode.get(key, keyLength);
    if (existing != null) {
        // Key already mapped: only replace the stored value.
        existing.setValue(value);
        return existing;
    }
    size++;
    return SuperNode.insert(key, keyLength, value);
}
/**
 * Looks up the node that terminates {@code key}.
 *
 * @param key the word to look up; must not be null
 * @return the terminating node, or null if {@code key} is absent or only a prefix
 */
public TaiDAWG.WordByRef<E> get(String key) {
    final int keyLength = key.length();
    return SuperNode.get(key, keyLength);
}
/**
 * Callback for deserialization. While a tree is being read in, the listener is notified
 * for every node that is kept in the tree and marked as a word in the stream, in the
 * order those nodes are read.
 */
public interface DeserializationOrderedListener<E>{
    /**
     * Called for a kept word node. The call happens before the node's value is attached,
     * so the node may not yet report itself as a word.
     *
     * @param next the node that was just read
     */
    void OrderedDeserialization(WordByRef<E> next);
}
/**
 * A "full" deserialization: reads every node from {@code in} into this tree, with no
 * query filter applied.
 *
 * @param in                stream positioned at the start of a serialized tree
 * @param webSiteReferences optional listener notified for each word node read; may be null
 * @throws IOException            if reading from {@code in} fails
 * @throws ClassNotFoundException if a stored value's class cannot be resolved
 */
public void readInTree(ObjectInput in, DeserializationOrderedListener<E> webSiteReferences) throws IOException, ClassNotFoundException {
    if (LogDeserialization) {
        System.err.println("Fully Deserializing (!)");
    }
    // A null filter means "keep everything".
    final String[] noFilter = null;
    readInTree(in, SuperNode, webSiteReferences, noFilter, true);
}
/** Debug switch: when true, the public readInTree overloads print a line before reading. */
private static boolean LogDeserialization = false;
/**
 * A "partial" deserialization: reads the tree from {@code in}, but only keeps the
 * branches that the recursive reader considers a match for one of the words in
 * {@code specificQuery}.
 *
 * @param in                stream positioned at the start of a serialized tree
 * @param webSiteReferences optional listener notified for each kept word node; may be null
 * @param specificQuery     words to deserialize towards; null behaves like a full
 *                          deserialization
 * @throws IOException            if reading from {@code in} fails
 * @throws ClassNotFoundException if a stored value's class cannot be resolved
 */
public void readInTree(ObjectInput in, DeserializationOrderedListener<E> webSiteReferences, String[] specificQuery) throws IOException, ClassNotFoundException {
    if (LogDeserialization){
        // Concatenating the array itself would print its identity ("[Ljava.lang.String;@...");
        // Arrays.toString prints the actual query words and copes with a null array.
        System.out.println("INFO: Partial Deseriazliation towards \""+java.util.Arrays.toString(specificQuery)+"\"");
    }
    readInTree(in,SuperNode,webSiteReferences,specificQuery,true);
}
// Scratch buffer holding the characters on the path currently being read; index = depth of the parent.
// NOTE(review): the size is fixed at 1024, so a filtered read of a path at depth >= 1024 would
// throw ArrayIndexOutOfBoundsException. TaiDAWG is not declared Serializable in this file, so
// `transient` appears to have no effect here - confirm.
private transient char[] readInTreeBuffer = new char[1024];
/**
* Deserialize this TaiDawg from in. This is more easily understood from the report.
*
* Stream layout per node, as read below: a boolean telling whether the child count follows as a
* byte (true) or an int (false), the child count, and then for each child: its char, a boolean
* "is word" flag, the value (only if the flag is set, read via readValue) and finally the child's
* own subtree in the same layout.
*
* root: this is a recursive deserialization; this method will be called recursively on successive children
*
* webSiteReferences: If non-null, each kept word node deserialized will trigger webSiteReference's callbacks
* (You can do with that what you will... )
*
* onlySearch: Here is where the magic happens. We only deserialize the parts of the tree that are fuzzy-matched
* to this array of strings. That is, we minimize the size that actually has to be exploded to memory.
* If null, everything is kept.
*
* maintainChildren: When we are "passing over" nodes (due to onlySearch telling us NOT to deserialize them),
* we need to make sure that child nodes do NOT add themselves to the tree. This basically just ensures that
* the resulting tree is valid.
*/
private void readInTree(ObjectInput in, WordByRef<E> root, DeserializationOrderedListener<E> webSiteReferences,String[] onlySearch, boolean maintainChildren) throws IOException, ClassNotFoundException {
int numchildren = -1;
// The leading boolean selects the width of the child count (byte vs. int).
if (in.readBoolean()){
numchildren = in.readByte();
} else {
numchildren = in.readInt();
}
int treeDepth = root.depth;
for(int k = 0; k < numchildren; k++){
char newChar = in.readChar();
//Read the node
// The node is always constructed, even when it ends up skipped, because the recursion
// below needs a root to read (and discard) its subtree from the stream.
WordByRef<E> toAdd = new WordByRef<E>(newChar,root,treeDepth+1,root.levelSiblings.getNullNode());
toAdd.maxWordLength = -2; //UNKNOWN.
//Decide how we want to handle this node
// maintainChild: link this node into the tree. ignoreChild: keep the node but do not attach its value.
boolean maintainChild = false;
boolean ignoreChild = false;
if (maintainChildren){ //Do a check to see if we should maintain the next level of children:
//Ok, we can drop this sibling now if we need to.
if (onlySearch!=null){
//We have an instruction to ONLY deserialize up to a certain word.
readInTreeBuffer[treeDepth]=newChar;
//If this path matches one of the words...
boolean fuzzyHolds = false;
// Stays true unless some matching query has exactly treeDepth+1 characters.
boolean ignoreShortWord = true;
// Stays true only if every matching query is more than treeDepth+3 characters long.
boolean wordTooShort = true;
for(String possible : onlySearch){
boolean fuzzyCond = treeDepth<possible.length();
if (!fuzzyCond){
continue; //Shortcircuiting
}
//A very broad fuzzy search. We'll use levenshtein distances to prune this later.
// Only the first treeDepth characters (the ancestors' path) are compared; newChar itself,
// stored at index treeDepth, is not. So every sibling under a matching prefix passes.
for(int rc = 0; rc < treeDepth; rc++){
fuzzyCond&=(readInTreeBuffer[rc]==possible.charAt(rc));
}
if (fuzzyCond){
fuzzyHolds = true;
//break; ''We can't break.
// (the two flags below must be accumulated over ALL matching queries)
//Ok, so this word fuzzy-requires this word.
ignoreShortWord &= (possible.length()-1!=treeDepth);
wordTooShort &= (possible.length()-3>treeDepth);
}
}
if (fuzzyHolds){ //make this FUZZY BROADER
//Maintain this child (everything else gets GC'ed.)
maintainChild = true;
//Ok, so now let's narrow our fuzzy search (it includes too much):
if (treeDepth<=3 && ignoreShortWord){ //Now the whole word.
//When words are less than or equal to 3 letters long, only exact matches are allowed.
ignoreChild = true;
} else {
//An obvious prune: words that are 3 letters too short.
if (wordTooShort){
ignoreChild = true; //3 characters off.
}
//Fuzzy narrowing: Levenshtein distances
//This carries into multilingual (i.e. asian) languages as well.
// (no Levenshtein computation is performed in this method)
}
} else {
//Just to make sure the GCing goes ok:
toAdd.parent = null;
}
} else {
//Normal behavior is to always maintain:
maintainChild = true;
}
} //End "maintain children" check. If this block was not executed, continue skipping children.
//Is it a word?
// The flag and value are read unconditionally, even for skipped nodes, so that the stream
// position stays aligned with the serialized layout.
boolean isWord = in.readBoolean();
E value = null;
if (isWord){
value = readValue(in);
}
if (maintainChild){
root.levelSiblings.insert(newChar, toAdd);
// Only the top node's height is tracked during deserialization.
SuperNode.maxWordLength = Math.max(SuperNode.maxWordLength, treeDepth+1);
//If it's a word, do more:
if (isWord){
// The listener fires for every kept word node, including ones whose value is then
// ignored, and before setValue is called on the node.
if (webSiteReferences!=null){
webSiteReferences.OrderedDeserialization(toAdd);
}
if (!ignoreChild){ //Recognize its value.
size++;
toAdd.setValue(value);
}
}
}
//Recurse further.
// Once a node is not maintained, maintainChildren is false for its whole subtree: the
// descendants are still read from the stream but never linked into the tree.
readInTree(in,toAdd,webSiteReferences,onlySearch,maintainChild);
}
}
/**
 * Serializes this taiDawg to {@code out}, starting at the sentinel top node.
 *
 * @param out destination stream
 * @throws IOException if writing to {@code out} fails
 */
public void writeOutTree(ObjectOutput out) throws IOException {
    // Single-element array used as a mutable counter shared by the whole recursion.
    final int[] wordCounter = {0};
    writeOutTree(out, SuperNode, wordCounter);
}
/**
 * Serializes the children of one node, recursing into each child's subtree.
 *
 * Layout written per node: a boolean telling whether the child count follows as a byte
 * (true) or as an int (false), the child count, and then for every child its char, its
 * "is word" flag, its value (only for word nodes, via writeValue) and its own subtree.
 *
 * superNode2: the node whose children are written; this is a recursive method, called
 * to serialize successive children.
 *
 * immutableInt: despite the name, a mutable single-element counter holding the number
 * of word nodes written so far. Each word node receives the current count as its
 * uniqueID before the counter is advanced.
 *
 * @throws RuntimeException if an assigned uniqueID exceeds the current size of this tree
 */
private void writeOutTree(ObjectOutput out, WordByRef<E> superNode2, int[] immutableInt) throws IOException {
    final int childCount = superNode2.levelSiblings.size();
    final boolean countFitsInByte = childCount < Byte.MAX_VALUE;
    out.writeBoolean(countFitsInByte);
    if (countFitsInByte) {
        out.writeByte(childCount);
    } else {
        out.writeInt(childCount);
    }
    TaiTrees.BSTIterator<WordByRef<E>> children = superNode2.levelSiblings.iterator();
    int written = 0;
    while (written < childCount) {
        WordByRef<E> child = children.next();
        written++;
        out.writeChar(child.myChar);
        out.writeBoolean(child.isWord);
        if (child.isWord) {
            // Number the word node, then advance the shared counter for the next one.
            child.uniqueID = immutableInt[0];
            immutableInt[0]++;
            if (child.uniqueID > size) {
                throw new RuntimeException(Integer.toString(child.uniqueID));
            }
            writeValue(out, child.myValue);
        }
        writeOutTree(out, child, immutableInt);
    }
}
/**
 * Writes one stored value to the stream. Subclasses of taiDawg can override this to
 * supply a custom serialization routine for the E objects, which is ESPECIALLY useful
 * if E is not serializable.
 *
 * This default implementation requires {@code out} to be an ObjectOutputStream and
 * writes the value unshared.
 *
 * @throws ClassCastException if {@code out} is not an ObjectOutputStream
 */
public void writeValue(ObjectOutput out, E myValue) throws IOException {
    ObjectOutputStream objectStream = (ObjectOutputStream) out;
    objectStream.writeUnshared(myValue);
}
/**
 * Reads one stored value from the stream; the counterpart of writeValue, and likewise
 * overridable by subclasses.
 *
 * This default implementation requires {@code in} to be an ObjectInputStream and reads
 * the value unshared. The cast to E is unchecked.
 *
 * @throws ClassCastException if {@code in} is not an ObjectInputStream
 */
public E readValue(ObjectInput in) throws IOException, ClassNotFoundException {
    ObjectInputStream objectStream = (ObjectInputStream) in;
    @SuppressWarnings("unchecked")
    E value = (E) objectStream.readUnshared();
    return value;
}
/**
 * @return the running count of keys, as maintained by insert and by deserialization
 */
public int size() {
    return this.size;
}
}