/* * Copyright 2012 Takao Nakaguchi * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.trie4j.tail.builder; import java.io.Serializable; import org.trie4j.util.CharsCharSequence; public class SuffixTrieTailBuilder implements Serializable, TailBuilder{ public SuffixTrieTailBuilder(){ tails = new StringBuilder(); } public SuffixTrieTailBuilder(StringBuilder tails){ this.tails = tails; } public Node getRoot(){ return root; } @Override public CharSequence getTails() { return tails; } @Override public int insert(CharSequence letters) { return insert(letters, 0, letters.length()); } @Override public int insert(CharSequence letters, int offset, int len){ if(root == null){ tails.append(letters, offset, offset + len).append('\0'); root = new Node(0, len - 1); return 0; } Node responsibleNode = root.insertChild(tails, 0, letters, offset, offset + len - 1); if(root.getParent() != null){ root = root.getParent(); } return responsibleNode.getFirst(); } @Override public int insert(char[] letters) { return insert(letters, 0, letters.length); } @Override public int insert(char[] letters, int offset, int len){ CharSequence lettersSeq = new CharsCharSequence(letters, offset, offset + len); if(root == null){ tails.append(lettersSeq).append('\0'); root = new Node(0, lettersSeq.length() - 1); return 0; } Node responsibleNode = root.insertChild(tails, 0, lettersSeq, 0, lettersSeq.length() - 1); if(root.getParent() != null){ root = root.getParent(); } return responsibleNode.getFirst(); } private Node root; private StringBuilder tails = new StringBuilder(); public static class Node implements Serializable{ public final char[] emptyChars = {}; public Node(int first, int last) { this.first = first; this.last = last; } public Node(int first, int last, Node parent) { this.first = first; this.last = last; this.parent = parent; } public Node(int first, int last, Node parent, Node[] children) { this.first = first; this.last = last; this.parent = parent; this.children = children; } public Node getParent() { return parent; } public void setParent(Node parent) { this.parent = parent; } public int getFirst() { return first; } public int getLast() { return last; } public CharSequence getLetters(CharSequence tails) { return tails.subSequence(first, last + 1); } public void setLetters(int first, int last) { this.first = first; this.last = last; } /** * this.offset this.length * @param childIndex * @param letters * @param offset * @return */ public Node insertChild(StringBuilder tails, int childIndex, CharSequence letters, int begin, int offset){ int matchedCount = 0; int lettersRest = offset + 1 - begin; int thisLettersLength = this.last - this.first + 1; int n = Math.min(lettersRest, thisLettersLength); int c = 0; while(matchedCount < n && (c = letters.charAt(offset - matchedCount) - tails.charAt(this.last - matchedCount)) == 0) matchedCount++; if(matchedCount == n){ if(matchedCount != 0 && lettersRest == thisLettersLength){ return this; } if(lettersRest < thisLettersLength){ Node parent = new Node( this.last - matchedCount + 1, this.last , this.parent , new Node[]{this}); if(this.parent != null){ this.parent.getChildren()[childIndex] = parent; } this.last -= matchedCount; this.parent = parent; return parent; } if(children != null){ int index = 0; int end = getChildren().length; if(end > 16){ int start = 0; while(start < end){ index = (start + end) / 2; Node child = children[index]; c = letters.charAt(offset - matchedCount) - tails.charAt(child.last); if(c == 0){ return child.insertChild(tails, index, letters, begin, offset - matchedCount); } if(c < 0){ end = index; } else if(start == index){ index = end; break; } else{ start = index; } } } else{ for(index = 0; index < end; index++){ Node child = getChildren()[index]; int idx = offset - matchedCount; if(idx < 0){ throw new RuntimeException("???"); } c = letters.charAt(offset - matchedCount) - tails.charAt(child.last); if(c < 0) break; if(c == 0){ return child.insertChild(tails, index, letters, begin, offset - matchedCount); } } } return addChild(tails, index, letters, begin, offset, matchedCount); } else{ return addChild(tails, 0, letters, begin, offset, matchedCount); } } Node[] newParentsChildren = new Node[2]; Node newParent = new Node( this.last - matchedCount + 1, this.last, this.parent, newParentsChildren ); int newChildFirst = tails.length(); tails.append(letters, begin, begin + lettersRest - matchedCount); int newChildLast = tails.length() - 1; if(matchedCount == 0){ tails.append('\0'); //* } else if(matchedCount < 3){ // make the copy of matched characters because those are too short to share. tails.append(letters, begin + lettersRest - matchedCount, begin + lettersRest); int cont = this.last + 1; if(tails.charAt(cont) == '\0'){ tails.append('\0'); } else if(tails.charAt(cont) == '\1'){ tails.append('\1') .append(tails.charAt(cont + 1)) .append(tails.charAt(cont + 2)); } else{ tails.append('\1') .append((char)(cont & 0xffff)) .append((char)((cont & 0xffff0000) >> 16)); } //*/ } else{ int cont = this.last - matchedCount + 1; tails.append('\1') .append((char)(cont & 0xffff)) .append((char)((cont & 0xffff0000) >> 16)); } Node newChild = new Node( newChildFirst, newChildLast, newParent, null ); if(tails.charAt(this.last - matchedCount) < letters.charAt(lettersRest - matchedCount - 1)){ newParentsChildren[0] = this; newParentsChildren[1] = newChild; } else{ newParentsChildren[0] = newChild; newParentsChildren[1] = this; } this.last = this.last - matchedCount; if(this.parent != null){ this.parent.getChildren()[childIndex] = newParent; } this.parent = newParent; return newChild; } public Node[] getChildren() { return children; } public void setChildren(Node[] children) { this.children = children; } private Node addChild(StringBuilder tails, int index, CharSequence letters, int min, int offset, int matchedCount){ int newFirst = tails.length(); tails.append(letters, min, offset - matchedCount + 1); int newLast = tails.length() - 1; if(matchedCount == 0){ tails.append('\0'); //* } else if(matchedCount < 3){ // make the copy of matched characters because those are too short to share. tails.append(letters, offset - matchedCount + 1, offset + 1); int cont = this.last + 1; if(tails.charAt(cont) == '\0'){ tails.append('\0'); } else if(tails.charAt(cont) == '\1'){ tails.append('\1') .append(tails.charAt(cont + 1)) .append(tails.charAt(cont + 2)); } else{ tails.append('\1') .append((char)(cont & 0xffff)) .append((char)((cont & 0xffff0000) >> 16)); } //*/ } else{ int cont = this.last - matchedCount + 1; tails.append('\1') .append((char)(cont & 0xffff)) .append((char)((cont & 0xffff0000) >> 16)); } Node child = new Node(newFirst, newLast, this, null); if(children != null){ Node[] newc = new Node[children.length + 1]; System.arraycopy(children, 0, newc, 0, index); newc[index] = child; System.arraycopy(children, index, newc, index + 1, children.length - index); children = newc; } else{ children = new Node[]{child}; } return child; } private int first; private int last; private Node parent; private Node[] children; private static final long serialVersionUID = 6049322543029754258L; } private static final long serialVersionUID = 2700592335145146376L; }