/*
* avenir: Predictive analytic based on Hadoop Map Reduce
* Author: Pranab Ghosh
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.avenir.markov;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.chombo.util.Utility;
/**
* @author pranab
*
*/
public class SuffixTreeBuilder {
private SuffixTreeNode suffixTree = new SuffixTreeNode() ;
private Map<String, SuffixTreeNode> partitionedSuffixTree = new HashMap<String, SuffixTreeNode>();
private List<String> tokens = new ArrayList<String>();
/**
* @param config
* @param suffixTreeFilePathParam
* @param delim
* @param idOrdinals
* @throws IOException
*/
public SuffixTreeBuilder(Configuration config, String suffixTreeFilePathParam, String delim, int[] idOrdinals) throws IOException {
List<String> lines = Utility.getFileLines(config, suffixTreeFilePathParam);
for (String line : lines) {
String[] items = line.split(delim);
if (null != idOrdinals) {
String compId = Utility.join(items, 0, idOrdinals.length, delim);
SuffixTreeNode tree = partitionedSuffixTree.get(compId);
if (null == tree) {
tree = new SuffixTreeNode();
partitionedSuffixTree.put(compId, tree);
}
tokens.clear();
for (int i = idOrdinals.length; i < items.length; ++i ) {
tokens.add(items[i]);
}
tree.add(tokens);
} else {
tokens.clear();
for (int i = 0; i < items.length; ++i ) {
tokens.add(items[i]);
}
suffixTree.add(tokens);
}
}
}
/**
* @return
*/
public SuffixTreeNode getSuffixTree() {
return suffixTree;
}
/**
* @param partId
* @return
*/
public SuffixTreeNode getSuffixTree(String partId) {
return partitionedSuffixTree.get(partId);
}
}