/**
* Licensed to Cloudera, Inc. under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Cloudera, Inc. licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.arabidopsis.ahocorasick;
import java.util.HashSet;
import java.util.Set;
/**
 * A state represents an element in the Aho-Corasick tree.
 *
 * <p>Each state records its depth, its outgoing edges (keyed by a single
 * byte), a failure link, and the set of outputs attached to it.
 */
class State<T> {
    // Arbitrarily chosen constant. If this state ends up getting
    // deeper than THRESHOLD_TO_USE_SPARSE, then we switch over to a
    // sparse edge representation. I did a few tests, and there's a
    // local minimum here. We may want to choose a more sophisticated
    // strategy.
    private static final int THRESHOLD_TO_USE_SPARSE = 3;

    private final int depth;
    private final EdgeList<T> edgeList;
    private final Set<T> outputs;
    private State<T> fail;

    /**
     * Creates a state at the given depth with no edges, no outputs and a
     * {@code null} failure link.
     *
     * @param depth depth of this state; states deeper than
     *              {@code THRESHOLD_TO_USE_SPARSE} use a sparse edge list,
     *              all others use a dense one
     */
    public State(int depth) {
        this.depth = depth;
        if (depth > THRESHOLD_TO_USE_SPARSE) {
            this.edgeList = new SparseEdgeList<T>();
        } else {
            this.edgeList = new DenseEdgeList<T>();
        }
        this.fail = null;
        this.outputs = new HashSet<T>();
    }

    /**
     * Returns the state reached by following the edge labelled {@code b},
     * creating that edge (and a new state one level deeper) if it does not
     * exist yet.
     *
     * @param b the edge label
     * @return the existing or newly created target state
     */
    public State<T> extend(byte b) {
        // Look the edge up once and reuse the result instead of querying
        // the edge list a second time on a hit.
        State<T> existing = this.edgeList.get(b);
        if (existing != null) {
            return existing;
        }
        State<T> nextState = new State<T>(this.depth + 1);
        this.edgeList.put(b, nextState);
        return nextState;
    }

    /**
     * Follows {@code bytes} from this state one byte at a time, creating any
     * missing edges along the way.
     *
     * @param bytes the sequence of edge labels to follow
     * @return the state reached after the last byte, or this state if
     *         {@code bytes} is empty
     */
    public State<T> extendAll(byte[] bytes) {
        State<T> state = this;
        for (byte b : bytes) {
            // extend() already returns the existing target when the edge is
            // present, so no separate lookup is needed here.
            state = state.extend(b);
        }
        return state;
    }

    /**
     * Returns the size of the tree rooted at this State, i.e. this state plus
     * the sizes of all states reachable through its edges. Note: do not call
     * this if there are loops in the edgelist graph, such as those introduced
     * by AhoCorasick.prepare().
     *
     * @return the number of states in the tree rooted here
     */
    public int size() {
        int result = 1;
        for (byte key : edgeList.keys()) {
            result += edgeList.get(key).size();
        }
        return result;
    }

    /**
     * Looks up the edge labelled {@code b} without creating it.
     *
     * @param b the edge label
     * @return whatever the underlying edge list returns for {@code b};
     *         callers in this class treat {@code null} as "no such edge"
     */
    public State<T> get(byte b) {
        return this.edgeList.get(b);
    }

    /**
     * Stores {@code s} as the target of the edge labelled {@code b}.
     *
     * @param b the edge label
     * @param s the target state
     */
    public void put(byte b, State<T> s) {
        this.edgeList.put(b, s);
    }

    /**
     * Returns the labels of this state's edges, as reported by the
     * underlying edge list.
     */
    public byte[] keys() {
        return this.edgeList.keys();
    }

    /**
     * Returns the failure link, or {@code null} if none has been set.
     */
    public State<T> getFail() {
        return this.fail;
    }

    /**
     * Sets the failure link of this state.
     *
     * @param f the state to fall back to
     */
    public void setFail(State<T> f) {
        this.fail = f;
    }

    /**
     * Attaches an output to this state.
     *
     * @param o the output to add
     */
    public void addOutput(T o) {
        this.outputs.add(o);
    }

    /**
     * Returns the outputs attached to this state. This is the internal set
     * itself, not a copy, so changes made through it are visible here.
     */
    public Set<T> getOutputs() {
        return this.outputs;
    }
}