package org.apache.lucene.util.fst;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
// Used to dedup states (lookup already-frozen states)
final class NodeHash<T> {
private int[] table;
private int count;
private int mask;
private final FST<T> fst;
private final FST.Arc<T> scratchArc = new FST.Arc<T>();
public NodeHash(FST<T> fst) {
table = new int[16];
mask = 15;
this.fst = fst;
}
private boolean nodesEqual(Builder.UnCompiledNode<T> node, int address) throws IOException {
final FST<T>.BytesReader in = fst.getBytesReader(0);
fst.readFirstRealArc(address, scratchArc);
if (scratchArc.bytesPerArc != 0 && node.numArcs != scratchArc.numArcs) {
return false;
}
for(int arcUpto=0;arcUpto<node.numArcs;arcUpto++) {
final Builder.Arc<T> arc = node.arcs[arcUpto];
if (arc.label != scratchArc.label ||
!arc.output.equals(scratchArc.output) ||
((Builder.CompiledNode) arc.target).address != scratchArc.target ||
!arc.nextFinalOutput.equals(scratchArc.nextFinalOutput) ||
arc.isFinal != scratchArc.isFinal()) {
return false;
}
if (scratchArc.isLast()) {
if (arcUpto == node.numArcs-1) {
return true;
} else {
return false;
}
}
fst.readNextRealArc(scratchArc, in);
}
return false;
}
// hash code for an unfrozen node. This must be identical
// to the un-frozen case (below)!!
private int hash(Builder.UnCompiledNode<T> node) {
final int PRIME = 31;
//System.out.println("hash unfrozen");
int h = 0;
// TODO: maybe if number of arcs is high we can safely subsample?
for(int arcIdx=0;arcIdx<node.numArcs;arcIdx++) {
final Builder.Arc<T> arc = node.arcs[arcIdx];
//System.out.println(" label=" + arc.label + " target=" + ((Builder.CompiledNode) arc.target).address + " h=" + h + " output=" + fst.outputs.outputToString(arc.output) + " isFinal?=" + arc.isFinal);
h = PRIME * h + arc.label;
h = PRIME * h + ((Builder.CompiledNode) arc.target).address;
h = PRIME * h + arc.output.hashCode();
h = PRIME * h + arc.nextFinalOutput.hashCode();
if (arc.isFinal) {
h += 17;
}
}
//System.out.println(" ret " + (h&Integer.MAX_VALUE));
return h & Integer.MAX_VALUE;
}
// hash code for a frozen node
private int hash(int node) throws IOException {
final int PRIME = 31;
final FST<T>.BytesReader in = fst.getBytesReader(0);
//System.out.println("hash frozen");
int h = 0;
fst.readFirstRealArc(node, scratchArc);
while(true) {
//System.out.println(" label=" + scratchArc.label + " target=" + scratchArc.target + " h=" + h + " output=" + fst.outputs.outputToString(scratchArc.output) + " next?=" + scratchArc.flag(4) + " final?=" + scratchArc.isFinal());
h = PRIME * h + scratchArc.label;
h = PRIME * h + scratchArc.target;
h = PRIME * h + scratchArc.output.hashCode();
h = PRIME * h + scratchArc.nextFinalOutput.hashCode();
if (scratchArc.isFinal()) {
h += 17;
}
if (scratchArc.isLast()) {
break;
}
fst.readNextRealArc(scratchArc, in);
}
//System.out.println(" ret " + (h&Integer.MAX_VALUE));
return h & Integer.MAX_VALUE;
}
public int add(Builder.UnCompiledNode<T> node) throws IOException {
// System.out.println("hash: add count=" + count + " vs " + table.length);
final int h = hash(node);
int pos = h & mask;
int c = 0;
while(true) {
final int v = table[pos];
if (v == 0) {
// freeze & add
final int address = fst.addNode(node);
//System.out.println(" now freeze addr=" + address);
assert hash(address) == h : "frozenHash=" + hash(address) + " vs h=" + h;
count++;
table[pos] = address;
if (table.length < 2*count) {
rehash();
}
return address;
} else if (nodesEqual(node, v)) {
// same node is already here
return v;
}
// quadratic probe
pos = (pos + (++c)) & mask;
}
}
// called only by rehash
private void addNew(int address) throws IOException {
int pos = hash(address) & mask;
int c = 0;
while(true) {
if (table[pos] == 0) {
table[pos] = address;
break;
}
// quadratic probe
pos = (pos + (++c)) & mask;
}
}
private void rehash() throws IOException {
final int[] oldTable = table;
table = new int[2*table.length];
mask = table.length-1;
for(int idx=0;idx<oldTable.length;idx++) {
final int address = oldTable[idx];
if (address != 0) {
addNew(address);
}
}
}
public int count() {
return count;
}
}