package edu.berkeley.nlp.lm.values;
import java.util.Arrays;
import java.util.List;
import edu.berkeley.nlp.lm.array.CustomWidthArray;
import edu.berkeley.nlp.lm.array.LongArray;
import edu.berkeley.nlp.lm.bits.BitList;
import edu.berkeley.nlp.lm.bits.BitStream;
import edu.berkeley.nlp.lm.bits.BitUtils;
import edu.berkeley.nlp.lm.collections.Indexer;
import edu.berkeley.nlp.lm.collections.LongToIntHashMap;
import edu.berkeley.nlp.lm.collections.LongToIntHashMap.Entry;
import edu.berkeley.nlp.lm.map.NgramMap;
import edu.berkeley.nlp.lm.util.Logger;
import edu.berkeley.nlp.lm.util.LongRef;
import edu.berkeley.nlp.lm.util.Annotations.OutputParameter;
import edu.berkeley.nlp.lm.util.Annotations.PrintMemoryCount;
public final class UnrankedUncompressedProbBackoffValueContainer implements ProbBackoffValueContainer
{
private static final long serialVersionUID = 964277160049236607L;
private final boolean storeSuffixIndexes;
private final int[] suffixBitsForOrder;
private final long[] numNgramsForEachOrder;
private CustomWidthArray[] valueRanks = null;
private NgramMap<ProbBackoffPair> ngramMap;
public UnrankedUncompressedProbBackoffValueContainer(final boolean storePrefixes, long[] numNgramsForEachOrder) {
this.storeSuffixIndexes = storePrefixes;
this.numNgramsForEachOrder = numNgramsForEachOrder;
this.valueRanks = new CustomWidthArray[numNgramsForEachOrder.length];
suffixBitsForOrder = new int[numNgramsForEachOrder.length];
}
@Override
public UnrankedUncompressedProbBackoffValueContainer createFreshValues(long[] numNgramsForEachOrder_) {
return new UnrankedUncompressedProbBackoffValueContainer(storeSuffixIndexes, numNgramsForEachOrder_);
}
@Override
public final float getProb(final int ngramOrder, final long index) {
return ProbBackoffPair.probOf(getProbBackoff(ngramOrder, index));
}
/**
* @param ngramOrder
* @param index
* @return
*/
private long getProbBackoff(final int ngramOrder, final long index) {
return valueRanks[ngramOrder].get(index, ngramOrder == 0 ? 0 : valueRanks[ngramOrder].getKeyWidth(), numProbBackoffBits(ngramOrder));
}
@Override
public void getFromOffset(final long index, final int ngramOrder, @OutputParameter final ProbBackoffPair outputVal) {
long l = getProbBackoff(ngramOrder, index);
outputVal.prob = ProbBackoffPair.probOf(l);
outputVal.backoff = ProbBackoffPair.backoffOf(l);
}
/*
* (non-Javadoc)
*
* @see edu.berkeley.nlp.lm.values.IProb#getBackoff(int, long)
*/
@Override
public final float getBackoff(final int ngramOrder, final long index) {
return ProbBackoffPair.backoffOf(getProbBackoff(ngramOrder, index));
}
/*
* (non-Javadoc)
*
* @see edu.berkeley.nlp.lm.values.IProb#getScratchValue()
*/
@Override
public ProbBackoffPair getScratchValue() {
return new ProbBackoffPair(Float.NaN, Float.NaN);
}
@Override
public void setFromOtherValues(final ValueContainer<ProbBackoffPair> other) {
final UnrankedUncompressedProbBackoffValueContainer o = (UnrankedUncompressedProbBackoffValueContainer) other;
for (int i = 0; i < valueRanks.length; ++i) {
this.valueRanks[i] = o.valueRanks[i];
}
}
@Override
public void trim() {
}
@Override
public boolean storeSuffixoffsets() {
return storeSuffixIndexes;
}
@Override
public int numValueBits(int ngramOrder) {
return numProbBackoffBits(ngramOrder) + suffixBitsForOrder[ngramOrder];
}
/**
* @param ngramOrder
* @return
*/
private int numProbBackoffBits(int ngramOrder) {
return (ngramOrder == numNgramsForEachOrder.length - 1 ? Float.SIZE : 2 * Float.SIZE);
}
@Override
public boolean add(int[] ngram, int startPos, int endPos, int ngramOrder, long offset, long contextOffset, int word, ProbBackoffPair val_,
long suffixOffset, boolean ngramIsNew) {
if (suffixOffset < 0 && storeSuffixIndexes) return false;
assert suffixOffset < 0 || ngramOrder == 0 || CustomWidthArray.numBitsNeeded(suffixOffset) <= suffixBitsForOrder[ngramOrder] : "Problem with suffix offset bits "
+ suffixOffset + " " + numNgramsForEachOrder[ngramOrder - 1] + " " + Arrays.toString(ngram);
ProbBackoffPair val = val_;
if (val == null) val = getScratchValue();
setSizeAtLeast(10, ngramOrder);
final long indexOfCounts = val.asLong();
final CustomWidthArray valueRanksHere = valueRanks[ngramOrder];
final int widthOffset = ngramOrder == 0 ? 0 : valueRanksHere.getKeyWidth();
valueRanksHere.setAndGrowIfNeeded(offset, ngramOrder == valueRanks.length - 1 ? BitUtils.getLowLong(indexOfCounts) : indexOfCounts, widthOffset,
numProbBackoffBits(ngramOrder));
if (storeSuffixIndexes && ngramOrder > 0) {
assert suffixOffset >= 0;
assert suffixOffset <= Integer.MAX_VALUE;
valueRanksHere.setAndGrowIfNeeded(offset, suffixOffset, widthOffset + numProbBackoffBits(ngramOrder), suffixBitsForOrder[ngramOrder]);
}
return true;
}
@Override
public void setSizeAtLeast(long size, int ngramOrder) {
if (valueRanks[ngramOrder] == null) {
final int suffixBits = (ngramOrder == 0 || !storeSuffixIndexes) ? 0 : suffixBitsForOrder[ngramOrder];
if (storeSuffixIndexes && ngramOrder < suffixBitsForOrder.length - 1) suffixBitsForOrder[ngramOrder + 1] = CustomWidthArray.numBitsNeeded(size);
final CustomWidthArray valueStoringArray = ngramMap.getValueStoringArray(ngramOrder);
final boolean useValueStoringArrayHere = valueStoringArray != null;
if (useValueStoringArrayHere) {
valueRanks[ngramOrder] = valueStoringArray;
} else {
valueRanks[ngramOrder] = new CustomWidthArray(size, numProbBackoffBits(ngramOrder) + suffixBits);
valueRanks[ngramOrder].setAndGrowIfNeeded(size - 1, getScratchValue().asLong());
}
}
}
@Override
public long getSuffixOffset(final long index, final int ngramOrder) {
assert ngramOrder > 0;
final CustomWidthArray valueRanksHere = valueRanks[ngramOrder];
final int widthOffset = valueRanksHere.getKeyWidth();
final int width = widthOffset + numProbBackoffBits(ngramOrder);
return valueRanksHere.get(index, width, valueRanksHere.getFullWidth() - width);
}
@Override
public void trimAfterNgram(int ngramOrder, long size) {
}
@Override
public void setMap(NgramMap<ProbBackoffPair> map) {
this.ngramMap = map;
}
@Override
public void clearStorageForOrder(int ngramOrder) {
}
}