package edu.berkeley.nlp.lm.phrasetable;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import edu.berkeley.nlp.lm.array.CustomWidthArray;
import edu.berkeley.nlp.lm.array.LongArray;
import edu.berkeley.nlp.lm.map.HashNgramMap;
import edu.berkeley.nlp.lm.map.NgramMap;
import edu.berkeley.nlp.lm.util.Annotations.OutputParameter;
import edu.berkeley.nlp.lm.util.Annotations.PrintMemoryCount;
import edu.berkeley.nlp.lm.values.ValueContainer;
/**
* Stored type and token counts necessary for estimating a Kneser-Ney language
* model
*
* @author adampauls
*
*/
public final class PhraseTableValueContainer implements ValueContainer<PhraseTableValueContainer.PhraseTableValues>
{
private static final long serialVersionUID = 964277160049236607L;
private static final int EMPTY_VALUE_INDEX = Integer.MAX_VALUE;
public interface PhraseTableValues extends Serializable
{
}
public static class FeaturePhraseTableValues implements PhraseTableValues
{
/**
*
*/
private static final long serialVersionUID = 1L;
float[] features;
public FeaturePhraseTableValues(final float[] features) {
this.features = features;
}
}
public static class TargetTranslationsValues implements PhraseTableValues
{
/**
*
*/
private static final long serialVersionUID = 1L;
long[] targetTranslationOffsets;
int[] targetTranslationOrders;
}
@PrintMemoryCount
private LongArray[] features;
@PrintMemoryCount
private LongArray[] valueIndexes;
@PrintMemoryCount
private ArrayList<CustomWidthArray>[] targetTranslations;
private HashNgramMap<PhraseTableValues> map;
private final int separatorWord;
private final int numFeatures;
@SuppressWarnings("unchecked")
public PhraseTableValueContainer(final int separatorWord, final int numFeatures) {
this.separatorWord = separatorWord;
this.numFeatures = numFeatures;
this.targetTranslations = new ArrayList[5];
this.valueIndexes = new LongArray[5];
this.features = new LongArray[5];
}
@Override
public PhraseTableValueContainer createFreshValues(long[] numNgramsForEachOrder_) {
return new PhraseTableValueContainer(separatorWord, numFeatures);
}
@Override
public void getFromOffset(final long offset, final int ngramOrder, @OutputParameter final PhraseTableValues outputVal) {
if (offset >= valueIndexes[ngramOrder].size()) return;
final long valueIndex = valueIndexes[ngramOrder].get(offset);
if (valueIndex == EMPTY_VALUE_INDEX) return;
if (outputVal instanceof FeaturePhraseTableValues && valueIndex >= 0) {
final float[] fs = new float[numFeatures];
for (int i = 0; i < numFeatures; ++i)
fs[i] = Float.intBitsToFloat((int) features[ngramOrder].get((int) (valueIndex + i)));
((FeaturePhraseTableValues) outputVal).features = fs;
}
if (outputVal instanceof TargetTranslationsValues && valueIndex < 0) {
((TargetTranslationsValues) outputVal).targetTranslationOffsets = readOffsets(targetTranslations[ngramOrder].get((int) (-valueIndex - 1)));
((TargetTranslationsValues) outputVal).targetTranslationOrders = readOrders(targetTranslations[ngramOrder].get((int) (-valueIndex - 1)));
}
}
private int[] readOrders(final CustomWidthArray longArray) {
final int[] ret = new int[(int) longArray.size()];
for (int i = 0; i < longArray.size(); ++i)
ret[i] = (byte) (longArray.get(i) >> Integer.SIZE);
return ret;
}
private long[] readOffsets(final CustomWidthArray longArray) {
final long[] ret = new long[(int) longArray.size()];
for (int i = 0; i < longArray.size(); ++i)
ret[i] = (int) longArray.get(i);
return ret;
}
@Override
public void trimAfterNgram(final int ngramOrder, final long size) {
}
@Override
public PhraseTableValues getScratchValue() {
return new FeaturePhraseTableValues(null);
}
@Override
public boolean add(final int[] ngram, final int startPos, final int endPos, final int ngramOrder, final long offset, final long contextOffset,
final int word, final PhraseTableValues val, final long suffixOffset, final boolean ngramIsNew) {
assert !map.isReversed();
final boolean isSourceSidePhrase = !containsSeparator(ngram, startPos, endPos);
if (isSourceSidePhrase) {
addNewSrcPhrase(ngramOrder, offset);
} else if (val instanceof FeaturePhraseTableValues && ((FeaturePhraseTableValues) val).features != null) {
addFeaturesForWholePhrase(ngramOrder, offset, val);
addPointerToTargetSidePhrase(ngramOrder, offset, contextOffset, word);
} else if (ngramIsNew) {
assert val instanceof TargetTranslationsValues || ((FeaturePhraseTableValues) val).features == null;
growValueIndexArrayIfNecessary(ngramOrder);
valueIndexes[ngramOrder].setAndGrowIfNeeded((int) (offset), EMPTY_VALUE_INDEX);
}
return true;
}
private boolean containsSeparator(final int[] ngram, final int startPos, final int endPos) {
for (int i = startPos; i < endPos; ++i)
if (ngram[i] == separatorWord) return true;
return false;
}
/**
* @param ngramOrder
* @param offset
*/
private void addNewSrcPhrase(final int ngramOrder, final long offset) {
growValueIndexArrayIfNecessary(ngramOrder);
if (ngramOrder >= targetTranslations.length) {
targetTranslations = Arrays.copyOf(targetTranslations, targetTranslations.length * 3 / 2);
}
if (targetTranslations[ngramOrder] == null) {
targetTranslations[ngramOrder] = new ArrayList<CustomWidthArray>();
}
final ArrayList<CustomWidthArray> targetTranslationPointersHere = targetTranslations[ngramOrder];
final long currVal = offset >= valueIndexes[ngramOrder].size() ? 0 : valueIndexes[ngramOrder].get((int) (offset));
if (currVal == 0) valueIndexes[ngramOrder].setAndGrowIfNeeded((int) (offset), (-targetTranslations[ngramOrder].size() - 1));
targetTranslationPointersHere.add(new CustomWidthArray(3, Integer.SIZE + Byte.SIZE));
}
/**
* @param ngramOrder
* @param offset
* @param contextOffset
* @param word
*/
private void addPointerToTargetSidePhrase(final int ngramOrder, final long offset, final long contextOffset, final int word) {
int currWord = word;
long srcPhraseOffset = contextOffset;
int srcPhraseOrder = ngramOrder - 1;
while (currWord != separatorWord) {
currWord = map.getNextWord(srcPhraseOffset, srcPhraseOrder);
srcPhraseOffset = map.getNextContextOffset(srcPhraseOffset, srcPhraseOrder);
srcPhraseOrder--;
}
final long valueIndex = -valueIndexes[srcPhraseOrder].get(srcPhraseOffset) - 1;
final ArrayList<CustomWidthArray> targetTranslationPointersHere = targetTranslations[srcPhraseOrder];
targetTranslationPointersHere.get((int) valueIndex).add(combineOrderAndOffset(ngramOrder, offset));
}
/**
* @param ngramOrder
* @param offset
* @return
*/
private long combineOrderAndOffset(final int ngramOrder, final long offset) {
return (((long) ngramOrder) << Integer.SIZE) | offset;
}
/**
* @param ngramOrder
* @param offset
* @param val
*/
private void addFeaturesForWholePhrase(final int ngramOrder, final long offset, final PhraseTableValues val) {
growValueIndexArrayIfNecessary(ngramOrder);
if (ngramOrder >= features.length) {
features = Arrays.copyOf(features, Math.max(ngramOrder + 1, features.length * 3 / 2));
}
if (features[ngramOrder] == null) features[ngramOrder] = LongArray.StaticMethods.newLongArray(Integer.MAX_VALUE, Integer.MAX_VALUE);
valueIndexes[ngramOrder].setAndGrowIfNeeded((int) (offset), features[ngramOrder].size());
for (int f = 0; f < numFeatures; ++f)
features[ngramOrder].add(Float.floatToIntBits(((FeaturePhraseTableValues) val).features[f]));
}
/**
* @param ngramOrder
*/
private void growValueIndexArrayIfNecessary(final int ngramOrder) {
if (ngramOrder >= valueIndexes.length) {
valueIndexes = Arrays.copyOf(valueIndexes, Math.max(ngramOrder + 1, valueIndexes.length * 3 / 2));
}
if (valueIndexes[ngramOrder] == null) valueIndexes[ngramOrder] = LongArray.StaticMethods.newLongArray(Integer.MAX_VALUE, Integer.MAX_VALUE);
}
@Override
public void setSizeAtLeast(final long size, final int ngramOrder) {
}
@Override
public void setFromOtherValues(final ValueContainer<PhraseTableValues> other) {
final PhraseTableValueContainer other_ = (PhraseTableValueContainer) other;
this.features = other_.features;
this.targetTranslations = other_.targetTranslations;
this.valueIndexes = other_.valueIndexes;
}
@Override
public void trim() {
for (int ngramOrder = 0; ngramOrder < features.length; ++ngramOrder) {
if (features[ngramOrder] != null) features[ngramOrder].trim();
if (valueIndexes[ngramOrder] != null) valueIndexes[ngramOrder].trim();
if (ngramOrder < targetTranslations.length && targetTranslations[ngramOrder] != null) {
targetTranslations[ngramOrder].trimToSize();
for (int j = 0; j < targetTranslations[ngramOrder].size(); ++j) {
targetTranslations[ngramOrder].get(j).trim();
}
}
}
}
@Override
public void setMap(final NgramMap<PhraseTableValues> map) {
this.map = (HashNgramMap<PhraseTableValues>) map;
}
public int getSeparatorWord() {
return separatorWord;
}
@Override
public void clearStorageForOrder(int ngramOrder) {
features[ngramOrder] = null;
valueIndexes[ngramOrder] = null;
targetTranslations[ngramOrder] = null;
}
@Override
public boolean storeSuffixoffsets() {
return false;
}
@Override
public int numValueBits(int ngramOrder) {
return 0;
}
}