package edu.berkeley.nlp.lm.io;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map.Entry;
import edu.berkeley.nlp.lm.array.LongArray;
import edu.berkeley.nlp.lm.collections.Counter;
import edu.berkeley.nlp.lm.collections.Indexer;
import edu.berkeley.nlp.lm.collections.LongToIntHashMap;
import edu.berkeley.nlp.lm.collections.LongRepresentable;
import edu.berkeley.nlp.lm.util.Logger;
/**
 * First-pass reader callback that only gathers statistics about the n-grams it
 * is shown; it does not store the n-grams themselves. Three tallies are kept:
 * <ul>
 * <li>how often each distinct value (as given by {@code V.asLong()}) occurs,</li>
 * <li>how many n-grams were seen for each order, and</li>
 * <li>for each order, how many n-grams are attributed to each word (the first
 * word of the n-gram when {@code reverse} is set, the last word otherwise).</li>
 * </ul>
 * 
 * @author adampauls
 * 
 * @param <V>
 *            Value type
 */
public final class FirstPassCallback<V extends LongRepresentable<V>> implements ArpaLmReaderCallback<V>
{

	/** Number of orders the per-order arrays can hold before they first need to grow. */
	private static final int INITIAL_ORDER_CAPACITY = 5;

	/** Occurrence count of every distinct value, keyed by {@code V.asLong()}. */
	private final LongToIntHashMap valueCounter;

	/** Indexed by (order - 1); each entry counts n-grams of that order per word id. */
	private LongArray[] numNgramsForEachWord;

	/** Indexed by (order - 1); total number of n-grams seen for that order. */
	private long[] numNgramsForOrder;

	/** Whether n-grams are attributed to their first word (true) or their last word (false). */
	private final boolean reverse;

	/** Largest n-gram order seen so far, or announced through {@link #initWithLengths(List)}. */
	private int maxNgramOrder = 0;

	/**
	 * @param reverse
	 *            if true, each n-gram is counted under its first word; otherwise
	 *            under its last word
	 */
	public FirstPassCallback(final boolean reverse) {
		this.reverse = reverse;
		this.valueCounter = new LongToIntHashMap();
	}

	/**
	 * Records one n-gram occupying {@code ngram[startPos]} (inclusive) through
	 * {@code ngram[endPos]} (exclusive).
	 * 
	 * @param ngram
	 *            word ids; only the span {@code [startPos, endPos)} is used
	 * @param startPos
	 *            index of the first word of the n-gram
	 * @param endPos
	 *            index one past the last word of the n-gram
	 * @param v
	 *            value attached to the n-gram; its {@code asLong()} form is tallied
	 * @param words
	 *            unused by this callback
	 */
	@Override
	public void call(final int[] ngram, final int startPos, final int endPos, final V v, final String words) {
		maxNgramOrder = Math.max(endPos - startPos, maxNgramOrder);
		// Zero-based order: unigrams live in slot 0, bigrams in slot 1, ...
		final int ngramOrder = endPos - startPos - 1;
		ensureWordCountsAllocated(ngramOrder);
		ensureOrderCountsAllocated(ngramOrder);
		valueCounter.incrementCount(v.asLong(), 1);
		final LongArray ngramOrderCounts = numNgramsForEachWord[ngramOrder];
		// The last word of the span is ngram[endPos - 1]. This equals the former
		// ngram[ngramOrder] whenever startPos == 0, and stays correct when the
		// n-gram does not start at the beginning of the array.
		final int word = reverse ? ngram[startPos] : ngram[endPos - 1];
		ngramOrderCounts.incrementCount(word, 1);
		numNgramsForOrder[ngramOrder]++;
	}

	/** No per-order work is needed at the end of an order. */
	@Override
	public void handleNgramOrderFinished(final int order) {
	}

	/** Logs how many distinct values were encountered. */
	@Override
	public void cleanup() {
		Logger.startTrack("Cleaning up values");
		Logger.logss("Found " + valueCounter.size() + " unique counts");
		Logger.endTrack();
	}

	/**
	 * @return the live (not copied) map from value to number of occurrences
	 */
	public LongToIntHashMap getValueCounter() {
		return valueCounter;
	}

	/**
	 * Pre-sizes the per-word counters from the announced number of n-grams of
	 * each order. The first entry is taken as the number of words.
	 * 
	 * @param numNGrams
	 *            number of n-grams for each order, unigrams first; must contain at
	 *            least one entry
	 */
	@Override
	public void initWithLengths(final List<Long> numNGrams) {
		maxNgramOrder = numNGrams.size();
		final long numWords = numNGrams.get(0);
		numNgramsForEachWord = new LongArray[numNGrams.size()];
		for (int ngramOrder = 0; ngramOrder < numNgramsForEachWord.length; ++ngramOrder) {
			numNgramsForEachWord[ngramOrder] = LongArray.StaticMethods.newLongArray(numNGrams.get(ngramOrder), numWords, numWords);
		}
	}

	/**
	 * @return a new array of length {@code maxNgramOrder} whose entry
	 *         {@code i} is the per-word counter for order {@code i + 1}; entries
	 *         for which nothing was allocated are {@code null}. The array is a
	 *         copy, the counters it references are not.
	 */
	public LongArray[] getNumNgramsForEachWord() {
		if (numNgramsForEachWord == null) {
			// Nothing has been read or announced yet.
			return new LongArray[maxNgramOrder];
		}
		return Arrays.copyOf(numNgramsForEachWord, maxNgramOrder);
	}

	/**
	 * @return a new array of length {@code maxNgramOrder} whose entry
	 *         {@code i} is the number of n-grams seen for order {@code i + 1}
	 */
	public long[] getNumNgramsForEachOrder() {
		if (numNgramsForOrder == null) {
			// The totals are allocated lazily by call(); with no n-grams seen
			// every order has a count of zero.
			return new long[maxNgramOrder];
		}
		return Arrays.copyOf(numNgramsForOrder, maxNgramOrder);
	}

	/**
	 * Makes sure {@code numNgramsForEachWord[ngramOrder]} exists, creating or
	 * growing the outer array and creating the counter itself as required.
	 * 
	 * @param ngramOrder
	 *            zero-based n-gram order
	 */
	private void ensureWordCountsAllocated(final int ngramOrder) {
		if (numNgramsForEachWord == null) {
			numNgramsForEachWord = new LongArray[INITIAL_ORDER_CAPACITY];
		}
		if (ngramOrder >= numNgramsForEachWord.length) {
			numNgramsForEachWord = Arrays.copyOf(numNgramsForEachWord, grownLength(numNgramsForEachWord.length, ngramOrder));
		}
		if (numNgramsForEachWord[ngramOrder] == null) {
			numNgramsForEachWord[ngramOrder] = LongArray.StaticMethods.newLongArray(Integer.MAX_VALUE, Integer.MAX_VALUE);
		}
	}

	/**
	 * Makes sure {@code numNgramsForOrder[ngramOrder]} exists, creating or
	 * growing the array as required.
	 * 
	 * @param ngramOrder
	 *            zero-based n-gram order
	 */
	private void ensureOrderCountsAllocated(final int ngramOrder) {
		if (numNgramsForOrder == null) {
			numNgramsForOrder = new long[INITIAL_ORDER_CAPACITY];
		}
		if (ngramOrder >= numNgramsForOrder.length) {
			numNgramsForOrder = Arrays.copyOf(numNgramsForOrder, grownLength(numNgramsForOrder.length, ngramOrder));
		}
	}

	/**
	 * Computes the new length for an array that must be able to hold
	 * {@code requiredIndex}. The length is at least doubled, and a single
	 * doubling is never relied upon: it falls short when the current length is
	 * small (or zero) relative to the requested index.
	 * 
	 * @param currentLength
	 *            present length of the array
	 * @param requiredIndex
	 *            index that has to become valid
	 * @return a length strictly greater than {@code requiredIndex}
	 */
	private static int grownLength(final int currentLength, final int requiredIndex) {
		return Math.max(currentLength * 2, requiredIndex + 1);
	}

	/** No per-order work is needed at the start of an order. */
	@Override
	public void handleNgramOrderStarted(int order) {
	}
}