package com.colloquial.arithcode;
/** Package class for use by the PPMModel. A fragmentary adaptive
* unigram model that allows exclusions in converting points to
* intervals and vice-versa. One such model will be used for each
* unigram context.
*
* @author <a href="http://www.colloquial.com/carp/">Bob Carpenter</a>
* @version 1.1
* @since 1.1
*/
final class ExcludingAdaptiveUnigramModel {

    /** Index in the count array for the end-of-file outcome.
     */
    private static final int EOF_INDEX = 256;

    /** Number of outcomes tracked: byte values 0 to 255 plus end-of-file.
     */
    private static final int NUM_OUTCOMES = EOF_INDEX + 1;

    /** Maximum individual count; exceeding it triggers a rescale of
     * all counts.
     */
    private static final int MAX_INDIVIDUAL_COUNT = 8 * 1024;

    /** Non-cumulative counts for each outcome. Indices 0 to 255 hold
     * the counts for the byte values and index 256 holds the count for
     * end-of-file. No total is stored; totals are summed on demand.
     */
    private final int[] _count;

    /** Construct an excluding adaptive unigram model in which every
     * outcome, including end-of-file, starts with a count of 1.
     */
    public ExcludingAdaptiveUnigramModel() {
        _count = new int[NUM_OUTCOMES];
        java.util.Arrays.fill(_count, 1); // counts are non-cumulative
    }

    /** Compute the resulting interval to code the specified symbol given
     * the specified excluded bytes, then increment the symbol's count.
     * On return, <code>result[0]</code> is the low count,
     * <code>result[1]</code> the high count and <code>result[2]</code>
     * the total count. The symbol's own count and the end-of-file count
     * are always included, whatever the exclusions contain.
     * @param symbol Symbol to code; <code>ArithCodeModel.EOF</code> selects
     * the end-of-file outcome.
     * @param result Interval to code the symbol; must have room for at
     * least three entries.
     * @param exclusions Bytes to exclude as possible outcomes for interval.
     */
    public void interval(int symbol, int[] result, ByteSet exclusions) {
        final int index = (symbol == ArithCodeModel.EOF) ? EOF_INDEX : symbol;
        int sum = 0;
        for (int i = 0; i < index; ++i) {
            if (!exclusions.contains(i)) {
                sum += _count[i];
            }
        }
        result[0] = sum;
        sum += _count[index];
        result[1] = sum;
        // Remaining byte outcomes above the symbol; end-of-file is handled
        // separately because it is never subject to exclusion.
        for (int i = index + 1; i < EOF_INDEX; ++i) {
            if (!exclusions.contains(i)) {
                sum += _count[i];
            }
        }
        if (index != EOF_INDEX) {
            sum += _count[EOF_INDEX];
        }
        result[2] = sum;
        increment(index);
    }

    /** Return the symbol corresponding to the specified count, given
     * the specified excluded bytes.
     * @param midCount Count of symbol to return.
     * @param exclusions Bytes to exclude from consideration.
     * @return Symbol represented by specified count, or
     * <code>ArithCodeModel.EOF</code> if the count falls in the
     * end-of-file outcome.
     * @throws IllegalArgumentException If the count is not below the
     * total count of the non-excluded outcomes.
     */
    public int pointToSymbol(int midCount, ByteSet exclusions) {
        int sum = 0;
        // Bounded scan: never index past the end-of-file slot, and never
        // query the exclusion set with a value that is not a byte.
        for (int mid = 0; mid < _count.length; ++mid) {
            if (mid != EOF_INDEX && exclusions.contains(mid)) {
                continue;
            }
            sum += _count[mid];
            if (sum > midCount) {
                return (mid == EOF_INDEX) ? ArithCodeModel.EOF : mid;
            }
        }
        throw new IllegalArgumentException("Count " + midCount
                                           + " is not below the total count " + sum
                                           + " of the non-excluded outcomes.");
    }

    /** Total count for interval given specified set of exclusions.
     * The end-of-file outcome is always counted.
     * @param exclusions Bytes to exclude as outcomes.
     * @return Total count of all non-excluded outcomes.
     */
    public int totalCount(ByteSet exclusions) {
        int total = 0;
        for (int i = 0; i < _count.length; ++i) {
            if (i == EOF_INDEX || !exclusions.contains(i)) {
                total += _count[i];
            }
        }
        return total;
    }

    /** Increment the count for the given outcome, rescaling all counts
     * if the incremented count exceeds the maximum individual count.
     * @param i Index of outcome to increment: 0 to 255 for a byte, 256
     * for end-of-file.
     */
    public void increment(int i) {
        if (++_count[i] > MAX_INDIVIDUAL_COUNT) {
            rescale();
        }
    }

    /** Rescale the counts by dividing all frequencies by 2, rounding
     * up so that a count of 1 remains 1.
     */
    private void rescale() {
        for (int i = 0; i < _count.length; ++i) {
            _count[i] = (_count[i] + 1) / 2;
        }
    }
}