package com.colloquial.arithcode;
/** <P>Provides an adaptive model based on bytes observed in the input
* stream. Each byte count is initialized at <code>1</code> and
* incremented by <code>1</code> for each instance seen. If
* incrementing an outcome causes the total count to exceed
* <code>MAX_COUNT</code>, then all counts are divided by 2 and
* rounded up. Estimation is by frequency (also known as a maximum
* likelihood estimate).
*
* @author <a href="http://www.colloquial.com/carp/">Bob Carpenter</a>
* @version 1.1
* @since 1.0
*/
public final class AdaptiveUnigramModel implements ArithCodeModel {
/** Construct an adaptive unigram model, initializing all byte counts
* and end-of-file to <code>1</code>.
*/
public AdaptiveUnigramModel() { // initial cumulative counts
for (int i = 0; i < NUM_BYTES; ++i) _count[i] = i; // low[i] = high[i+1]
_count[EOF_INDEX] = EOF_INDEX; // low[EOF] = high[255]
_count[TOTAL_INDEX] = TOTAL_INDEX; // total = high[EOF]
}
// specified in ArithCodeModel
public void interval(int symbol, int[] result) {
if (symbol == EOF) symbol = EOF_INDEX;
result[0] = lowCount(symbol);
result[1] = highCount(symbol);
result[2] = totalCount();
increment(symbol);
}
// specified in ArithCodeModel
public int pointToSymbol(int midCount) {
int low = 0;
int high = TOTAL_INDEX;
while (true) { // binary search returns when it finds result
int mid = (high+low)/2;
if (_count[mid] > midCount) {
if (high == mid) --high;
else high = mid;
} else if (_count[mid+1] > midCount) {
return (mid==EOF_INDEX) ? EOF : mid;
} else {
if (low==mid) ++low;
else low = mid;
}
}
}
// specified in ArithCodeModel
public int totalCount() { return _count[TOTAL_INDEX]; }
// specified in ArithCodeModel
public boolean escaped(int symbol) { return false; }
// specified in ArithCodeModel
public void exclude(int i) { }
// specified by ArithCodeModel
public void increment(int i) {
while (++i <= TOTAL_INDEX) ++_count[i];
if (totalCount() >= MAX_COUNT) rescale();
}
/** Counts for each outcome. Indices 0 to 255 for the
* usual counts, 256 for end-of-file, and 257 for total.
* Each outcome i between 0-256 is coded by interval
* (_count[i],_count[i+1],_count[257]).
*/
private int[] _count = new int[258];
/** The cumulative count of all outcomes below given outcome.
* @param i Index of given outcome.
* @return Low count of interval for given symbol.
*/
private int lowCount(int i) { return _count[i]; }
/** The cumulative count of all outcomes below given outcome plus
* the count of the outcome.
* @param i Index of given outcome.
* @return High count of interval for given symbol.
*/
private int highCount(int i) { return _count[i+1]; }
/** Rescale the counts by adding 1 to all counts and dividing by
* <code>2</code>.
*/
private void rescale() {
int[] freqs = new int[_count.length];
for (int i = 1; i < freqs.length; ++i)
freqs[i] = (_count[i] - _count[i-1] + 1) / 2; // compute from cumulative; round up
for (int i = 1; i < _count.length; ++i) // compute cumulative;
_count[i] = _count[i-1] + freqs[i]; // _count[0] = 0 is implicit
}
/** Maximum count before rescaling.
*/
private static final int MAX_COUNT = 64*1024;
/** Total number of bytes.
*/
private static final int NUM_BYTES = 256;
/** Index in the count array for the end-of-file outcome.
*/
private static final int EOF_INDEX = 256;
/** Index in the count array for the cumulative total of all
* outcomes.
*/
private static final int TOTAL_INDEX = 257;
}