package org.andengine.util.adt.dictionary;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import org.andengine.util.FileUtils;
import org.andengine.util.StreamUtils;
import org.andengine.util.adt.bit.BitVector;
import org.andengine.util.adt.bit.ByteBackedBitVector;
import org.andengine.util.adt.bit.IBitVector;
import org.andengine.util.adt.bit.LongBackedBitVector;
import org.andengine.util.adt.data.DataUtils;
/**
* @author Nicolas Gramlich
* @since Nov 20, 2012
*/
public class Dictionary {
// ===========================================================
// Constants
// ===========================================================
/** Format version written into the header by the factory; the constructor rejects any other value. */
private static final short VERSION = 1;
/**
 * Orders strings in descending {@link String#compareTo(String)} order.
 * NOTE(review): despite its name, the comparison performed here is case-sensitive.
 */
private static final Comparator<String> CASEINSENSITIVE_REVERSE_LEXICOGRAPHICAL_COMPARATOR = new Comparator<String>() {
@Override
public int compare(final String pStringA, final String pStringB) {
return -(pStringA.compareTo(pStringB));
}
};
/*
 * Header layout. All OFFSET_* and SIZE_* values are expressed in bits.
 * Each header field starts where the previous one ends, in this order:
 * version, character count, character bit length, index entry count,
 * index entry start-index bit length, index entry length bit length.
 * The character table starts at OFFSET_CHARACTERS.
 */
private static final int OFFSET_VERSION = 0;
private static final int SIZE_VERSION = Short.SIZE;
private static final int OFFSET_CHARACTER_COUNT = Dictionary.OFFSET_VERSION + Dictionary.SIZE_VERSION;
private static final int SIZE_CHARACTER_COUNT = Integer.SIZE;
private static final int OFFSET_CHARACTER_BITLENGTH = Dictionary.OFFSET_CHARACTER_COUNT + Dictionary.SIZE_CHARACTER_COUNT;
private static final int SIZE_CHARACTER_BITLENGTH = Byte.SIZE;
private static final int OFFSET_INDEXENTRY_COUNT = Dictionary.OFFSET_CHARACTER_BITLENGTH + Dictionary.SIZE_CHARACTER_BITLENGTH;
private static final int SIZE_INDEXENTRY_COUNT = Integer.SIZE;
private static final int OFFSET_INDEXENTRY_STARTINDEX_BITLENGTH = Dictionary.OFFSET_INDEXENTRY_COUNT + Dictionary.SIZE_INDEXENTRY_COUNT;
private static final int SIZE_INDEXENTRY_STARTINDEX_BITLENGTH = Byte.SIZE;
private static final int OFFSET_INDEXENTRY_LENGTH_BITLENGTH = Dictionary.OFFSET_INDEXENTRY_STARTINDEX_BITLENGTH + Dictionary.SIZE_INDEXENTRY_STARTINDEX_BITLENGTH;
private static final int SIZE_INDEXENTRY_LENGTH_BITLENGTH = Byte.SIZE;
/** Bit offset of the character table, which directly follows the fixed-size header fields. */
private static final int OFFSET_CHARACTERS = Dictionary.OFFSET_INDEXENTRY_LENGTH_BITLENGTH + Dictionary.SIZE_INDEXENTRY_LENGTH_BITLENGTH;
// ===========================================================
// Fields
// ===========================================================
/** Version value read from the header of the backing bit vector. */
private final short mVersion;
/** Backing storage holding the header, the character table, the index entries and the encoded character sequence. */
private final IBitVector mBitVector;
/** Number of characters in the character table. */
private final int mCharacterCount;
/** Number of bits used per character-table index inside the encoded character sequence. */
private final int mCharacterBitLength;
/** Character table decoded from the bit vector; the encoded character sequence stores indices into this array. */
private final char[] mCharacterTable;
/** Number of index entries, i.e. number of strings in this dictionary. */
private final int mEntryCount;
/** Number of bits each index entry uses for its start index. */
private final int mEntryStartIndexBitLength;
/** Number of bits each index entry uses for its length. */
private final int mEntryLengthBitLength;
/** Bit offset of the first index entry; located directly after the character table. */
private final int mEntriesOffset;
/** Bit offset of the encoded character sequence; located directly after the index entries. */
private final int mCharacterSequenceOffset;
// ===========================================================
// Constructors
// ===========================================================
public Dictionary(final byte[] pBytes) {
this(new LongBackedBitVector(pBytes));
}
public Dictionary(final IBitVector pBitVector) {
this.mBitVector = pBitVector;
this.mVersion = this.mBitVector.getShort(Dictionary.OFFSET_VERSION);
if (this.mVersion != Dictionary.VERSION) {
throw new IllegalArgumentException("Illegal version: " + this.mVersion);
}
this.mCharacterCount = this.mBitVector.getInt(Dictionary.OFFSET_CHARACTER_COUNT);
this.mCharacterBitLength = this.mBitVector.getByte(Dictionary.OFFSET_CHARACTER_BITLENGTH);
this.mCharacterTable = new char[this.mCharacterCount];
for (int i = 0; i < this.mCharacterCount; i++) {
this.mCharacterTable[i] = (char) this.mBitVector.getShort(Dictionary.OFFSET_CHARACTERS + (i * Character.SIZE));
}
this.mEntryCount = this.mBitVector.getInt(Dictionary.OFFSET_INDEXENTRY_COUNT);
this.mEntryStartIndexBitLength = this.mBitVector.getByte(Dictionary.OFFSET_INDEXENTRY_STARTINDEX_BITLENGTH);
this.mEntryLengthBitLength = this.mBitVector.getByte(Dictionary.OFFSET_INDEXENTRY_LENGTH_BITLENGTH);
this.mEntriesOffset = Dictionary.OFFSET_CHARACTERS + (this.mCharacterCount * Character.SIZE);
this.mCharacterSequenceOffset = this.mEntriesOffset + (this.mEntryCount * (this.mEntryStartIndexBitLength + this.mEntryLengthBitLength));
}
// ===========================================================
// Getter & Setter
// ===========================================================
/**
 * @return the format version read from the header of this dictionary.
 */
public int getVersion() {
return this.mVersion;
}
/**
 * @return the number of entries (strings) stored in this dictionary.
 */
public int getEntryCount() {
return this.mEntryCount;
}
/**
 * Decodes the entry stored at the given position of the index table.
 *
 * @param pIndex zero-based index of the entry; must be at least {@code 0} and smaller than {@link #getEntryCount()}.
 * @return the decoded entry.
 * @throws IllegalArgumentException if {@code pIndex} is negative or not smaller than the entry count.
 */
public String getEntry(final int pIndex) {
    /* The last valid slot is mEntryCount - 1; slot mEntryCount would already decode bits behind the index table. */
    if ((pIndex < 0) || (pIndex >= this.mEntryCount)) {
        throw new IllegalArgumentException("pIndex out of bounds: " + pIndex);
    }

    /* Each index entry consists of a start index followed by a length, both with a fixed bit width. */
    final int offsetInEntries = pIndex * (this.mEntryStartIndexBitLength + this.mEntryLengthBitLength);
    final int entryOffset = this.mEntriesOffset + offsetInEntries;
    final int startIndex = this.mBitVector.getBits(entryOffset, this.mEntryStartIndexBitLength);
    final int length = this.mBitVector.getBits(entryOffset + this.mEntryStartIndexBitLength, this.mEntryLengthBitLength);

    final char[] chars = new char[length]; // TODO Try to avoid allocation
    for (int i = 0; i < length; i++) {
        /* The character sequence stores indices into the character table, not the characters themselves. */
        final int characterOffset = this.mCharacterSequenceOffset + ((i + startIndex) * this.mCharacterBitLength);
        final int characterBits = this.mBitVector.getBits(characterOffset, this.mCharacterBitLength);
        chars[i] = this.mCharacterTable[characterBits];
    }
    return new String(chars);
}
/**
 * Decodes every entry of this dictionary.
 *
 * @return a newly allocated array holding all entries in the order they are stored in the index table.
 */
public String[] getEntries() {
    final int count = this.mEntryCount;
    final String[] result = new String[count];
    int index = 0;
    while (index < count) {
        result[index] = this.getEntry(index);
        index++;
    }
    return result;
}
// ===========================================================
// Methods for/from SuperClass/Interfaces
// ===========================================================
// ===========================================================
// Methods
// ===========================================================
/**
 * Writes the backing bit vector of this dictionary to the given stream.
 * The stream is wrapped in a {@link DataOutputStream}; it is not closed by this method.
 *
 * @param pOutputStream the stream to write to.
 * @throws IOException if saving the bit vector fails.
 */
public void save(final OutputStream pOutputStream) throws IOException {
    final DataOutputStream dataOutputStream = new DataOutputStream(pOutputStream);
    this.mBitVector.save(dataOutputStream);
}
/**
 * Reads a dictionary from the given stream.
 * The stream is wrapped in a {@link DataInputStream} and loaded into a {@link ByteBackedBitVector}.
 *
 * @param pInputStream the stream to read from.
 * @return the dictionary backed by the loaded bit vector.
 * @throws IOException if loading the bit vector fails.
 */
public static Dictionary load(final InputStream pInputStream) throws IOException {
    final DataInputStream dataInputStream = new DataInputStream(pInputStream);
    final IBitVector bitVector = ByteBackedBitVector.load(dataInputStream);
    return new Dictionary(bitVector);
}
/**
 * Checks whether the given string is an entry of this dictionary.
 * The lookup is a binary search that relies on the entries being stored in descending
 * {@link String#compareTo(String)} order; the comparison is case-sensitive.
 *
 * @param pString the string to look up; must be neither {@code null} nor empty.
 * @return {@code true} if an entry equal to {@code pString} exists, {@code false} otherwise.
 * @throws IllegalArgumentException if {@code pString} is {@code null} or empty.
 */
public boolean contains(final String pString) {
    if (pString == null) {
        throw new IllegalArgumentException("pCharSequence must not be null");
    }
    /* A length can never be negative, so the emptiness check has to test for zero. */
    if (pString.length() == 0) {
        throw new IllegalArgumentException("pCharSequence must not be empty");
    }

    int lowerBound = 0;
    int higherBound = this.mEntryCount - 1;

    /* Binary search. */
    while (lowerBound <= higherBound) {
        final int mid = ((lowerBound + higherBound) >>> 1);
        // TODO Optimization: don't decode the whole String, but compare char by char?
        final String midEntry = this.getEntry(mid);
        final int compared = midEntry.compareTo(pString);
        if (compared > 0) {
            /* Entries are stored in descending order, so a greater entry means the match lies further back. */
            lowerBound = mid + 1;
        } else if (compared < 0) {
            higherBound = mid - 1;
        } else {
            return true;
        }
    }
    return false;
}
// ===========================================================
// Inner and Anonymous Classes
// ===========================================================
public static class Factory {
// ===========================================================
// Constants
// ===========================================================
// ===========================================================
// Fields
// ===========================================================
// ===========================================================
// Constructors
// ===========================================================
/**
 * Builds a dictionary from the lines of the given file, as returned by {@link FileUtils#readLines(File)}.
 *
 * @param pFile the file to read the entries from.
 * @return the newly built dictionary.
 * @throws IOException if reading the file fails.
 */
public static Dictionary create(final File pFile) throws IOException {
    final String[] lines = FileUtils.readLines(pFile);
    return Factory.create(lines);
}
/**
 * Builds a dictionary from the lines of the given stream, as returned by {@link StreamUtils#readLines(InputStream)}.
 *
 * @param pInputStream the stream to read the entries from.
 * @return the newly built dictionary.
 * @throws IOException if reading the stream fails.
 */
public static Dictionary create(final InputStream pInputStream) throws IOException {
    final String[] lines = StreamUtils.readLines(pInputStream);
    return Factory.create(lines);
}
/**
 * Builds a dictionary from the lines of the given reader, as returned by {@link StreamUtils#readLines(Reader)}.
 *
 * @param pReader the reader to read the entries from.
 * @return the newly built dictionary.
 * @throws IOException if reading from the reader fails.
 */
public static Dictionary create(final Reader pReader) throws IOException {
    final String[] lines = StreamUtils.readLines(pReader);
    return Factory.create(lines);
}
/**
 * Builds a dictionary from the given strings.
 * <p>
 * The strings are sorted in descending {@link String#compareTo(String)} order, packed into one shared
 * character sequence and then written into a bit vector in this order: header, character table,
 * index entries (start index and length per string), character sequence (one character-table index per character).
 * <p>
 * The passed array is not modified; sorting is performed on a copy.
 *
 * @param pStrings the strings that make up the dictionary.
 * @return the newly built dictionary.
 */
public static Dictionary create(final String ... pStrings) {
    /* Sort a private copy, so the array owned by the caller keeps its order. */
    final String[] sortedStrings = pStrings.clone();
    Arrays.sort(sortedStrings, Dictionary.CASEINSENSITIVE_REVERSE_LEXICOGRAPHICAL_COMPARATOR);

    final Dictionary.Factory.Entries entryDictionary = Dictionary.Factory.createDictionary(sortedStrings);
    final String string = entryDictionary.mString;

    final int characterCountTotal = string.length();
    final int characterCount = entryDictionary.mCharacters.length;
    final int characterBitLength = DataUtils.getBitLength(characterCount);
    final int entryCount = entryDictionary.mEntries.size();
    final int entryStartIndexBitLength = DataUtils.getBitLength(entryDictionary.mEntryStartIndexMaximum);
    final int entryLengthBitLength = DataUtils.getBitLength(entryDictionary.mEntryLengthMaximum);

    final int bitsRequired = Dictionary.Factory.calculateBitsRequired(characterCountTotal, characterCount, characterBitLength, entryCount, entryStartIndexBitLength, entryLengthBitLength);
    final IBitVector bitVector = new ByteBackedBitVector(bitsRequired);

    /* Write meta data. */
    bitVector.setShort(Dictionary.OFFSET_VERSION, Dictionary.VERSION);
    bitVector.setInt(Dictionary.OFFSET_CHARACTER_COUNT, characterCount);
    bitVector.setByte(Dictionary.OFFSET_CHARACTER_BITLENGTH, (byte) characterBitLength);
    bitVector.setInt(Dictionary.OFFSET_INDEXENTRY_COUNT, entryCount);
    bitVector.setByte(Dictionary.OFFSET_INDEXENTRY_STARTINDEX_BITLENGTH, (byte) entryStartIndexBitLength);
    bitVector.setByte(Dictionary.OFFSET_INDEXENTRY_LENGTH_BITLENGTH, (byte) entryLengthBitLength);

    /* Character table. */
    for (int i = 0; i < characterCount; i++) {
        final char character = entryDictionary.mCharacters[i];
        final short characterBits = (short) character;
        bitVector.setShort(Dictionary.OFFSET_CHARACTERS + (i * Character.SIZE), characterBits);
    }

    /* Index entries. */
    final int offsetEntries = Dictionary.OFFSET_CHARACTERS + (characterCount * Character.SIZE);
    for (int i = 0; i < entryCount; i++) {
        final Entry entry = entryDictionary.mEntries.get(i);
        final int entryStartIndexBits = entry.mStartIndex; // TODO Fix bug when this goes over 2048? Maybe it's masking in the decoder?
        final int entryLengthBits = entry.mLength;
        final int offsetInEntries = i * (entryStartIndexBitLength + entryLengthBitLength);
        final int offsetEntryStartIndex = offsetEntries + offsetInEntries;
        final int offsetEntryLength = offsetEntryStartIndex + entryStartIndexBitLength;
        bitVector.setBits(offsetEntryStartIndex, entryStartIndexBits, Integer.SIZE - entryStartIndexBitLength, entryStartIndexBitLength);
        bitVector.setBits(offsetEntryLength, entryLengthBits, Integer.SIZE - entryLengthBitLength, entryLengthBitLength);
    }

    final int offsetCharacterString = offsetEntries + (entryCount * (entryStartIndexBitLength + entryLengthBitLength));

    /* Character string. */
    for (int i = 0; i < characterCountTotal; i++) {
        final int offsetInCharacterString = i * characterBitLength;
        final int offsetCharacter = offsetCharacterString + offsetInCharacterString;
        final char character = string.charAt(i);
        /* Every character of the sequence is stored as its position in the character table. */
        final int characterBits = Arrays.binarySearch(entryDictionary.mCharacters, character); // TODO Use short?
        bitVector.setBits(offsetCharacter, characterBits, Integer.SIZE - characterBitLength, characterBitLength);
    }

    return new Dictionary(bitVector);
}
/**
 * Computes the total number of bits needed to store a dictionary.
 *
 * @param pCharacterCountTotal the length of the shared character sequence.
 * @param pCharacterCount the number of characters in the character table.
 * @param pCharacterBitLength the number of bits per character of the character sequence.
 * @param pEntryCount the number of index entries.
 * @param pEntryStartIndexBitLength the number of bits per index entry start index.
 * @param pEntryLengthBitLength the number of bits per index entry length.
 * @return the sum of the header, character table, index entry and character sequence sizes, in bits.
 */
private static int calculateBitsRequired(final int pCharacterCountTotal, final int pCharacterCount, final int pCharacterBitLength, final int pEntryCount, final int pEntryStartIndexBitLength, final int pEntryLengthBitLength) {
    final int headerBits = Dictionary.SIZE_VERSION
        + Dictionary.SIZE_CHARACTER_COUNT
        + Dictionary.SIZE_CHARACTER_BITLENGTH
        + Dictionary.SIZE_INDEXENTRY_COUNT
        + Dictionary.SIZE_INDEXENTRY_STARTINDEX_BITLENGTH
        + Dictionary.SIZE_INDEXENTRY_LENGTH_BITLENGTH;
    final int characterTableBits = pCharacterCount * Character.SIZE;
    final int bitsPerEntry = pEntryStartIndexBitLength + pEntryLengthBitLength;
    final int entriesBits = pEntryCount * bitsPerEntry;
    final int characterSequenceBits = pCharacterBitLength * pCharacterCountTotal;
    return headerBits + characterTableBits + entriesBits + characterSequenceBits;
}
/**
 * Packs the given strings into one shared character sequence and records where each string is located.
 * <p>
 * For every string, in the given order: if the sequence built so far already contains the string,
 * that occurrence is referenced. Otherwise, if the sequence ends with a prefix of the string,
 * only the remainder of the string is appended. Otherwise the whole string is appended.
 *
 * @param pStrings the strings to pack.
 * @return the index entries (one per string, in the given order) together with the shared character sequence.
 */
private static Entries createDictionary(final String ... pStrings) {
    final StringBuilder sequence = new StringBuilder();
    final ArrayList<Entry> entries = new ArrayList<Entry>();

    for (final String string : pStrings) {
        final int length = string.length();
        final int existingStartIndex = sequence.indexOf(string);

        final int startIndex;
        if (existingStartIndex >= 0) {
            /* Already contained: reference the existing occurrence. */
            startIndex = existingStartIndex;
        } else {
            final int sequenceLength = sequence.length();
            final int overlap = Dictionary.Factory.getPrefixLength(sequence, string);
            if (overlap == -1) {
                /* No overlap with the end of the sequence: append the whole string. */
                startIndex = sequenceLength;
                sequence.append(string);
            } else {
                /* The sequence ends with a prefix of the string: append only what is missing. */
                startIndex = sequenceLength - overlap;
                sequence.append(string.substring(overlap));
            }
        }
        entries.add(new Entry(startIndex, length));
    }

    return new Dictionary.Factory.Entries(entries, sequence.toString());
}
// ===========================================================
// Getter & Setter
// ===========================================================
// ===========================================================
// Methods for/from SuperClass/Interfaces
// ===========================================================
// ===========================================================
// Methods
// ===========================================================
/**
 * Finds the longest proper, non-empty prefix of the given string that the given string builder ends with.
 *
 * @param pStringBuilder the character sequence whose end is inspected.
 * @param pString the string whose prefixes are tried, longest first.
 * @return the length of the longest matching prefix, or {@code -1} if no prefix matches.
 */
private static int getPrefixLength(final StringBuilder pStringBuilder, final String pString) {
    final int builderLength = pStringBuilder.length();
    int candidateLength = pString.length() - 1;
    while (candidateLength > 0) {
        final int tailStart = builderLength - candidateLength;
        /* A prefix longer than the builder cannot match; otherwise the only possible match position is tailStart. */
        if (tailStart >= 0) {
            final String candidate = pString.substring(0, candidateLength);
            if (pStringBuilder.indexOf(candidate, tailStart) >= 0) {
                return candidateLength;
            }
        }
        candidateLength--;
    }
    return -1;
}
// ===========================================================
// Inner and Anonymous Classes
// ===========================================================
/**
 * Intermediate result of packing all dictionary strings into one shared character sequence.
 * Holds the index entries, the shared sequence, the maxima needed to size the index entry
 * bit fields and the sorted set of distinct characters used by the sequence.
 */
public static class Entries {
    // ===========================================================
    // Constants
    // ===========================================================

    /** Number of distinct {@code char} values, i.e. {@code 0} up to and including {@link Character#MAX_VALUE}. */
    private static final int CHARACTER_VALUE_COUNT = Character.MAX_VALUE + 1;

    // ===========================================================
    // Fields
    // ===========================================================

    /** One entry per dictionary string, referencing a range of {@link #mString}. */
    private final ArrayList<Entry> mEntries;
    /** The shared character sequence all entries point into. */
    private final String mString;
    /** Largest start index of all entries; {@code 0} if there are no entries. */
    private final int mEntryStartIndexMaximum;
    /** Largest length of all entries; {@code 0} if there are no entries. */
    private final int mEntryLengthMaximum;
    /** Distinct characters occurring in {@link #mString}, in ascending order. */
    private final char[] mCharacters;

    // ===========================================================
    // Constructors
    // ===========================================================

    /**
     * @param pEntries the index entries, one per dictionary string.
     * @param pString the shared character sequence the entries refer to.
     */
    public Entries(final ArrayList<Entry> pEntries, final String pString) {
        this.mEntries = pEntries;
        this.mString = pString;

        int entryStartIndexMaximum = 0;
        int entryLengthMaximum = 0;
        for (final Entry entry : pEntries) {
            entryStartIndexMaximum = Math.max(entryStartIndexMaximum, entry.mStartIndex);
            entryLengthMaximum = Math.max(entryLengthMaximum, entry.mLength);
        }
        this.mEntryStartIndexMaximum = entryStartIndexMaximum;
        this.mEntryLengthMaximum = entryLengthMaximum;

        /* Determine all characters used. One bit per possible char value, including Character.MAX_VALUE itself. */
        final IBitVector characterBitVector = new ByteBackedBitVector(Entries.CHARACTER_VALUE_COUNT);
        final int stringLength = pString.length();
        for (int i = 0; i < stringLength; i++) {
            characterBitVector.setBit(pString.charAt(i));
        }

        int characterCount = 0;
        for (int i = 0; i < Entries.CHARACTER_VALUE_COUNT; i++) {
            if (characterBitVector.getBit(i) == BitVector.TRUE) {
                characterCount++;
            }
        }

        /* Collect the used characters in ascending order, so the table can be binary searched. */
        this.mCharacters = new char[characterCount];
        characterCount = 0;
        for (int i = 0; i < Entries.CHARACTER_VALUE_COUNT; i++) {
            if (characterBitVector.getBit(i) == BitVector.TRUE) {
                this.mCharacters[characterCount++] = (char) i;
            }
        }
    }

    // ===========================================================
    // Getter & Setter
    // ===========================================================

    // ===========================================================
    // Methods for/from SuperClass/Interfaces
    // ===========================================================

    // ===========================================================
    // Methods
    // ===========================================================

    // ===========================================================
    // Inner and Anonymous Classes
    // ===========================================================
}
/**
 * Location of one dictionary string inside the shared character sequence:
 * the index of its first character and its length in characters.
 */
public static class Entry {
// ===========================================================
// Constants
// ===========================================================
// ===========================================================
// Fields
// ===========================================================
/** Index of the first character of the string within the shared character sequence. */
private final int mStartIndex;
/** Number of characters of the string. */
private final int mLength;
// ===========================================================
// Constructors
// ===========================================================
/**
 * @param pStartIndex index of the first character within the shared character sequence.
 * @param pLength number of characters.
 */
public Entry(final int pStartIndex, final int pLength) {
this.mStartIndex = pStartIndex;
this.mLength = pLength;
}
// ===========================================================
// Getter & Setter
// ===========================================================
// ===========================================================
// Methods for/from SuperClass/Interfaces
// ===========================================================
// ===========================================================
// Methods
// ===========================================================
// ===========================================================
// Inner and Anonymous Classes
// ===========================================================
}
}
}