/*******************************************************************************
* Copyright (c) 2000, 2003 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Common Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/cpl-v10.html
*
* Contributors:
* IBM Corporation - initial API and implementation
* MetaMatrix, Inc - repackaging and updates for use as a metadata store
*******************************************************************************/
package org.teiid.designer.core.index;
import java.io.UTFDataFormatException;
import org.teiid.core.designer.util.StringUtilities;
/**
 * Index block that uses prefix coding on words, and gamma coding of the
 * differences between successive document numbers.
 * <p>
 * Each entry is written as: one byte holding the number of leading characters
 * shared with the previously written word, the remaining characters of the
 * word, the gamma-coded number of references, and then each reference as the
 * gamma-coded difference from the reference before it.
 *
 * @since 8.0
 */
public class GammaCompressedIndexBlock extends IndexBlock {

    /** Scratch stream into which an entry is encoded before it is copied into the block. */
    CodeByteStream writeCodeStream = new CodeByteStream();

    /** Stream used to decode entries from the block's buffer. */
    CodeByteStream readCodeStream;

    /** Word of the last entry written or read; {@code null} at the start of a block. */
    char[] prevWord = null;

    /**
     * Creates a block and attaches the read stream to the block's buffer.
     *
     * @param blockSize the block size, forwarded to the {@link IndexBlock} constructor
     */
    public GammaCompressedIndexBlock(int blockSize) {
        super(blockSize);
        readCodeStream = new CodeByteStream(field.buffer());
    }

    /**
     * Encodes the entry and appends it at the current offset if it fits.
     *
     * @param entry the entry to append
     * @return {@code true} if the entry was stored; {@code false} if the encoded
     *         entry does not fit in the space remaining in this block
     * @throws IllegalArgumentException if the entry's references are not positive
     *         and strictly increasing
     * @see IndexBlock#addEntry
     */
    @Override
    public boolean addEntry(WordEntry entry) {
        writeCodeStream.reset();
        encodeEntry(entry, prevWord, writeCodeStream);
        // Two bytes are kept free at the end of the block; presumably this is the
        // room needed by the end marker that flush() writes with putInt2.
        if (getOffset() + writeCodeStream.byteLength() > this.blockSize - 2) {
            return false;
        }
        byte[] bytes = writeCodeStream.toByteArray();
        field.put(getOffset(), bytes);
        setOffset(getOffset() + bytes.length);
        prevWord = entry.getWord();
        return true;
    }

    /**
     * Writes the compressed form of an entry to the given stream.
     *
     * @param entry the entry to encode
     * @param prevWord the word of the preceding entry, or {@code null} if there is none
     * @param codeStream the stream that receives the encoded entry
     * @throws IllegalArgumentException if a reference is not greater than the one
     *         before it (the first reference must be greater than zero)
     */
    protected void encodeEntry(WordEntry entry, char[] prevWord, CodeByteStream codeStream) {
        char[] word = entry.getWord();
        // The shared prefix length is written with writeByte, so it is capped at 255.
        int prefixLen = prevWord == null ? 0 : Math.min(StringUtilities.prefixLength(prevWord, word), 255);
        codeStream.writeByte(prefixLen);
        codeStream.writeUTF(word, prefixLen, word.length);
        int n = entry.getNumRefs();
        codeStream.writeGamma(n);
        int prevRef = 0;
        for (int i = 0; i < n; ++i) {
            int ref = entry.getRef(i);
            if (ref <= prevRef) {
                throw new IllegalArgumentException(
                    "References must be positive and strictly increasing: got " + ref //$NON-NLS-1$
                    + " after " + prevRef); //$NON-NLS-1$
            }
            // Only the difference from the previous reference is stored.
            codeStream.writeGamma(ref - prevRef);
            prevRef = ref;
        }
    }

    /**
     * If anything has been written, writes a zero marker after the last entry,
     * then rewinds the offset to the start of the block and forgets the previous word.
     *
     * @see IndexBlock#flush
     */
    @Override
    public void flush() {
        if (getOffset() > 0) {
            field.putInt2(getOffset(), 0);
            setOffset(0);
            prevWord = null;
        }
    }

    /**
     * Decodes the entry at the current offset into {@code entry} and advances the offset.
     *
     * @param entry the entry to fill with the decoded word and references
     * @return {@code true} if an entry was decoded; {@code false} if the decoded
     *         word is empty or the word data is malformed
     * @see IndexBlock#nextEntry
     */
    @Override
    public boolean nextEntry(WordEntry entry) {
        try {
            readCodeStream.reset(field.buffer(), getOffset());
            int prefixLength = readCodeStream.readByte();
            char[] word = readCodeStream.readUTF();
            if (prevWord != null && prefixLength > 0) {
                // Never take more characters than the previous word actually has, and
                // size the rebuilt word with that same clamped length so that no
                // unfilled trailing '\0' characters end up in the result.
                int sharedLength = Math.min(prefixLength, prevWord.length);
                char[] temp = new char[sharedLength + word.length];
                System.arraycopy(prevWord, 0, temp, 0, sharedLength);
                System.arraycopy(word, 0, temp, sharedLength, word.length);
                word = temp;
            }
            if (word.length == 0) {
                // An empty word means there is no further entry to read.
                return false;
            }
            entry.reset(word);
            int n = readCodeStream.readGamma();
            int prevRef = 0;
            for (int i = 0; i < n; ++i) {
                // References are stored as differences from the previous reference.
                int ref = prevRef + readCodeStream.readGamma();
                if (ref < prevRef) {
                    throw new InternalError(
                        "Decoded reference " + ref //$NON-NLS-1$
                        + " is smaller than the previous reference " + prevRef); //$NON-NLS-1$
                }
                entry.addRef(ref);
                prevRef = ref;
            }
            setOffset(readCodeStream.byteLength());
            prevWord = word;
            return true;
        } catch (UTFDataFormatException e) {
            // Malformed word data is reported the same way as having no further entry.
            return false;
        }
    }

    /**
     * Resets the block via the superclass and forgets the previous word, so the
     * next entry is handled without a shared prefix.
     *
     * @see IndexBlock#reset
     */
    @Override
    public void reset() {
        super.reset();
        prevWord = null;
    }
}