package org.gfd.gsmlocation.db;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.ArrayList;
/**
* Reader for compact store files. Compact Store is a key sorted key-value
* file for primitive types. Key-Value pairs are packed into 4kb blocks. Every
* byte that does not change is removed from the key and value, thus slightly
* reducing the payload size.<br />
* Fileformat:
* <ol>
* <li>4 bytes: Block count - number of 4KB data blocks
* <li>blockcount x (4 + keysize * 2 + valuesize * 2):
* number of entries, followed by the min/max keys and values per block
* <li>4kb blocks, aligned to 4kb boundaries, unchanged blocks are dropped
* </ol>
*/
public class BCSReader implements java.io.Closeable {

    /** Size in bytes of one data block; the header is padded to a multiple of it. */
    private static final int BLOCK_SIZE = 4096;

    /**
     * Block metadata: the block number, its entry count and the lower/upper
     * bounds of the keys and values stored in the block. Each bound is split
     * into one byte array per key/value fragment.
     */
    public final static class BlockMeta {
        public final int blockId;
        public final int count;
        public final byte[][] keyLow;
        public final byte[][] keyHigh;
        public final byte[][] valueLow;
        public final byte[][] valueHigh;

        public BlockMeta(
            int blockId,
            int count,
            byte[][] keyLow,
            byte[][] keyHigh,
            byte[][] valueLow,
            byte[][] valueHigh
        ) {
            this.blockId = blockId;
            this.count = count;
            this.keyLow = keyLow;
            this.keyHigh = keyHigh;
            this.valueLow = valueLow;
            this.valueHigh = valueHigh;
        }

        /**
         * Render a fragmented byte array as nested lists of unsigned byte
         * values, e.g. {@code [[1,2],[3]]}. Brackets are always balanced,
         * also for empty outer or inner arrays.
         */
        private String keyString(byte[][] k) {
            StringBuilder sb = new StringBuilder();
            sb.append("[");
            for (int i = 0; i < k.length; i++) {
                if (i > 0) {
                    sb.append(",");
                }
                byte[] v = k[i];
                sb.append("[");
                for (int j = 0; j < v.length; j++) {
                    if (j > 0) {
                        sb.append(",");
                    }
                    sb.append(v[j] & 0xff);
                }
                sb.append("]");
            }
            sb.append("]");
            return sb.toString();
        }

        /**
         * @return A human readable, single line description of this block.
         */
        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            sb.append("BLOCK(id=");
            sb.append(blockId);
            sb.append(",entries=");
            sb.append(count);
            sb.append(",klow=");
            sb.append(keyString(keyLow));
            sb.append(",khigh=");
            sb.append(keyString(keyHigh));
            sb.append(",vlow=");
            sb.append(keyString(valueLow));
            sb.append(",vhigh=");
            sb.append(keyString(valueHigh));
            sb.append(")");
            return sb.toString();
        }
    }

    /**
     * A single block entry: the decoded (boxed) key and value fragments.
     */
    public final static class BlockEntry {
        public Object[] key;
        public Object[] value;
    }

    protected Class<?>[] keyTypes;
    protected Class<?>[] valueTypes;
    protected RandomAccessFile file;
    protected int keySize;
    protected int valueSize;
    protected int blockCount;
    protected int[] keySizes;
    protected int[] valueSizes;

    /**
     * Open a compact store file for reading and load the block count from
     * its header.
     * @param keyTypes The boxed java types of the key fragments.
     * @param valueTypes The boxed java types of the value fragments.
     * @param file The path of the store file.
     * @throws IOException if the file can not be opened or the header can
     * not be read. The file handle is released in that case.
     */
    public BCSReader(
        Class<?>[] keyTypes,
        Class<?>[] valueTypes,
        String file
    ) throws IOException {
        this.keyTypes = keyTypes;
        this.valueTypes = valueTypes;

        // compute the per-fragment and total key size
        final int[] kSizes = new int[keyTypes.length];
        int kTotal = 0;
        for (int i = 0; i < keyTypes.length; i++) {
            kSizes[i] = type2size(keyTypes[i]);
            kTotal += kSizes[i];
        }
        this.keySize = kTotal;
        this.keySizes = kSizes;

        // compute the per-fragment and total value size
        final int[] vSizes = new int[valueTypes.length];
        int vTotal = 0;
        for (int i = 0; i < valueTypes.length; i++) {
            vSizes[i] = type2size(valueTypes[i]);
            vTotal += vSizes[i];
        }
        this.valueSize = vTotal;
        this.valueSizes = vSizes;

        // now open the file and read the header; do not leak the handle if
        // the header is unreadable (e.g. the file is shorter than 4 bytes)
        final RandomAccessFile raf = new RandomAccessFile(file, "r");
        try {
            raf.seek(0L);
            this.blockCount = raf.readInt();
        } catch (IOException e) {
            try {
                raf.close();
            } catch (IOException ignored) {
                // the original failure is the one worth reporting
            }
            throw e;
        }
        this.file = raf;
    }

    /**
     * Close the underlying file. Lookups on a closed reader fail with an
     * {@link IOException}.
     * @throws IOException if closing the file fails.
     */
    @Override
    public void close() throws IOException {
        synchronized (file) {
            file.close();
        }
    }

    /**
     * Convert a java boxed type to the required byte count (e.g. Byte to 1).
     * @param type The boxed java type
     * @return the byte count, or 0 for an unsupported type
     */
    protected int type2size(final Class<?> type) {
        if (type == Byte.class || type == Boolean.class) {
            return 1;
        }
        if (type == Short.class || type == Character.class) {
            return 2;
        }
        if (type == Integer.class || type == Float.class) {
            return 4;
        }
        if (type == Long.class || type == Double.class) {
            return 8;
        }
        return 0;
    }

    /** Assemble the first four bytes (big endian) into an int. */
    private static int toInt(byte[] bytes) {
        return ((bytes[0] & 0xff) << 24)
            | ((bytes[1] & 0xff) << 16)
            | ((bytes[2] & 0xff) << 8)
            | (bytes[3] & 0xff);
    }

    /** Assemble the first eight bytes (big endian) into a long. */
    private static long toLong(byte[] bytes) {
        return ((bytes[0] & 0xffL) << 56)
            | ((bytes[1] & 0xffL) << 48)
            | ((bytes[2] & 0xffL) << 40)
            | ((bytes[3] & 0xffL) << 32)
            | ((bytes[4] & 0xffL) << 24)
            | ((bytes[5] & 0xffL) << 16)
            | ((bytes[6] & 0xffL) << 8)
            | (bytes[7] & 0xffL);
    }

    /**
     * Convert a big endian byte array to a boxed java value. This is the
     * inverse of {@link #type2bytes(Object)}.
     * @param type The boxed java type to decode.
     * @param bytes The raw bytes, at least as long as the type requires.
     * @return The boxed value, or null for an unsupported type.
     */
    protected Object bytes2type(final Class<?> type, byte[] bytes) {
        if (type == Byte.class) {
            return bytes[0];
        }
        if (type == Boolean.class) {
            // mirrors type2bytes: true is stored as 0, false as 1
            return bytes[0] == 0;
        }
        if (type == Short.class) {
            return (short) (((bytes[0] & 0xff) << 8) | (bytes[1] & 0xff));
        }
        if (type == Character.class) {
            return (char) (((bytes[0] & 0xff) << 8) | (bytes[1] & 0xff));
        }
        if (type == Integer.class) {
            return toInt(bytes);
        }
        if (type == Long.class) {
            return toLong(bytes);
        }
        if (type == Float.class) {
            return Float.intBitsToFloat(toInt(bytes));
        }
        if (type == Double.class) {
            return Double.longBitsToDouble(toLong(bytes));
        }
        return null;
    }

    /**
     * Count the number of bytes needed to store values between the low and
     * high bound. Within each fragment, every byte from the first position
     * where the bounds differ up to the end of the fragment is counted.
     * @param low the lower bound.
     * @param high the upper bound.
     * @return The number of bytes.
     */
    protected int requiredSize(byte[][] low, byte[][] high) {
        int size = 0;
        for (int i = 0; i < low.length; i++) {
            byte[] l = low[i];
            byte[] h = high[i];
            boolean required = false;
            for (int j = 0; j < l.length; j++) {
                if (required || l[j] != h[j]) {
                    required = true;
                    size++;
                }
            }
        }
        return size;
    }

    /**
     * Count the number of bytes needed to store each fragment of a 2d byte
     * array. Same rule as {@link #requiredSize(byte[][], byte[][])}, but
     * reported per fragment.
     * @param low the lower bound.
     * @param high the upper bound.
     * @return The byte counts per fragment.
     */
    protected int[] requiredSizes(byte[][] low, byte[][] high) {
        int[] size = new int[low.length];
        for (int i = 0; i < low.length; i++) {
            byte[] l = low[i];
            byte[] h = high[i];
            boolean required = false;
            int s = 0;
            for (int j = 0; j < l.length; j++) {
                if (required || l[j] != h[j]) {
                    required = true;
                    s++;
                }
            }
            size[i] = s;
        }
        return size;
    }

    /** Encode an int as four big endian bytes. */
    private static byte[] intBytes(int i) {
        return new byte[] {
            (byte) (i >> 24), (byte) (i >> 16),
            (byte) (i >> 8), (byte) i
        };
    }

    /** Encode a long as eight big endian bytes. */
    private static byte[] longBytes(long l) {
        return new byte[] {
            (byte) (l >> 56), (byte) (l >> 48),
            (byte) (l >> 40), (byte) (l >> 32),
            (byte) (l >> 24), (byte) (l >> 16),
            (byte) (l >> 8), (byte) l
        };
    }

    /**
     * Convert a boxed java value to a big endian byte array.
     * @param o The boxed java value, or a byte[] which is passed through.
     * @return A byte representation of that value, or null if the type of
     * the value is not supported.
     */
    protected byte[] type2bytes(final Object o) {
        // nothing but a big "switch" based on the runtime type
        if (o instanceof Byte) {
            return new byte[] {((Byte) o).byteValue()};
        }
        if (o instanceof Boolean) {
            // inverted on purpose to match bytes2type: true -> 0, false -> 1
            return new byte[] {(byte) (((Boolean) o).booleanValue() ? 0 : 1)};
        }
        if (o instanceof Short) {
            short s = ((Short) o).shortValue();
            return new byte[] {(byte) (s >> 8), (byte) s};
        }
        if (o instanceof Character) {
            short s = (short) ((Character) o).charValue();
            return new byte[] {(byte) (s >> 8), (byte) s};
        }
        if (o instanceof Integer) {
            return intBytes(((Integer) o).intValue());
        }
        if (o instanceof Long) {
            return longBytes(((Long) o).longValue());
        }
        if (o instanceof Float) {
            return intBytes(Float.floatToIntBits(((Float) o).floatValue()));
        }
        if (o instanceof Double) {
            return longBytes(Double.doubleToLongBits(((Double) o).doubleValue()));
        }
        if (o instanceof byte[]) {
            return (byte[]) o;
        }
        return null;
    }

    /**
     * Offset of block metadata.
     * @param blockid The block id.
     * @return The block metadata offset
     */
    protected int blockMetaOffset(int blockid) {
        /*
         * 4 bytes block count (file header), then per block:
         *   4 bytes: number of entries
         *   keySize bytes: lower bound
         *   keySize bytes: higher bound
         *   valueSize bytes: lower bound
         *   valueSize bytes: upper bound
         */
        return 4 + (keySize * 2 + valueSize * 2 + 4) * blockid;
    }

    /**
     * Block offset, shifted by header, aligned with 4kb. The offset is
     * computed in int arithmetic, so it overflows for positions beyond 2 GiB.
     * @param blockid The block number.
     * @return The file offset of the first byte of the block.
     */
    protected int blockOffset(int blockid) {
        /*
         * Header:
         *   4 bytes block count
         *   Per Block: (count, keySize, keySize, valueSize, valueSize)
         * Header padded to 4kb + blockid * 4kb == offset
         */
        final int headerSize = 4 + blockCount * (keySize * 2 + valueSize * 2 + 4);
        final int headerBlocks = (headerSize + BLOCK_SIZE - 1) / BLOCK_SIZE;
        return headerBlocks * BLOCK_SIZE + blockid * BLOCK_SIZE;
    }

    /**
     * Read a data block (4kb).
     * @param blockid The block number.
     * @return The block data.
     * @throws IOException if the block can not be read completely.
     */
    protected byte[] readBlock(int blockid) throws IOException {
        byte[] block = new byte[BLOCK_SIZE];
        int pos = blockOffset(blockid);
        // seek + read must not interleave with another reader's seek
        synchronized (file) {
            file.seek(pos);
            file.readFully(block);
        }
        return block;
    }

    /**
     * Read totalSize bytes at the given offset and split them into
     * fragments of the given sizes.
     */
    private byte[][] readFragmentsAt(long pos, int totalSize, int[] sizes) throws IOException {
        byte[] buf = new byte[totalSize];
        synchronized (file) {
            file.seek(pos);
            file.readFully(buf);
        }
        byte[][] res = new byte[sizes.length][];
        int offset = 0;
        for (int i = 0; i < sizes.length; i++) {
            byte[] fragment = new byte[sizes[i]];
            System.arraycopy(buf, offset, fragment, 0, fragment.length);
            offset += fragment.length;
            res[i] = fragment;
        }
        return res;
    }

    /**
     * Read a key at the given offset. Return the byte array.
     * @param pos The file offset, where 0 is the head of the file.
     * @return The key, partitioned by key fragments.
     * @throws IOException if the key can not be read completely.
     */
    protected byte[][] readKeyAt(long pos) throws IOException {
        return readFragmentsAt(pos, keySize, keySizes);
    }

    /**
     * Read a value at the given offset. Return the byte array.
     * @param pos The file offset, where 0 is the head of the file.
     * @return The value, partitioned by value fragments.
     * @throws IOException if the value can not be read completely.
     */
    protected byte[][] readValueAt(long pos) throws IOException {
        return readFragmentsAt(pos, valueSize, valueSizes);
    }

    /**
     * Retrieve the number of entries in a 4kb block.
     * @param blockid The block id.
     * @return The number of entries.
     * @throws IOException if the header can not be read.
     */
    protected int blockEntryCount(int blockid) throws IOException {
        int offset = blockMetaOffset(blockid);
        synchronized (file) {
            file.seek(offset);
            return file.readInt();
        }
    }

    /**
     * Retrieve the low block limit for the keys in a block. The value is
     * the same as the first key in the block.
     * @param block the block number.
     * @return The lower bound key value.
     * @throws IOException if the header can not be read.
     */
    protected byte[][] lowBlockLimit(int block) throws IOException {
        return readKeyAt(blockMetaOffset(block) + 4);
    }

    /**
     * Retrieve the upper bound for the keys in a block. The value is the same
     * as the last key in the block.
     * @param block The block number.
     * @return The upper bound key.
     * @throws IOException if the header can not be read.
     */
    protected byte[][] highBlockLimit(int block) throws IOException {
        return readKeyAt(blockMetaOffset(block) + 4 + keySize);
    }

    /**
     * Retrieve the minimum value bound for a block.
     * @param block The block number.
     * @return The lower bound.
     * @throws IOException if the header can not be read.
     */
    protected byte[][] lowBlockValueLimit(int block) throws IOException {
        return readValueAt(blockMetaOffset(block) + 2 * keySize + 4);
    }

    /**
     * Retrieve the maximum value bound for a block.
     * @param block The block number.
     * @return The maximum bound.
     * @throws IOException if the header can not be read.
     */
    protected byte[][] highBlockValueLimit(int block) throws IOException {
        return readValueAt(blockMetaOffset(block) + 2 * keySize + valueSize + 4);
    }

    /**
     * Retrieve all metadata for a single block.
     * @param blockid The block number.
     * @return The BlockMeta instance.
     * @throws IOException if the header can not be read.
     */
    protected BlockMeta getBlockMeta(int blockid) throws IOException {
        return new BlockMeta(
            blockid,
            blockEntryCount(blockid),
            lowBlockLimit(blockid),
            highBlockLimit(blockid),
            lowBlockValueLimit(blockid),
            highBlockValueLimit(blockid)
        );
    }

    /**
     * Build the metadata of a block whose key bounds were already read,
     * loading only the entry count and the value bounds from the file.
     */
    private BlockMeta loadMeta(int blockid, byte[][] lowKey, byte[][] highKey) throws IOException {
        return new BlockMeta(
            blockid,
            blockEntryCount(blockid),
            lowKey,
            highKey,
            lowBlockValueLimit(blockid),
            highBlockValueLimit(blockid)
        );
    }

    /**
     * Compare two key arrays up to the maximum level of the shorter one.
     * Bytes are compared as unsigned values; fragments and bytes beyond the
     * shorter operand are ignored, so a key prefix compares equal to every
     * key it is a prefix of.
     * @param l The left key.
     * @param r The right key.
     * @return -1 if the left key is smaller, 0 if the keys are the same and
     * 1 if the left key is larger than the right key.
     */
    protected int compare(byte[][] l, byte[][] r) {
        final int len = Math.min(l.length, r.length);
        for (int i = 0; i < len; i++) {
            final byte[] li = l[i];
            final byte[] ri = r[i];
            final int leni = Math.min(li.length, ri.length);
            for (int j = 0; j < leni; j++) {
                final int lv = li[j] & 0xff;
                final int rv = ri[j] & 0xff;
                if (lv < rv) {
                    return -1;
                }
                if (lv > rv) {
                    return 1;
                }
            }
        }
        return 0;
    }

    /**
     * Search for the block range containing a key, retrieve the BlockMeta
     * information for the first and last block.
     * @param key The key prefix.
     * @param low The current low bound of the search.
     * @param high The current upper bound of the search.
     * @return The block metadata of the first and the last candidate block
     * (both elements are the same instance for a single block), or null if
     * not found.
     * @throws IOException if the header can not be read.
     */
    protected BlockMeta[] blockRangeSearch(
        byte[][] key,
        int low,
        int high
    ) throws IOException
    {
        if (low > high) {
            return null;
        }
        final byte[][] lowKey = lowBlockLimit(low);
        final byte[][] highKey = highBlockLimit(high);
        if (compare(lowKey, key) > 0) {
            // the key is smaller than everything in the range
            return null;
        }
        if (compare(highKey, key) < 0) {
            // the key is larger than everything in the range
            return null;
        }
        if (low == high) {
            // we are down to one block, load it and return it as first and
            // last block
            final BlockMeta meta = loadMeta(low, lowKey, highKey);
            return new BlockMeta[]{meta, meta};
        }
        final byte[][] lowHighKey = highBlockLimit(low);
        final byte[][] highLowKey = lowBlockLimit(high);
        final int cmpLowHigh = compare(lowHighKey, key);
        final int cmpHighLow = compare(highLowKey, key);
        if (high - low == 1) {
            // we have to check if we need both, or none, but we handle this
            // before we're searching for a mid.
            if (cmpLowHigh < 0) {
                // we don't need the lower block
                return blockRangeSearch(key, low + 1, high);
            }
            if (cmpHighLow > 0) {
                // we don't need the upper block
                return blockRangeSearch(key, low, high - 1);
            }
            return new BlockMeta[]{
                loadMeta(low, lowKey, lowHighKey),
                loadMeta(high, highLowKey, highKey)
            };
        }
        if (cmpLowHigh == 0 && cmpHighLow == 0) {
            // the key spans from the end of the first to the start of the
            // last block: we need the full range
            return new BlockMeta[]{
                loadMeta(low, lowKey, lowHighKey),
                loadMeta(high, highLowKey, highKey)
            };
        }
        // low and high are non-negative here; the unsigned shift avoids the
        // int overflow of (low + high) / 2
        final int mid = (low + high) >>> 1;
        if (compare(lowBlockLimit(mid), key) > 0) {
            return blockRangeSearch(key, low, mid - 1);
        }
        if (compare(highBlockLimit(mid), key) < 0) {
            return blockRangeSearch(key, mid + 1, high);
        }
        // mid is part of the range: its first block lies in [low, mid] and
        // its last block in [mid, high]
        final BlockMeta[] first = blockRangeSearch(key, low, mid);
        final BlockMeta[] last = blockRangeSearch(key, mid, high);
        if (first == null || last == null) {
            // should not happen for a key sorted file; treat an inconsistent
            // index as "not found" instead of failing with a NullPointerException
            return null;
        }
        return new BlockMeta[]{first[0], last[1]};
    }

    /**
     * Search for all occurrences of a given key prefix.
     * @param key The key prefix.
     * @return Array of lower and upper bound metadata, or null if not found.
     * @throws IOException if the header can not be read.
     */
    protected BlockMeta[] blockRangeSearch(byte[][] key) throws IOException {
        return blockRangeSearch(key, 0, blockCount - 1);
    }

    /**
     * Create full sized fragment buffers pre-filled with the block's lower
     * bound; the bytes shared by all entries of the block come from here.
     */
    private static byte[][] copyFragments(byte[][] low, int[] sizes) {
        byte[][] copy = new byte[sizes.length][];
        for (int i = 0; i < sizes.length; i++) {
            copy[i] = new byte[sizes[i]];
            System.arraycopy(low[i], 0, copy[i], 0, sizes[i]);
        }
        return copy;
    }

    /**
     * Copy the packed (varying) bytes of one key or value from the block
     * into the tail of each fragment buffer and return the advanced offset.
     */
    private static int unpack(
        byte[] block,
        int offset,
        byte[][] target,
        int[] fullSizes,
        int[] packedSizes
    ) {
        for (int i = 0; i < fullSizes.length; i++) {
            final int packed = packedSizes[i];
            if (packed > 0) {
                System.arraycopy(block, offset, target[i], fullSizes[i] - packed, packed);
                offset += packed;
            }
        }
        return offset;
    }

    /**
     * Scan a full block for a single key/value pair identified by key.
     * Entries are key sorted, so the scan stops at the first key that is
     * larger than the requested one.
     * @param meta The block metadata.
     * @param key The key that should be found.
     * @return The value data or null if not found.
     * @throws IOException if the block can not be read.
     */
    protected byte[][] scanBlock(BlockMeta meta, byte[][] key) throws IOException {
        final byte[] block = readBlock(meta.blockId);
        final byte[][] keybuf = copyFragments(meta.keyLow, keySizes);
        final int blockValueSize = requiredSize(meta.valueLow, meta.valueHigh);
        final int[] blockKeySizes = requiredSizes(meta.keyLow, meta.keyHigh);
        final int[] blockValueSizes = requiredSizes(meta.valueLow, meta.valueHigh);
        int offset = 0;
        for (int remaining = meta.count; remaining > 0; remaining--) {
            // load block key
            offset = unpack(block, offset, keybuf, keySizes, blockKeySizes);
            final int cmp = compare(keybuf, key);
            if (cmp < 0) {
                // not there yet, skip the value of this entry
                offset += blockValueSize;
                continue;
            }
            if (cmp > 0) {
                // we are past the key. Continuing without skipping the value
                // would decode value bytes as keys and could report a false
                // match, so stop here.
                break;
            }
            // we have the right entry, decode the value
            final byte[][] valuebuf = copyFragments(meta.valueLow, valueSizes);
            unpack(block, offset, valuebuf, valueSizes, blockValueSizes);
            return valuebuf;
        }
        return null;
    }

    /**
     * Scan a block for all key/value pairs starting with a given key prefix.
     * @param meta The block metadata.
     * @param key The key prefix.
     * @return Array of block entries, empty if the block has no match.
     * @throws IOException if the block can not be read.
     */
    protected BlockEntry[] scanFullBlock(BlockMeta meta, byte[][] key) throws IOException {
        final ArrayList<BlockEntry> entries = new ArrayList<BlockEntry>();
        final byte[] block = readBlock(meta.blockId);
        final byte[][] keybuf = copyFragments(meta.keyLow, keySizes);
        final int blockValueSize = requiredSize(meta.valueLow, meta.valueHigh);
        final int[] blockKeySizes = requiredSizes(meta.keyLow, meta.keyHigh);
        final int[] blockValueSizes = requiredSizes(meta.valueLow, meta.valueHigh);
        int offset = 0;
        for (int remaining = meta.count; remaining > 0; remaining--) {
            offset = unpack(block, offset, keybuf, keySizes, blockKeySizes);
            final int cmp = compare(keybuf, key);
            if (cmp < 0) {
                offset += blockValueSize;
                continue;
            }
            if (cmp > 0) {
                // entries are sorted, nothing behind this one can match
                break;
            }
            // we have a hit, decode value and create a BlockEntry
            final byte[][] valuebuf = copyFragments(meta.valueLow, valueSizes);
            offset = unpack(block, offset, valuebuf, valueSizes, blockValueSizes);
            final BlockEntry e = new BlockEntry();
            final Object[] okey = new Object[keybuf.length];
            for (int i = 0; i < keybuf.length; i++) {
                okey[i] = bytes2type(keyTypes[i], keybuf[i]);
            }
            e.key = okey;
            final Object[] ovalue = new Object[valuebuf.length];
            for (int i = 0; i < valuebuf.length; i++) {
                ovalue[i] = bytes2type(valueTypes[i], valuebuf[i]);
            }
            e.value = ovalue;
            entries.add(e);
        }
        return entries.toArray(new BlockEntry[entries.size()]);
    }

    /**
     * Encode the boxed key fragments into their byte representation.
     * @throws IllegalArgumentException if a fragment is null or of a type
     * that type2bytes does not support.
     */
    private byte[][] encodeKey(Object[] key) {
        final byte[][] encoded = new byte[key.length][];
        for (int i = 0; i < key.length; i++) {
            final byte[] fragment = type2bytes(key[i]);
            if (fragment == null) {
                throw new IllegalArgumentException(
                    "Unsupported key fragment at index " + i + ": " + key[i]);
            }
            encoded[i] = fragment;
        }
        return encoded;
    }

    /**
     * Retrieve the value(s) for a given key, or null if the given key can not
     * be found.
     * @param key The key.
     * @return The value(s) associated with the key.
     * @throws IOException if the file can not be read.
     * @throws IllegalArgumentException if a key fragment is null or of an
     * unsupported type.
     */
    public Object[] get(Object ... key) throws IOException {
        final byte[][] bkey = encodeKey(key);
        // we have a multi-byte sequence now, search for the key :-)
        final BlockMeta[] meta = blockRangeSearch(bkey);
        if (meta == null) {
            return null;
        }
        // we have a block range, scan for the value block by block
        byte[][] value = scanBlock(meta[0], bkey);
        for (int i = meta[0].blockId + 1; value == null && i <= meta[1].blockId; i++) {
            value = scanBlock(getBlockMeta(i), bkey);
        }
        if (value == null) {
            return null;
        }
        // transform the raw value into boxed java values
        final Object[] result = new Object[value.length];
        for (int i = 0; i < value.length; i++) {
            result[i] = bytes2type(valueTypes[i], value[i]);
        }
        return result;
    }

    /**
     * Retrieve all entries with a given key prefix.
     * @param key The key prefix.
     * @return Array of entries, or null if no block can contain the prefix.
     * @throws IOException if the file can not be read.
     * @throws IllegalArgumentException if a key fragment is null or of an
     * unsupported type.
     */
    public BlockEntry[] getAll(Object ... key) throws IOException {
        final byte[][] bkey = encodeKey(key);
        // we have a multi-byte sequence now, search for the key :-)
        final BlockMeta[] meta = blockRangeSearch(bkey);
        if (meta == null) {
            return null;
        }
        final ArrayList<BlockEntry> entries = new ArrayList<BlockEntry>();
        for (int i = meta[0].blockId; i <= meta[1].blockId; i++) {
            for (BlockEntry e : scanFullBlock(getBlockMeta(i), bkey)) {
                entries.add(e);
            }
        }
        return entries.toArray(new BlockEntry[entries.size()]);
    }
}