/*
* eXist Open Source Native XML Database
* Copyright (C) 2001-06 The eXist Project
* http://exist-db.org
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
* $Id$
*/
package org.exist.numbering;
import org.exist.storage.io.VariableByteInput;
import java.io.IOException;
import java.util.Arrays;
/**
 * Base class representing a node id in the form of a dynamic level number (DLN).
 * See {@link DLN}. DLNBase handles the efficient binary encoding of node ids.
 *
 * <p>An id is a sequence of components (level ids and sub-level ids) which are stored
 * consecutively in a bit string, most significant bit first. Each component uses a
 * prefix free encoding built from fixed size units of {@link #BITS_PER_UNIT} bits:
 * a component occupying <code>n</code> units is written as <code>n - 1</code> 1-bits,
 * followed by a single 0-bit, followed by <code>n * (BITS_PER_UNIT - 1)</code> value bits.
 * The leading 1-bits thus tell how many units belong to the component, so no
 * separator is needed between the units themselves.</p>
 *
 * <p>Two consecutive components are separated by exactly one bit: a 0-bit
 * ({@link #LEVEL_SEPARATOR}) starts a new level, a 1-bit ({@link #SUBLEVEL_SEPARATOR})
 * starts a sub-level of the current level.</p>
 *
 * @author wolf
 */
public class DLNBase {

    /**
     * The default number of bits used per fixed
     * size unit.
     */
    public final static int BITS_PER_UNIT = 4;

    /**
     * Masks selecting the most significant bits of a byte:
     * <code>BIT_MASK[i]</code> has the <code>i + 1</code> highest bits set
     * (0x80, 0xC0, 0xE0, ... 0xFF).
     */
    public final static int[] BIT_MASK = new int[8];

    static {
        BIT_MASK[0] = 0x80;
        for (int i = 1; i < BIT_MASK.length; i++) {
            // the added bit never overlaps the previous mask, so OR equals addition here
            BIT_MASK[i] = BIT_MASK[i - 1] | (1 << (7 - i));
        }
    }

    /**
     * Lists, per number of units, the exclusive upper bound of the values that
     * can be encoded. PER_COMPONENT_SIZE[0] corresponds to 1 unit used,
     * PER_COMPONENT_SIZE[1] to 2 units, and so on. With BITS_PER_UNIT = 4 the
     * bounds are 7, 71, 583 ...: one unit encodes the values 0 to 6, two units
     * encode 7 to 70, three units 71 to 582 (see {@link #getUnitsRequired(int)}).
     */
    protected final static int[] PER_COMPONENT_SIZE = initComponents();

    /**
     * Computes the table behind {@link #PER_COMPONENT_SIZE}.
     *
     * @return the exclusive upper bounds for 1 to 10 units
     */
    private static int[] initComponents() {
        final int[] size = new int[10];
        size[0] = 7; // = 2^3 - 1
        for (int i = 1; i < size.length; i++) {
            final int components = i + 1;
            final int numBits = components * BITS_PER_UNIT - components;
            // numBits is at most 30, so the shift cannot overflow an int
            size[i] = (1 << numBits) + size[i - 1];
        }
        return size;
    }

    /** Shift which converts a bit index into the index of the byte holding that bit. */
    protected final static int UNIT_SHIFT = 3;

    /** A 0-bit is used to mark the start of a new level */
    protected final static int LEVEL_SEPARATOR = 0;

    /**
     * A 1-bit marks the start of a sub level, which is logically a part
     * of the current level.
     */
    protected final static int SUBLEVEL_SEPARATOR = 1;

    /** The bits of the id, stored most significant bit first. */
    protected byte[] bits;

    /**
     * Index of the last bit written into {@link #bits}; new bits are
     * appended after it. -1 means that no bit has been written yet.
     */
    protected int bitIndex = -1;

    /**
     * Creates an empty id with room for a single byte.
     */
    public DLNBase() {
        bits = new byte[1];
    }

    /**
     * Copy constructor. The bit data of the given id is duplicated, not shared.
     *
     * @param dln the id to copy
     */
    public DLNBase(DLNBase dln) {
        this.bits = new byte[dln.bits.length];
        System.arraycopy(dln.bits, 0, this.bits, 0, dln.bits.length);
        this.bitIndex = dln.bitIndex;
    }

    /**
     * Creates an id from a serialized representation.
     *
     * @param units the number of bits (not bytes) which belong to the id
     * @param data the array to read the id from
     * @param startOffset position of the first byte of the id in data
     * @throws IllegalArgumentException if units is negative
     */
    public DLNBase(int units, byte[] data, int startOffset) {
        if (units < 0) {
            throw new IllegalArgumentException("Negative size for DLN: " + units);
        }
        final int blen = byteLength(units);
        bits = new byte[blen];
        System.arraycopy(data, startOffset, bits, 0, blen);
        bitIndex = units - 1;
    }

    /**
     * Creates an id from the first nbits bits of data. Bits of the last,
     * partially used byte which lie beyond nbits are cleared.
     *
     * @param data the array to read from, starting at index 0
     * @param nbits the number of bits to take over
     */
    protected DLNBase(byte[] data, int nbits) {
        final int remainder = nbits % 8;
        final int len = nbits / 8;
        bits = new byte[byteLength(nbits)];
        if (len > 0) {
            System.arraycopy(data, 0, bits, 0, len);
        }
        if (remainder > 0) {
            // keep only the leading "remainder" bits of the partially used byte
            bits[len] = (byte) (data[len] & BIT_MASK[remainder - 1]);
        }
        bitIndex = nbits - 1;
    }

    /**
     * Reads an id from the given input.
     *
     * @param bitCnt the number of bits which belong to the id
     * @param is the input to read the bytes from
     * @throws IOException if reading from the input fails
     */
    public DLNBase(short bitCnt, VariableByteInput is) throws IOException {
        bits = new byte[byteLength(bitCnt)];
        is.read(bits);
        bitIndex = bitCnt - 1;
    }

    /**
     * Reads a prefix compressed id from the given input: the first prefixLen
     * bytes are taken from the previous id, the remaining bytes are read
     * from the input.
     *
     * @param prefixLen number of leading bytes shared with previous
     * @param previous the id providing the shared prefix
     * @param bitCnt the number of bits which belong to the new id
     * @param is the input to read the remaining bytes from
     * @throws IOException if prefixLen does not fit the previous or the new id,
     *                     or if reading from the input fails
     */
    public DLNBase(byte prefixLen, DLNBase previous, short bitCnt, VariableByteInput is) throws IOException {
        final int blen = byteLength(bitCnt);
        bits = new byte[blen];
        if (previous.bits.length < prefixLen) {
            throw new IOException("Found wrong prefix len: " + prefixLen + ". Previous: " + previous.toString());
        }
        // report a corrupt prefix length as an I/O problem instead of
        // failing with an IndexOutOfBoundsException in arraycopy
        if (prefixLen < 0 || prefixLen > blen) {
            throw new IOException("Found wrong prefix len: " + prefixLen + ". Size in bytes: " + blen);
        }
        System.arraycopy(previous.bits, 0, bits, 0, prefixLen);
        is.read(bits, prefixLen, blen - prefixLen);
        bitIndex = bitCnt - 1;
    }

    /**
     * Set the level id which starts at offset to the
     * given id value. The id ends after the newly written
     * level id: whatever followed offset before is no longer
     * part of the id.
     *
     * @param offset bit offset at which the level id starts
     * @param levelId the new value
     */
    public void setLevelId(int offset, int levelId) {
        bitIndex = offset - 1;
        setCurrentLevelId(levelId);
    }

    /**
     * Adds a new level to the node id, using levelId
     * as initial value. Unless the id is still empty, a
     * separator bit is written first.
     *
     * @param levelId initial value
     * @param isSubLevel if true, the new component is added as a sub-level
     *                   of the current level; otherwise it starts a new level
     */
    public void addLevelId(int levelId, boolean isSubLevel) {
        if (bitIndex > -1) {
            setNextBit(isSubLevel);
        }
        setCurrentLevelId(levelId);
    }

    /**
     * Increments the last level id by one.
     */
    public void incrementLevelId() {
        final int last = lastFieldPosition();
        bitIndex = last - 1;
        setCurrentLevelId(getLevelId(last) + 1);
    }

    /**
     * Decrements the last level id by one. The value never drops below 0.
     */
    public void decrementLevelId() {
        final int last = lastFieldPosition();
        bitIndex = last - 1;
        int levelId = getLevelId(last) - 1;
        if (levelId < 1) {
            levelId = 0;
        }
        setCurrentLevelId(levelId);
        // after decrementing, the DLN may need less bytes
        // than before. Remove the unused bytes, otherwise binary
        // comparisons may get wrong.
        trimToSize();
    }

    /**
     * Writes the given level id behind the current bit position.
     * The data array will be resized automatically if the bit set is
     * too small to encode the id.
     *
     * @param levelId the value to encode
     */
    protected void setCurrentLevelId(int levelId) {
        final int units = getUnitsRequired(levelId);
        final int numBits = bitWidth(units);
        if (units > 1) {
            // values are stored relative to the smallest value needing this many units
            levelId -= PER_COMPONENT_SIZE[units - 2];
        }
        // unit count: (units - 1) 1-bits terminated by a 0-bit
        for (int i = 1; i < units; i++) {
            setNextBit(true);
        }
        setNextBit(false);
        // the value itself, most significant bit first
        for (int i = numBits - 1; i >= 0; i--) {
            setNextBit(((levelId >>> i) & 1) != 0);
        }
    }

    /**
     * Returns the id starting at offset.
     *
     * @param startBit bit offset of the first bit of the level id
     * @return the level id
     */
    public int getLevelId(int startBit) {
        final int units = unitsUsed(startBit, bits);
        // skip the bits encoding the number of units
        startBit += units;
        final int numBits = bitWidth(units);
        int id = 0;
        for (int i = numBits - 1; i >= 0; i--) {
            if (isBitSet(bits, startBit++)) {
                id |= 1 << i;
            }
        }
        if (units > 1) {
            id += PER_COMPONENT_SIZE[units - 2];
        }
        return id;
    }

    /**
     * Returns the number of bits currently used to encode the id.
     * Despite its name the method counts bits, not units of
     * {@link #BITS_PER_UNIT} bits.
     *
     * @return the number of bits
     */
    public int units() {
        return bitIndex + 1;
    }

    /**
     * Returns the size of this id by counting the bytes
     * used to encode it.
     *
     * @return the size in bytes
     */
    public int size() {
        return bits.length;
    }

    /**
     * Determines the number of units used by the component starting at
     * startBit by counting its leading 1-bits.
     *
     * @param startBit bit offset of the first bit of the component
     * @param bits the bit data to inspect
     * @return the number of units, which is at least 1
     */
    private static int unitsUsed(int startBit, byte[] bits) {
        int units = 1;
        while (isBitSet(bits, startBit++)) {
            ++units;
        }
        return units;
    }

    /**
     * Tests a single bit. Bits are numbered starting with the most
     * significant bit of the first byte.
     *
     * @param data the bit data to inspect
     * @param index the index of the bit to test
     * @return true if the bit is 1
     */
    private static boolean isBitSet(byte[] data, int index) {
        return (data[index >> UNIT_SHIFT] & (1 << ((7 - index) & 7))) != 0;
    }

    /**
     * Returns the number of bytes needed to hold the given number of
     * bits, rounding up to the next full byte.
     *
     * @param nbits the number of bits
     * @return the number of bytes
     */
    private static int byteLength(int nbits) {
        int blen = nbits / 8;
        if (nbits % 8 > 0) {
            ++blen;
        }
        return blen;
    }

    /**
     * Checks if the bit at the given position is a 0-bit, i.e. has
     * the value of {@link #LEVEL_SEPARATOR}.
     *
     * @param index the index of the bit to test
     * @return true if the bit is 0
     */
    public boolean isLevelSeparator(int index) {
        return !isBitSet(bits, index);
    }

    /**
     * Returns the number of levels found in this id from startOffset on. For
     * startOffset 0 this corresponds to the depth at which the node occurs
     * within the node tree. Sub-level components are not counted separately.
     *
     * @param startOffset bit offset of the first component to look at
     * @return the number of levels
     */
    public int getLevelCount(int startOffset) {
        int bit = startOffset;
        int count = 0;
        while (bit > -1 && bit <= bitIndex) {
            final int units = unitsUsed(bit, bits);
            bit += units + bitWidth(units);
            // a component counts if it is the last one or if it is followed by
            // a level separator (0-bit); the separator bit is consumed
            if (bit >= bitIndex || !isBitSet(bits, bit++)) {
                ++count;
            }
        }
        return count;
    }

    /**
     * Returns the number of sub-levels in the id starting at
     * startOffset. This is required to determine where a node
     * can be inserted. Counting stops at the first level separator.
     *
     * @param startOffset bit offset of the first component to look at
     * @return number of sub-levels
     */
    public int getSubLevelCount(int startOffset) {
        int bit = startOffset;
        int count = 0;
        while (bit > -1 && bit <= bitIndex) {
            final int units = unitsUsed(bit, bits);
            bit += units + bitWidth(units);
            ++count;
            // stop at the first level separator (0-bit); the separator bit is consumed
            if (bit < bitIndex && !isBitSet(bits, bit++)) {
                break;
            }
        }
        return count;
    }

    /**
     * Return all level ids converted to int. The components are returned in
     * the order in which they are stored and in which {@link #toString()}
     * prints them; sub-level components are included as separate entries.
     *
     * @return all level ids in this node id; an empty array if the id is empty
     */
    public int[] getLevelIds() {
        // every component needs at least BITS_PER_UNIT bits plus one separator bit
        final int[] buffer = new int[(bitIndex + 2) / (BITS_PER_UNIT + 1) + 1];
        int count = 0;
        int offset = 0;
        while (offset <= bitIndex) {
            if (offset > 0) {
                // skip the separator bit preceding every component but the first
                ++offset;
            }
            final int id = getLevelId(offset);
            buffer[count++] = id;
            offset += getUnitsRequired(id) * BITS_PER_UNIT;
        }
        final int[] ids = new int[count];
        System.arraycopy(buffer, 0, ids, 0, count);
        return ids;
    }

    /**
     * Find the last level in the id and return its offset.
     * Sub-level components do not start a new level.
     *
     * @return start-offset of the last level id.
     */
    public int lastLevelOffset() {
        int bit = 0;
        int lastOffset = 0;
        while (bit <= bitIndex) {
            // check if the next bit starts a new level or just a sub-level component
            if (bit > 0) {
                if (!isBitSet(bits, bit)) {
                    lastOffset = bit + 1;
                }
                ++bit;
            }
            final int units = unitsUsed(bit, bits);
            bit += units + bitWidth(units);
        }
        return lastOffset;
    }

    /**
     * Find the last component of the id, no matter if it is a level
     * or a sub-level, and return its offset.
     *
     * @return start-offset of the last component
     */
    protected int lastFieldPosition() {
        int bit = 0;
        int lastOffset = 0;
        while (bit <= bitIndex) {
            if (bit > 0) {
                // skip the separator bit
                lastOffset = ++bit;
            }
            final int units = unitsUsed(bit, bits);
            bit += units + bitWidth(units);
        }
        return lastOffset;
    }

    /**
     * Set (or unset) the next bit in the current sequence
     * of bits. The current position is moved forward and the
     * bit set is resized if necessary.
     *
     * @param value the value of the bit to set, i.e. 1 (true) or 0 (false)
     */
    private void setNextBit(boolean value) {
        ++bitIndex;
        final int byteIndex = bitIndex >> UNIT_SHIFT;
        if (byteIndex >= bits.length) {
            final byte[] grown = new byte[bits.length + 1];
            System.arraycopy(bits, 0, grown, 0, bits.length);
            bits = grown;
        }
        final int mask = 1 << ((7 - bitIndex) & 7);
        if (value) {
            bits[byteIndex] |= mask;
        } else {
            bits[byteIndex] &= ~mask;
        }
    }

    /**
     * Calculates the number of bits available in a bit set
     * that uses the given number of units. These are the bits
     * that can be actually used for the id, not including the
     * leading bits which encode the number of units.
     *
     * @param units the number of units
     * @return number of bits available
     */
    protected static int bitWidth(int units) {
        return (units * BITS_PER_UNIT) - units;
    }

    /**
     * Calculates the minimum number of units that would be required
     * to properly encode the given integer.
     *
     * @param levelId the integer to encode in the level id
     * @return number of units required
     * @throws IllegalStateException if levelId is too large to be encoded
     */
    protected static int getUnitsRequired(int levelId) {
        for (int i = 0; i < PER_COMPONENT_SIZE.length; i++) {
            if (levelId < PER_COMPONENT_SIZE[i]) {
                return i + 1;
            }
        }
        // can't happen
        throw new IllegalStateException("Number of nodes exceeds the internal limit");
    }

    /**
     * Shrinks the byte array to the number of bytes
     * actually needed by the id.
     */
    protected void compact() {
        trimToSize();
    }

    /**
     * Drops the bytes behind the byte holding the last used bit.
     * Does nothing if the array already has the required size.
     */
    private void trimToSize() {
        final int blen = byteLength(bitIndex + 1);
        if (blen < bits.length) {
            final byte[] trimmed = new byte[blen];
            System.arraycopy(bits, 0, trimmed, 0, blen);
            bits = trimmed;
        }
    }

    /**
     * Copies the bytes of this id into the given array.
     *
     * @param data the target array
     * @param offset position in data at which the first byte is written
     */
    public void serialize(byte[] data, int offset) {
        System.arraycopy(bits, 0, data, offset, bits.length);
    }

    /**
     * Returns the number of bytes occupied by an id
     * which consists of the given number of bits.
     *
     * @param units the number of bits (not bytes) of the id
     * @param data not used
     * @param startOffset not used
     * @return the length in bytes
     */
    public static int getLengthInBytes(int units, byte[] data, int startOffset) {
        return byteLength(units);
    }

    /**
     * Compares this id with another one. Two ids are equal if they
     * have the same number of bits and identical byte arrays.
     *
     * @param other the id to compare with, may be null
     * @return true if both ids are equal, false otherwise or if other is null
     */
    public boolean equals(DLNBase other) {
        if (other == null) {
            return false;
        }
        if (bitIndex != other.bitIndex) {
            return false;
        }
        return Arrays.equals(bits, other.bits);
    }

    /**
     * Compares the leading bits of this id with those of other. The bits
     * are compared as unsigned values.
     *
     * @param other the id to compare with
     * @param bitCount index of the last bit to compare: the bits
     *                 0 to bitCount (inclusive) take part in the comparison
     * @return a negative value, 0 or a positive value if the compared bits of
     *         this id are less than, equal to or greater than those of other
     */
    public int compareBits(DLNBase other, int bitCount) {
        final int bytes = bitCount / 8;
        final int remaining = bitCount % 8;
        for (int i = 0; i < bytes; i++) {
            if (bits[i] != other.bits[i]) {
                return (bits[i] & 0xFF) - (other.bits[i] & 0xFF);
            }
        }
        final int mask = BIT_MASK[remaining];
        return (bits[bytes] & mask) - (other.bits[bytes] & mask);
    }

    /**
     * Checks if the current DLN starts with the
     * same bit sequence as other. This is used
     * to test ancestor-descendant relationships.
     * An empty id is a prefix of every id.
     *
     * @param other the id which may be a prefix of this id
     * @return true if all bits of other match the leading bits of this id
     */
    public boolean startsWith(DLNBase other) {
        if (other.bitIndex > bitIndex) {
            return false;
        }
        if (other.bitIndex < 0) {
            // other is empty: there is nothing to compare
            return true;
        }
        final int bytes = other.bitIndex / 8;
        final int remaining = other.bitIndex % 8;
        for (int i = 0; i < bytes; i++) {
            if (bits[i] != other.bits[i]) {
                return false;
            }
        }
        final int mask = BIT_MASK[remaining];
        return (bits[bytes] & mask) == (other.bits[bytes] & mask);
    }

    /**
     * Returns a string for debugging which shows the id, its
     * bit representation and the number of bits used.
     *
     * @return debug representation of the id
     */
    public String debug() {
        final StringBuilder buf = new StringBuilder();
        buf.append(toString());
        buf.append(" = ");
        buf.append(toBitString());
        buf.append(" [");
        buf.append(bitIndex + 1);
        buf.append(']');
        return buf.toString();
    }

    /**
     * Returns the components of the id as decimal numbers. A new level
     * is introduced by '.', a sub-level by '/'.
     *
     * @return string representation of the id
     */
    @Override
    public String toString() {
        final StringBuilder buf = new StringBuilder();
        int offset = 0;
        while (offset <= bitIndex) {
            if (offset > 0) {
                // the separator bit tells if a level or a sub-level follows
                if (isBitSet(bits, offset++)) {
                    buf.append('/');
                } else {
                    buf.append('.');
                }
            }
            final int id = getLevelId(offset);
            buf.append(id);
            offset += getUnitsRequired(id) * BITS_PER_UNIT;
        }
        return buf.toString();
    }

    /**
     * Returns a string showing the bits of all bytes of the id.
     *
     * @return string of '0' and '1' characters, 8 per byte
     */
    public String toBitString() {
        final StringBuilder buf = new StringBuilder();
        for (int i = 0; i < bits.length; i++) {
            buf.append(toBitString(bits[i]));
        }
        return buf.toString();
    }

    private final static char[] digits = { '0', '1' };

    /**
     * Returns a string showing the bit representation
     * of the given byte.
     *
     * @param b the byte to display
     * @return string representation, most significant bit first
     */
    public final static String toBitString(byte b) {
        final char[] buf = new char[8];
        for (int i = 0; i < 8; i++) {
            buf[i] = digits[(b >> (7 - i)) & 1];
        }
        return new String(buf);
    }
}