/*
* Copyright 2006-2010 Amazon Technologies, Inc. or its affiliates.
* Amazon, Amazon.com and Carbonado are trademarks or registered trademarks
* of Amazon Technologies, Inc. or its affiliates. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.taobao.tddl.optimizer.core.datatype;
import static com.taobao.tddl.optimizer.core.datatype.EncodingConstants.NOT_NULL_BYTE_LOW;
import static com.taobao.tddl.optimizer.core.datatype.EncodingConstants.NULL_BYTE_HIGH;
import static com.taobao.tddl.optimizer.core.datatype.EncodingConstants.NULL_BYTE_LOW;
import static com.taobao.tddl.optimizer.core.datatype.EncodingConstants.ONE_HUNDRED;
import static com.taobao.tddl.optimizer.core.datatype.EncodingConstants.ONE_THOUSAND;
import static com.taobao.tddl.optimizer.core.datatype.EncodingConstants.TERMINATOR;
import java.math.BigDecimal;
import java.math.BigInteger;
/**
 * A very low-level class that supports encoding of primitive data into unique,
 * sortable byte array keys. If the data to encode is of a variable size, then
 * it is written in base-32768, using only byte values 32..223. This allows
 * special values such as nulls and terminators to be unambiguously encoded.
 * Terminators for variable data can be encoded using 1 for ascending order and
 * 254 for descending order. Nulls can be encoded as 255 for high ordering and 0
 * for low ordering.
 * <p>
 * Every method is static. None of the methods visible here grows the
 * destination array or checks its capacity up front; callers size it using the
 * matching {@code calculateEncodedLength} / {@code calculateEncodedStringLength}
 * method.
 *
 * @author Brian S O'Neill
 * @see KeyDecoder
 * @see DataEncoder
 */
public class KeyEncoder {

    /**
     * Encodes the given signed integer into exactly 4 bytes for descending
     * order.
     *
     * @param value signed integer value to encode
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     */
    public static void encodeDesc(int value, byte[] dst, int dstOffset) {
        // The bitwise complement is handed to DataEncoder's int encoder.
        DataEncoder.encode(~value, dst, dstOffset);
    }

    /**
     * Encodes the given signed Integer object into exactly 1 or 5 bytes for
     * descending order. If the Integer object is never expected to be null,
     * consider encoding as an int primitive.
     *
     * @param value optional signed Integer value to encode
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     * @return amount of bytes written: 1 when value is null, otherwise 5
     */
    public static int encodeDesc(Integer value, byte[] dst, int dstOffset) {
        if (value == null) {
            dst[dstOffset] = NULL_BYTE_LOW;
            return 1;
        } else {
            // One marker byte followed by the complemented 4-byte value.
            dst[dstOffset] = NOT_NULL_BYTE_LOW;
            DataEncoder.encode(~value.intValue(), dst, dstOffset + 1);
            return 5;
        }
    }

    /**
     * Encodes the given signed long into exactly 8 bytes for descending order.
     *
     * @param value signed long value to encode
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     */
    public static void encodeDesc(long value, byte[] dst, int dstOffset) {
        DataEncoder.encode(~value, dst, dstOffset);
    }

    /**
     * Encodes the given signed Long object into exactly 1 or 9 bytes for
     * descending order. If the Long object is never expected to be null,
     * consider encoding as a long primitive.
     *
     * @param value optional signed Long value to encode
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     * @return amount of bytes written: 1 when value is null, otherwise 9
     */
    public static int encodeDesc(Long value, byte[] dst, int dstOffset) {
        if (value == null) {
            dst[dstOffset] = NULL_BYTE_LOW;
            return 1;
        } else {
            dst[dstOffset] = NOT_NULL_BYTE_LOW;
            DataEncoder.encode(~value.longValue(), dst, dstOffset + 1);
            return 9;
        }
    }

    /**
     * Encodes the given signed byte into exactly 1 byte for descending order.
     *
     * @param value signed byte value to encode
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     */
    public static void encodeDesc(byte value, byte[] dst, int dstOffset) {
        // Flips every bit except the sign bit.
        dst[dstOffset] = (byte) (value ^ 0x7f);
    }

    /**
     * Encodes the given signed Byte object into exactly 1 or 2 bytes for
     * descending order. If the Byte object is never expected to be null,
     * consider encoding as a byte primitive.
     *
     * @param value optional signed Byte value to encode
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     * @return amount of bytes written: 1 when value is null, otherwise 2
     */
    public static int encodeDesc(Byte value, byte[] dst, int dstOffset) {
        if (value == null) {
            dst[dstOffset] = NULL_BYTE_LOW;
            return 1;
        } else {
            dst[dstOffset] = NOT_NULL_BYTE_LOW;
            dst[dstOffset + 1] = (byte) (value ^ 0x7f);
            return 2;
        }
    }

    /**
     * Encodes the given signed short into exactly 2 bytes for descending order.
     *
     * @param value signed short value to encode
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     */
    public static void encodeDesc(short value, byte[] dst, int dstOffset) {
        // The cast back to short selects DataEncoder's short overload; ~value
        // alone would be an int.
        DataEncoder.encode((short) ~value, dst, dstOffset);
    }

    /**
     * Encodes the given signed Short object into exactly 1 or 3 bytes for
     * descending order. If the Short object is never expected to be null,
     * consider encoding as a short primitive.
     *
     * @param value optional signed Short value to encode
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     * @return amount of bytes written: 1 when value is null, otherwise 3
     */
    public static int encodeDesc(Short value, byte[] dst, int dstOffset) {
        if (value == null) {
            dst[dstOffset] = NULL_BYTE_LOW;
            return 1;
        } else {
            dst[dstOffset] = NOT_NULL_BYTE_LOW;
            DataEncoder.encode((short) ~value.shortValue(), dst, dstOffset + 1);
            return 3;
        }
    }

    /**
     * Encodes the given character into exactly 2 bytes for descending order.
     *
     * @param value character value to encode
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     */
    public static void encodeDesc(char value, byte[] dst, int dstOffset) {
        // The cast back to char selects DataEncoder's char overload; ~value
        // alone would be an int.
        DataEncoder.encode((char) ~value, dst, dstOffset);
    }

    /**
     * Encodes the given Character object into exactly 1 or 3 bytes for
     * descending order. If the Character object is never expected to be null,
     * consider encoding as a char primitive.
     *
     * @param value optional Character value to encode
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     * @return amount of bytes written: 1 when value is null, otherwise 3
     */
    public static int encodeDesc(Character value, byte[] dst, int dstOffset) {
        if (value == null) {
            dst[dstOffset] = NULL_BYTE_LOW;
            return 1;
        } else {
            dst[dstOffset] = NOT_NULL_BYTE_LOW;
            DataEncoder.encode((char) ~value.charValue(), dst, dstOffset + 1);
            return 3;
        }
    }

    /**
     * Encodes the given boolean into exactly 1 byte for descending order.
     * {@code true} is written as 127 and {@code false} as 128.
     *
     * @param value boolean value to encode
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     */
    public static void encodeDesc(boolean value, byte[] dst, int dstOffset) {
        dst[dstOffset] = value ? (byte) 127 : (byte) 128;
    }

    /**
     * Encodes the given Boolean object into exactly 1 byte for descending
     * order. A null value is written as the null marker byte; otherwise the
     * same bytes as the primitive variant are used.
     *
     * @param value optional Boolean value to encode
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     */
    public static void encodeDesc(Boolean value, byte[] dst, int dstOffset) {
        if (value == null) {
            dst[dstOffset] = NULL_BYTE_LOW;
        } else {
            dst[dstOffset] = value.booleanValue() ? (byte) 127 : (byte) 128;
        }
    }

    /**
     * Encodes the given float into exactly 4 bytes for descending order. The
     * bytes are written most significant first.
     *
     * @param value float value to encode
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     */
    public static void encodeDesc(float value, byte[] dst, int dstOffset) {
        int bits = Float.floatToIntBits(value);
        if (bits >= 0) {
            // Sign bit clear: flip everything except the sign bit. Patterns
            // with the sign bit set are written unchanged.
            bits ^= 0x7fffffff;
        }
        dst[dstOffset] = (byte) (bits >> 24);
        dst[dstOffset + 1] = (byte) (bits >> 16);
        dst[dstOffset + 2] = (byte) (bits >> 8);
        dst[dstOffset + 3] = (byte) bits;
    }

    /**
     * Encodes the given Float object into exactly 4 bytes for descending order.
     * A non-canonical NaN value is used to represent null.
     *
     * @param value optional Float value to encode
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     */
    public static void encodeDesc(Float value, byte[] dst, int dstOffset) {
        if (value == null) {
            // ~0x7fffffff == 0x80000000
            DataEncoder.encode(~0x7fffffff, dst, dstOffset);
        } else {
            encodeDesc(value.floatValue(), dst, dstOffset);
        }
    }

    /**
     * Encodes the given double into exactly 8 bytes for descending order. The
     * bytes are written most significant first.
     *
     * @param value double value to encode
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     */
    public static void encodeDesc(double value, byte[] dst, int dstOffset) {
        long bits = Double.doubleToLongBits(value);
        if (bits >= 0) {
            // Sign bit clear: flip everything except the sign bit. Patterns
            // with the sign bit set are written unchanged.
            bits ^= 0x7fffffffffffffffL;
        }
        // Upper 32 bits first, then the lower 32 bits.
        int w = (int) (bits >> 32);
        dst[dstOffset] = (byte) (w >> 24);
        dst[dstOffset + 1] = (byte) (w >> 16);
        dst[dstOffset + 2] = (byte) (w >> 8);
        dst[dstOffset + 3] = (byte) w;
        w = (int) bits;
        dst[dstOffset + 4] = (byte) (w >> 24);
        dst[dstOffset + 5] = (byte) (w >> 16);
        dst[dstOffset + 6] = (byte) (w >> 8);
        dst[dstOffset + 7] = (byte) w;
    }

    /**
     * Encodes the given Double object into exactly 8 bytes for descending
     * order. A non-canonical NaN value is used to represent null.
     *
     * @param value optional Double value to encode
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     */
    public static void encodeDesc(Double value, byte[] dst, int dstOffset) {
        if (value == null) {
            // ~0x7fffffffffffffffL == 0x8000000000000000L
            DataEncoder.encode(~0x7fffffffffffffffL, dst, dstOffset);
        } else {
            encodeDesc(value.doubleValue(), dst, dstOffset);
        }
    }

    /**
     * Encodes the given optional BigInteger into a variable amount of bytes. If
     * the BigInteger is null, exactly 1 byte is written. Otherwise, the amount
     * written can be determined by calling calculateEncodedLength.
     *
     * @param value BigInteger value to encode, may be null
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     * @return amount of bytes written
     * @since 1.2
     */
    public static int encode(BigInteger value, byte[] dst, int dstOffset) {
        /*
         * Encoding of first byte:
         *
         * 0x00:       null low (unused)
         * 0x01:       negative signum; four bytes follow for value length
         * 0x02..0x7f: negative signum; value length 7e range, 1..126
         * 0x80..0xfd: positive signum; value length 7e range, 1..126
         * 0xfe:       positive signum; four bytes follow for value length
         * 0xff:       null high
         */
        if (value == null) {
            dst[dstOffset] = NULL_BYTE_HIGH;
            return 1;
        }

        byte[] bytes = value.toByteArray();
        // Always at least one.
        int bytesLength = bytes.length;
        boolean negative = value.signum() < 0;

        int headerSize;
        if (bytesLength < 0x7f) {
            // Short form: sign and length share a single header byte.
            if (negative) {
                dst[dstOffset] = (byte) (0x80 - bytesLength);
            } else {
                dst[dstOffset] = (byte) (bytesLength + 0x7f);
            }
            headerSize = 1;
        } else {
            // Long form: marker byte plus a 4-byte length, negated for
            // negative values.
            dst[dstOffset] = (byte) (negative ? 1 : 0xfe);
            int encodedLen = negative ? -bytesLength : bytesLength;
            DataEncoder.encode(encodedLen, dst, dstOffset + 1);
            headerSize = 5;
        }

        System.arraycopy(bytes, 0, dst, headerSize + dstOffset, bytesLength);
        return headerSize + bytesLength;
    }

    /**
     * Encodes the given optional BigInteger into a variable amount of bytes for
     * descending order. If the BigInteger is null, exactly 1 byte is written.
     * Otherwise, the amount written can be determined by calling
     * calculateEncodedLength.
     *
     * @param value BigInteger value to encode, may be null
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     * @return amount of bytes written
     * @since 1.2
     */
    public static int encodeDesc(BigInteger value, byte[] dst, int dstOffset) {
        /*
         * Encoding of first byte:
         *
         * 0x00:       null high (unused)
         * 0x01:       positive signum; four bytes follow for value length
         * 0x02..0x7f: positive signum; value length 7e range, 1..126
         * 0x80..0xfd: negative signum; value length 7e range, 1..126
         * 0xfe:       negative signum; four bytes follow for value length
         * 0xff:       null low
         *
         * NOTE(review): the null branch below writes NULL_BYTE_LOW. The
         * "(unused)" tag in this table looks like it was mirrored from the
         * ascending variant; confirm against EncodingConstants.
         */
        if (value == null) {
            dst[dstOffset] = NULL_BYTE_LOW;
            return 1;
        }

        byte[] bytes = value.toByteArray();
        // Always at least one.
        int bytesLength = bytes.length;
        boolean negative = value.signum() < 0;

        int headerSize;
        if (bytesLength < 0x7f) {
            // Short form: same two header formulas as the ascending encoder,
            // with the sign branches swapped.
            if (negative) {
                dst[dstOffset] = (byte) (bytesLength + 0x7f);
            } else {
                dst[dstOffset] = (byte) (0x80 - bytesLength);
            }
            headerSize = 1;
        } else {
            // Long form: marker byte plus a 4-byte length, negated for
            // non-negative values.
            dst[dstOffset] = (byte) (negative ? 0xfe : 1);
            int encodedLen = negative ? bytesLength : -bytesLength;
            DataEncoder.encode(encodedLen, dst, dstOffset + 1);
            headerSize = 5;
        }

        // Magnitude bytes are written complemented.
        dstOffset += headerSize;
        for (int i = 0; i < bytesLength; i++) {
            dst[dstOffset + i] = (byte) ~bytes[i];
        }
        return headerSize + bytesLength;
    }

    /**
     * Returns the amount of bytes required to encode a BigInteger.
     *
     * @param value BigInteger value to encode, may be null
     * @return amount of bytes needed to encode
     * @since 1.2
     */
    public static int calculateEncodedLength(BigInteger value) {
        if (value == null) {
            return 1;
        }
        // Length of the two's-complement form (bitLength excludes the sign
        // bit), computed without allocating the byte array.
        int bytesLength = (value.bitLength() >> 3) + 1;
        return bytesLength < 0x7f ? (1 + bytesLength) : (5 + bytesLength);
    }

    /**
     * Encodes the given optional BigDecimal into a variable amount of bytes. If
     * the BigDecimal is null, exactly 1 byte is written. Otherwise, the amount
     * written can be determined by calling calculateEncodedLength.
     * <p>
     * <i>Note:</i> It is recommended that value be normalized by stripping
     * trailing zeros. This makes searching by value much simpler.
     *
     * @param value BigDecimal value to encode, may be null
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     * @return amount of bytes written
     * @since 1.2
     */
    public static int encode(BigDecimal value, byte[] dst, int dstOffset) {
        if (value == null) {
            dst[dstOffset] = NULL_BYTE_HIGH;
            return 1;
        }
        if (value.signum() == 0) {
            // Zero has a dedicated single-byte encoding.
            dst[dstOffset] = (byte) 0x80;
            return 1;
        }
        return encode(value).copyTo(dst, dstOffset);
    }

    /**
     * Encodes the given optional BigDecimal into a variable amount of bytes for
     * descending order. If the BigDecimal is null, exactly 1 byte is written.
     * Otherwise, the amount written can be determined by calling
     * calculateEncodedLength.
     * <p>
     * <i>Note:</i> It is recommended that value be normalized by stripping
     * trailing zeros. This makes searching by value much simpler.
     *
     * @param value BigDecimal value to encode, may be null
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     * @return amount of bytes written
     * @since 1.2
     */
    public static int encodeDesc(BigDecimal value, byte[] dst, int dstOffset) {
        if (value == null) {
            dst[dstOffset] = NULL_BYTE_LOW;
            return 1;
        }
        if (value.signum() == 0) {
            // Complement of the ascending zero byte (0x80).
            dst[dstOffset] = (byte) 0x7f;
            return 1;
        }
        return encode(value).copyDescTo(dst, dstOffset);
    }

    /**
     * Returns the amount of bytes required to encode a BigDecimal.
     * <p>
     * <i>Note:</i> It is recommended that value be normalized by stripping
     * trailing zeros. This makes searching by value much simpler.
     *
     * @param value BigDecimal value to encode, may be null
     * @return amount of bytes needed to encode
     * @since 1.2
     */
    public static int calculateEncodedLength(BigDecimal value) {
        if (value == null || value.signum() == 0) {
            return 1;
        }
        // Performs the encoding; the result is remembered per thread so that
        // an immediately following encode of the same value can reuse it.
        return encode(value).mLength;
    }

    /**
     * Holds the most recent ascending BigDecimal encoding produced on a
     * thread, together with the value it was produced from.
     */
    private static class CachedBigDecimal {
        /** Per-thread slot holding the last encoding, or nothing yet. */
        static final ThreadLocal<CachedBigDecimal> cLocal = new ThreadLocal<CachedBigDecimal>();

        /** Value the encoding was produced from. */
        final BigDecimal mValue;
        /** Buffer holding the encoding; may be longer than mLength. */
        final byte[] mEncoded;
        /** Number of meaningful bytes at the start of mEncoded. */
        final int mLength;

        CachedBigDecimal(BigDecimal value, byte[] encoded, int length) {
            mValue = value;
            mEncoded = encoded;
            mLength = length;
        }

        /**
         * Copies the encoding as-is into dst.
         *
         * @return amount of bytes written
         */
        int copyTo(byte[] dst, int dstOffset) {
            int length = mLength;
            System.arraycopy(mEncoded, 0, dst, dstOffset, length);
            return length;
        }

        /**
         * Copies the bitwise complement of the encoding into dst.
         *
         * @return amount of bytes written
         */
        int copyDescTo(byte[] dst, int dstOffset) {
            byte[] encoded = mEncoded;
            int length = mLength;
            for (int i = 0; i < length; i++) {
                dst[dstOffset++] = (byte) ~encoded[i];
            }
            return length;
        }
    }

    /**
     * Returns the ascending encoding of the given value, reusing the calling
     * thread's cached result when it was produced from an equal value.
     *
     * @param value cannot be null or zero
     * @return non-null cached encoding
     */
    private static CachedBigDecimal encode(BigDecimal value) {
        CachedBigDecimal cached = CachedBigDecimal.cLocal.get();
        if (cached != null && cached.mValue.equals(value)) {
            return cached;
        }

        // Exactly predicting encoding length is hard. Instead, overestimate
        // and compare with actual encoded result. Result is cached to avoid
        // recomputation.

        // 5: maximum header encoding length
        // 1: extra byte for last digit
        // 10: bits for rare extra digit
        // 10: bits for terminator digit
        int length = (5 + 1) + (((value.unscaledValue().bitLength() + (10 + 10)) + 7) >> 3);

        byte[] encoded = new byte[length];
        length = encodeUncached(value, encoded);

        cached = new CachedBigDecimal(value, encoded, length);
        CachedBigDecimal.cLocal.set(cached);

        return cached;
    }

    /**
     * Writes the ascending encoding of the given value to the start of dst: a
     * header carrying sign and exponent, followed by the significand as
     * 10-bit base-1000 digits and a terminator digit.
     *
     * @param value cannot be null or zero
     * @param dst destination, written from index 0
     * @return amount of bytes written
     */
    private static int encodeUncached(BigDecimal value, byte[] dst) {
        /*
         * Encoding of header:
         *
         * 0x00:       null low (unused)
         * 0x01:       negative signum; four bytes follow for positive exponent
         * 0x02..0x3f: negative signum; positive exponent; 3e range, 61..0
         * 0x40..0x7d: negative signum; negative exponent; 3e range, -1..-62
         * 0x7e:       negative signum; four bytes follow for negative exponent
         * 0x7f:       negative zero (unused)
         * 0x80:       zero
         * 0x81:       positive signum; four bytes follow for negative exponent
         * 0x82..0xbf: positive signum; negative exponent; 3e range, -62..-1
         * 0xc0..0xfd: positive signum; positive exponent; 3e range, 0..61
         * 0xfe:       positive signum; four bytes follow for positive exponent
         * 0xff:       null high
         */

        int dstOffset = 0;
        int precision = value.precision();
        int exponent = precision - value.scale();

        if (value.signum() < 0) {
            if (exponent >= -0x3e && exponent < 0x3e) {
                dst[dstOffset++] = (byte) (0x3f - exponent);
            } else {
                if (exponent < 0) {
                    dst[dstOffset] = (byte) 0x7e;
                } else {
                    dst[dstOffset] = (byte) 1;
                }
                // Exponent is complemented for negative values.
                DataEncoder.encode(~exponent, dst, dstOffset + 1);
                dstOffset += 5;
            }
        } else {
            if (exponent >= -0x3e && exponent < 0x3e) {
                dst[dstOffset++] = (byte) (exponent + 0xc0);
            } else {
                if (exponent < 0) {
                    dst[dstOffset] = (byte) 0x81;
                } else {
                    dst[dstOffset] = (byte) 0xfe;
                }
                DataEncoder.encode(exponent, dst, dstOffset + 1);
                dstOffset += 5;
            }
        }

        // Significand must be decimal encoded to maintain proper sort order.
        // Base 1000 is more efficient than base 10 and still maintains proper
        // sort order. A minimum of two bytes must be generated, however.

        BigInteger unscaledValue = value.unscaledValue();

        // Ensure a non-fractional amount of base 1000 digits.
        int terminator;
        switch (precision % 3) {
        case 0:
        default:
            terminator = 2;
            break;
        case 1:
            terminator = 0;
            unscaledValue = unscaledValue.multiply(ONE_HUNDRED);
            break;
        case 2:
            terminator = 1;
            unscaledValue = unscaledValue.multiply(BigInteger.TEN);
            break;
        }

        // 10 bits per digit and 1 extra terminator digit. Digit values 0..999
        // are encoded as 12..1011. Digit values 0..11 and 1012..1023 are used
        // for terminators.

        int digitAdjust;
        if (unscaledValue.signum() >= 0) {
            digitAdjust = 12;
        } else {
            // Remainders are in -999..0 here, so this offset maps them into
            // 12..1011 as well.
            digitAdjust = 999 + 12;
            terminator = 1023 - terminator;
        }

        // Estimated digit count (10 bits each) plus one slot for the
        // terminator; filled from the end towards the front.
        int pos = ((unscaledValue.bitLength() + 9) / 10) + 1;
        int[] digits = new int[pos];
        digits[--pos] = terminator;

        while (unscaledValue.signum() != 0) {
            BigInteger[] divrem = unscaledValue.divideAndRemainder(ONE_THOUSAND);

            if (--pos < 0) {
                // Handle rare case when an extra digit is required.
                int[] newDigits = new int[digits.length + 1];
                System.arraycopy(digits, 0, newDigits, 1, digits.length);
                digits = newDigits;
                pos = 0;
            }

            digits[pos] = divrem[1].intValue() + digitAdjust;

            unscaledValue = divrem[0];
        }

        // Now encode digits in proper order, 10 bits per digit. 1024 possible
        // values per 10 bits, and so base 1000 is quite efficient.

        int accum = 0;
        int bits = 0;
        for (int i = 0; i < digits.length; i++) {
            accum = (accum << 10) | digits[i];
            bits += 10;
            // Flush every complete byte currently held in the accumulator.
            do {
                dst[dstOffset++] = (byte) (accum >> (bits -= 8));
            } while (bits >= 8);
        }

        if (bits != 0) {
            // Left-align the leftover bits in a final byte.
            dst[dstOffset++] = (byte) (accum << (8 - bits));
        }

        return dstOffset;
    }

    /**
     * Encodes the given optional unsigned byte array into a variable amount of
     * bytes. If the byte array is null, exactly 1 byte is written. Otherwise,
     * the amount written can be determined by calling calculateEncodedLength.
     *
     * @param value byte array value to encode, may be null
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     * @return amount of bytes written
     */
    public static int encode(byte[] value, byte[] dst, int dstOffset) {
        if (value == null) {
            dst[dstOffset] = NULL_BYTE_HIGH;
            return 1;
        }
        return encode(value, 0, value.length, dst, dstOffset, 0);
    }

    /**
     * Encodes the given optional unsigned byte array into a variable amount of
     * bytes. If the byte array is null, exactly 1 byte is written. Otherwise,
     * the amount written can be determined by calling calculateEncodedLength.
     *
     * @param value byte array value to encode, may be null
     * @param valueOffset offset into byte array
     * @param valueLength length of data in byte array
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     * @return amount of bytes written
     */
    public static int encode(byte[] value, int valueOffset, int valueLength, byte[] dst, int dstOffset) {
        return encode(value, valueOffset, valueLength, dst, dstOffset, 0);
    }

    /**
     * Encodes the given optional unsigned byte array into a variable amount of
     * bytes for descending order. If the byte array is null, exactly 1 byte is
     * written. Otherwise, the amount written is determined by calling
     * calculateEncodedLength.
     *
     * @param value byte array value to encode, may be null
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     * @return amount of bytes written
     */
    public static int encodeDesc(byte[] value, byte[] dst, int dstOffset) {
        if (value == null) {
            dst[dstOffset] = NULL_BYTE_LOW;
            return 1;
        }
        return encode(value, 0, value.length, dst, dstOffset, -1);
    }

    /**
     * Encodes the given optional unsigned byte array into a variable amount of
     * bytes for descending order. If the byte array is null, exactly 1 byte is
     * written. Otherwise, the amount written is determined by calling
     * calculateEncodedLength.
     *
     * @param value byte array value to encode, may be null
     * @param valueOffset offset into byte array
     * @param valueLength length of data in byte array
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     * @return amount of bytes written
     */
    public static int encodeDesc(byte[] value, int valueOffset, int valueLength, byte[] dst, int dstOffset) {
        return encode(value, valueOffset, valueLength, dst, dstOffset, -1);
    }

    /**
     * Shared implementation of the byte array encoders. The input is consumed
     * as a bit stream, 15 bits at a time; each 15-bit group is written as one
     * two-byte base-32768 digit, and a terminator byte is appended. Every
     * output byte is XORed with xorMask before being stored.
     *
     * @param value byte array value to encode, may be null
     * @param valueOffset offset into byte array
     * @param valueLength length of data in byte array
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     * @param xorMask 0 for normal encoding, -1 for descending encoding
     * @return amount of bytes written
     */
    private static int encode(byte[] value, int valueOffset, int valueLength, byte[] dst, int dstOffset, int xorMask) {
        if (value == null) {
            dst[dstOffset] = (byte) (NULL_BYTE_HIGH ^ xorMask);
            return 1;
        }

        final int originalOffset = dstOffset;

        // Value is encoded in base-32768.

        // accum holds the accumBits low-order bits not yet emitted.
        int accumBits = 0;
        int accum = 0;

        final int end = valueOffset + valueLength;
        for (int i = valueOffset; i < end; i++) {
            if (accumBits <= 7) {
                // The whole byte fits into the current digit.
                accumBits += 8;
                accum = (accum << 8) | (value[i] & 0xff);
                if (accumBits == 15) {
                    emitDigit(accum, dst, dstOffset, xorMask);
                    dstOffset += 2;
                    accum = 0;
                    accumBits = 0;
                }
            } else {
                // Only the top 'supply' bits of this byte complete the digit;
                // the remaining low bits start the next one.
                int supply = 15 - accumBits;
                accum = (accum << supply) | ((value[i] & 0xff) >> (8 - supply));
                emitDigit(accum, dst, dstOffset, xorMask);
                dstOffset += 2;
                accumBits = 8 - supply;
                accum = value[i] & ((1 << accumBits) - 1);
            }
        }

        if (accumBits > 0) {
            // Pad with zeros.
            accum <<= (15 - accumBits);
            if (accumBits <= 7) {
                // Since amount of significant bits is small, emit only the
                // upper half of the digit. The following code is modified from
                // emitDigit.
                int a = (accum * 21845) >> 22;
                if (accum - ((a << 7) + (a << 6)) == 192) {
                    a++;
                }
                dst[dstOffset++] = (byte) ((a + 32) ^ xorMask);
            } else {
                emitDigit(accum, dst, dstOffset, xorMask);
                dstOffset += 2;
            }
        }

        // Append terminator.
        dst[dstOffset++] = (byte) (TERMINATOR ^ xorMask);

        return dstOffset - originalOffset;
    }

    /**
     * Emits a base-32768 digit using exactly two bytes. The first byte is in
     * the range 32..202 and the second byte is in the range 32..223.
     *
     * @param value digit value in the range 0..32767
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     * @param xorMask 0 for normal encoding, -1 for descending encoding
     */
    private static void emitDigit(int value, byte[] dst, int dstOffset, int xorMask) {
        // The first byte is computed as ((value / 192) + 32) and the second
        // byte is computed as ((value % 192) + 32). To speed things up a bit,
        // the integer division and remainder operations are replaced with a
        // scaled multiplication.

        // approximate value / 192
        int a = (value * 21845) >> 22;

        // approximate value % 192
        // Note: the value 192 was chosen as a divisor because a multiply by
        // 192 can be replaced with two summed shifts.
        int b = value - ((a << 7) + (a << 6));
        if (b == 192) {
            // Fix error.
            a++;
            b = 0;
        }

        dst[dstOffset++] = (byte) ((a + 32) ^ xorMask);
        dst[dstOffset] = (byte) ((b + 32) ^ xorMask);
    }

    /**
     * Returns the amount of bytes required to encode a byte array of the given
     * length.
     *
     * @param value byte array value to encode, may be null
     * @return amount of bytes needed to encode
     */
    public static int calculateEncodedLength(byte[] value) {
        return value == null ? 1 : calculateEncodedLength(value, 0, value.length);
    }

    /**
     * Returns the amount of bytes required to encode the given byte array.
     * Only valueLength and the nullness of value influence the result.
     *
     * @param value byte array value to encode, may be null
     * @param valueOffset offset into byte array
     * @param valueLength length of data in byte array
     * @return amount of bytes needed to encode, including the terminator byte;
     * 1 when value is null
     */
    public static int calculateEncodedLength(byte[] value, int valueOffset, int valueLength) {
        if (value == null) {
            return 1;
        }
        // The encoder writes two bytes per 15 input bits, i.e. 128 output
        // bytes per 120 input bytes. The add of 119 is used to force ceiling
        // rounding. The arithmetic is widened to long because
        // (valueLength << 7) overflows an int once valueLength reaches 2^24,
        // which previously produced a negative length.
        long digitBytes = (((long) valueLength << 7) + 119) / 120;
        // One more byte for the terminator.
        return (int) digitBytes + 1;
    }

    /**
     * Encodes the given optional String into a variable amount of bytes. The
     * amount written can be determined by calling calculateEncodedStringLength.
     * <p>
     * Strings are encoded in a fashion similar to UTF-8, in that ASCII
     * characters are usually written in one byte. This encoding is more
     * efficient than UTF-8, but it isn't compatible with UTF-8.
     *
     * @param value String value to encode, may be null
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     * @return amount of bytes written
     */
    public static int encode(String value, byte[] dst, int dstOffset) {
        return encode(value, dst, dstOffset, 0);
    }

    /**
     * Encodes the given optional String into a variable amount of bytes for
     * descending order. The amount written can be determined by calling
     * calculateEncodedStringLength.
     * <p>
     * Strings are encoded in a fashion similar to UTF-8, in that ASCII
     * characters are usually written in one byte. This encoding is more
     * efficient than UTF-8, but it isn't compatible with UTF-8.
     *
     * @param value String value to encode, may be null
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     * @return amount of bytes written
     */
    public static int encodeDesc(String value, byte[] dst, int dstOffset) {
        return encode(value, dst, dstOffset, -1);
    }

    /**
     * Shared implementation of the String encoders. Each character (or
     * well-formed surrogate pair) is written in one, two or three bytes,
     * followed by a terminator byte. Every output byte is XORed with xorMask
     * before being stored.
     *
     * @param value String value to encode, may be null
     * @param dst destination for encoded bytes
     * @param dstOffset offset into destination array
     * @param xorMask 0 for normal encoding, -1 for descending encoding
     * @return amount of bytes written
     */
    private static int encode(String value, byte[] dst, int dstOffset, int xorMask) {
        if (value == null) {
            dst[dstOffset] = (byte) (NULL_BYTE_HIGH ^ xorMask);
            return 1;
        }

        final int originalOffset = dstOffset;

        // All characters have an offset of 2 added, in order to reserve bytes
        // 0 and 1 for encoding nulls and terminators. This means the ASCII
        // string "HelloWorld" is actually encoded as "JgnnqYqtnf". This also
        // means that the ASCII '~' and del characters are encoded in two bytes.

        int length = value.length();
        for (int i = 0; i < length; i++) {
            int c = value.charAt(i) + 2;
            if (c <= 0x7f) {
                // 0xxxxxxx
                dst[dstOffset++] = (byte) (c ^ xorMask);
            } else if (c <= 12415) {
                // 10xxxxxx xxxxxxxx

                // Second byte cannot have the values 0, 1, 254, or 255 because
                // they clash with null and terminator bytes. Divide by 192 and
                // store in first 6 bits. The remainder, with 32 added, goes
                // into the second byte. Note that (192 * 63 + 191) + 128 ==
                // 12415.
                // 63 is the maximum value that can be represented in 6 bits.

                c -= 128; // c will always be at least 128, so normalize.

                // approximate value / 192
                int a = (c * 21845) >> 22;

                // approximate value % 192
                // Note: the value 192 was chosen as a divisor because a
                // multiply by 192 can be replaced with two summed shifts.
                c = c - ((a << 7) + (a << 6));
                if (c == 192) {
                    // Fix error.
                    a++;
                    c = 0;
                }

                dst[dstOffset++] = (byte) ((0x80 | a) ^ xorMask);
                dst[dstOffset++] = (byte) ((c + 32) ^ xorMask);
            } else {
                // 110xxxxx xxxxxxxx xxxxxxxx

                if ((c - 2) >= 0xd800 && (c - 2) <= 0xdbff) {
                    // Found a high surrogate. Verify that surrogate pair is
                    // well-formed. Low surrogate must follow high surrogate.
                    if (i + 1 < length) {
                        int c2 = value.charAt(i + 1);
                        if (c2 >= 0xdc00 && c2 <= 0xdfff) {
                            // Combine the pair into a code point, keeping the
                            // +2 offset, and consume the low surrogate.
                            c = ((((c - 2) & 0x3ff) << 10) | (c2 & 0x3ff)) + 0x10002;
                            i++;
                        }
                    }
                }

                // Second and third bytes cannot have the values 0, 1, 254, or
                // 255 because they clash with null and terminator
                // bytes. Divide by 192 twice, storing the first and second
                // remainders in the third and second bytes, respectively.
                // Note that largest unicode value supported is 2^20 + 65535 ==
                // 1114111. When divided by 192 twice, the value is 30, which
                // just barely fits in the 5 available bits of the first byte.

                c -= 12416; // c will always be at least 12416, so normalize.

                // Long multiply: c * 21845 can exceed the int range here.
                int a = (int) ((c * 21845L) >> 22);
                c = c - ((a << 7) + (a << 6));
                if (c == 192) {
                    a++;
                    c = 0;
                }

                // Third byte is stored first; the offset is advanced past it
                // after the first two bytes have been written.
                dst[dstOffset + 2] = (byte) ((c + 32) ^ xorMask);

                c = (a * 21845) >> 22;
                a = a - ((c << 7) + (c << 6));
                if (a == 192) {
                    c++;
                    a = 0;
                }

                dst[dstOffset++] = (byte) ((0xc0 | c) ^ xorMask);
                dst[dstOffset++] = (byte) ((a + 32) ^ xorMask);
                dstOffset++;
            }
        }

        // Append terminator.
        dst[dstOffset++] = (byte) (TERMINATOR ^ xorMask);

        return dstOffset - originalOffset;
    }

    /**
     * Returns the amount of bytes required to encode the given String,
     * including the terminator byte. A null String requires exactly 1 byte.
     *
     * @param value String to encode, may be null
     * @return amount of bytes needed to encode
     */
    public static int calculateEncodedStringLength(String value) {
        // Starts at 1 to account for the terminator (or the null marker).
        int encodedLen = 1;
        if (value != null) {
            int valueLength = value.length();
            for (int i = 0; i < valueLength; i++) {
                int c = value.charAt(i);
                // Thresholds are the encoder's, shifted down by the offset of
                // 2 that the encoder adds to every character.
                if (c <= (0x7f - 2)) {
                    encodedLen++;
                } else if (c <= (12415 - 2)) {
                    encodedLen += 2;
                } else {
                    if (c >= 0xd800 && c <= 0xdbff) {
                        // Found a high surrogate. Verify that surrogate pair is
                        // well-formed. Low surrogate must follow high
                        // surrogate.
                        if (i + 1 < valueLength) {
                            int c2 = value.charAt(i + 1);
                            if (c2 >= 0xdc00 && c2 <= 0xdfff) {
                                // The pair is encoded together in three bytes.
                                i++;
                            }
                        }
                    }
                    encodedLen += 3;
                }
            }
        }
        return encodedLen;
    }

    /**
     * Encodes the given byte array for use when there is only a single required
     * property, descending order, whose type is a byte array. The original byte
     * array is returned if the length is zero.
     *
     * @param value byte array to encode, must not be null
     * @return array holding the bitwise complement of each input byte
     */
    public static byte[] encodeSingleDesc(byte[] value) {
        return encodeSingleDesc(value, 0, 0);
    }

    /**
     * Encodes the given byte array for use when there is only a single required
     * property, descending order, whose type is a byte array. The original byte
     * array is returned if the length and padding lengths are zero.
     *
     * @param value byte array to encode, must not be null
     * @param prefixPadding amount of extra bytes to allocate at start of
     * encoded byte array
     * @param suffixPadding amount of extra bytes to allocate at end of encoded
     * byte array
     * @return array holding the padding and the bitwise complement of each
     * input byte
     */
    public static byte[] encodeSingleDesc(byte[] value, int prefixPadding, int suffixPadding) {
        int length = value.length;
        if (prefixPadding <= 0 && suffixPadding <= 0 && length == 0) {
            return value;
        }
        byte[] dst = new byte[prefixPadding + length + suffixPadding];
        // Fill from the last byte backwards; padding bytes stay zero.
        while (--length >= 0) {
            dst[prefixPadding + length] = (byte) (~value[length]);
        }
        return dst;
    }

    /**
     * Encodes the given byte array for use when there is only a single nullable
     * property, descending order, whose type is a byte array.
     *
     * @param value byte array to encode, may be null
     * @return newly allocated array: a marker byte followed by the bitwise
     * complement of each input byte
     */
    public static byte[] encodeSingleNullableDesc(byte[] value) {
        return encodeSingleNullableDesc(value, 0, 0);
    }

    /**
     * Encodes the given byte array for use when there is only a single nullable
     * property, descending order, whose type is a byte array.
     *
     * @param value byte array to encode, may be null
     * @param prefixPadding amount of extra bytes to allocate at start of
     * encoded byte array
     * @param suffixPadding amount of extra bytes to allocate at end of encoded
     * byte array
     * @return newly allocated array: padding, a marker byte, and the bitwise
     * complement of each input byte
     */
    public static byte[] encodeSingleNullableDesc(byte[] value, int prefixPadding, int suffixPadding) {
        if (prefixPadding <= 0 && suffixPadding <= 0) {
            // No padding requested: marker byte at index 0.
            if (value == null) {
                return new byte[] { NULL_BYTE_LOW };
            }

            int length = value.length;
            if (length == 0) {
                return new byte[] { NOT_NULL_BYTE_LOW };
            }

            byte[] dst = new byte[1 + length];
            dst[0] = NOT_NULL_BYTE_LOW;
            while (--length >= 0) {
                dst[1 + length] = (byte) (~value[length]);
            }
            return dst;
        }

        if (value == null) {
            byte[] dst = new byte[prefixPadding + 1 + suffixPadding];
            dst[prefixPadding] = NULL_BYTE_LOW;
            return dst;
        }

        int length = value.length;
        byte[] dst = new byte[prefixPadding + 1 + length + suffixPadding];
        dst[prefixPadding] = NOT_NULL_BYTE_LOW;
        while (--length >= 0) {
            dst[prefixPadding + 1 + length] = (byte) (~value[length]);
        }
        return dst;
    }
}