/* * Copyright 2006-2010 Amazon Technologies, Inc. or its affiliates. * Amazon, Amazon.com and Carbonado are trademarks or registered trademarks * of Amazon Technologies, Inc. or its affiliates. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.taobao.tddl.optimizer.core.datatype; import static com.taobao.tddl.optimizer.core.datatype.EncodingConstants.NOT_NULL_BYTE_HIGH; import static com.taobao.tddl.optimizer.core.datatype.EncodingConstants.NULL_BYTE_HIGH; import java.io.IOException; import java.io.OutputStream; import java.math.BigDecimal; import java.math.BigInteger; /** * A very low-level class that supports encoding of primitive data. For encoding * data into keys, see {@link KeyEncoder}. * * @author Brian S O'Neill * @see DataDecoder */ public class DataEncoder { // Note: Most of these methods are also used by KeyEncoder, which is why // they are encoded for supporting proper ordering. /** * Encodes the given signed integer into exactly 4 bytes. * * @param value signed integer value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array */ public static void encode(int value, byte[] dst, int dstOffset) { value ^= 0x80000000; dst[dstOffset] = (byte) (value >> 24); dst[dstOffset + 1] = (byte) (value >> 16); dst[dstOffset + 2] = (byte) (value >> 8); dst[dstOffset + 3] = (byte) value; } /** * Encodes the given signed Integer object into exactly 1 or 5 bytes. If the * Integer object is never expected to be null, consider encoding as an int * primitive. * * @param value optional signed Integer value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array * @return amount of bytes written */ public static int encode(Integer value, byte[] dst, int dstOffset) { if (value == null) { dst[dstOffset] = NULL_BYTE_HIGH; return 1; } else { dst[dstOffset] = NOT_NULL_BYTE_HIGH; encode(value.intValue(), dst, dstOffset + 1); return 5; } } /** * Encodes the given signed long into exactly 8 bytes. * * @param value signed long value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array */ public static void encode(long value, byte[] dst, int dstOffset) { int w = ((int) (value >> 32)) ^ 0x80000000; dst[dstOffset] = (byte) (w >> 24); dst[dstOffset + 1] = (byte) (w >> 16); dst[dstOffset + 2] = (byte) (w >> 8); dst[dstOffset + 3] = (byte) w; w = (int) value; dst[dstOffset + 4] = (byte) (w >> 24); dst[dstOffset + 5] = (byte) (w >> 16); dst[dstOffset + 6] = (byte) (w >> 8); dst[dstOffset + 7] = (byte) w; } /** * Encodes the given signed Long object into exactly 1 or 9 bytes. If the * Long object is never expected to be null, consider encoding as a long * primitive. * * @param value optional signed Long value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array * @return amount of bytes written */ public static int encode(Long value, byte[] dst, int dstOffset) { if (value == null) { dst[dstOffset] = NULL_BYTE_HIGH; return 1; } else { dst[dstOffset] = NOT_NULL_BYTE_HIGH; encode(value.longValue(), dst, dstOffset + 1); return 9; } } /** * Encodes the given signed byte into exactly 1 byte. * * @param value signed byte value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array */ public static void encode(byte value, byte[] dst, int dstOffset) { dst[dstOffset] = (byte) (value ^ 0x80); } /** * Encodes the given signed Byte object into exactly 1 or 2 bytes. If the * Byte object is never expected to be null, consider encoding as a byte * primitive. * * @param value optional signed Byte value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array * @return amount of bytes written */ public static int encode(Byte value, byte[] dst, int dstOffset) { if (value == null) { dst[dstOffset] = NULL_BYTE_HIGH; return 1; } else { dst[dstOffset] = NOT_NULL_BYTE_HIGH; dst[dstOffset + 1] = (byte) (value ^ 0x80); return 2; } } /** * Encodes the given signed short into exactly 2 bytes. * * @param value signed short value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array */ public static void encode(short value, byte[] dst, int dstOffset) { value ^= 0x8000; dst[dstOffset] = (byte) (value >> 8); dst[dstOffset + 1] = (byte) value; } /** * Encodes the given signed Short object into exactly 1 or 3 bytes. If the * Short object is never expected to be null, consider encoding as a short * primitive. * * @param value optional signed Short value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array * @return amount of bytes written */ public static int encode(Short value, byte[] dst, int dstOffset) { if (value == null) { dst[dstOffset] = NULL_BYTE_HIGH; return 1; } else { dst[dstOffset] = NOT_NULL_BYTE_HIGH; encode(value.shortValue(), dst, dstOffset + 1); return 3; } } /** * Encodes the given character into exactly 2 bytes. * * @param value character value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array */ public static void encode(char value, byte[] dst, int dstOffset) { dst[dstOffset] = (byte) (value >> 8); dst[dstOffset + 1] = (byte) value; } /** * Encodes the given Character object into exactly 1 or 3 bytes. If the * Character object is never expected to be null, consider encoding as a * char primitive. * * @param value optional Character value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array * @return amount of bytes written */ public static int encode(Character value, byte[] dst, int dstOffset) { if (value == null) { dst[dstOffset] = NULL_BYTE_HIGH; return 1; } else { dst[dstOffset] = NOT_NULL_BYTE_HIGH; encode(value.charValue(), dst, dstOffset + 1); return 3; } } /** * Encodes the given boolean into exactly 1 byte. * * @param value boolean value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array */ public static void encode(boolean value, byte[] dst, int dstOffset) { dst[dstOffset] = value ? (byte) 128 : (byte) 127; } /** * Encodes the given Boolean object into exactly 1 byte. * * @param value optional Boolean value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array */ public static void encode(Boolean value, byte[] dst, int dstOffset) { if (value == null) { dst[dstOffset] = NULL_BYTE_HIGH; } else { dst[dstOffset] = value.booleanValue() ? (byte) 128 : (byte) 127; } } /** * Encodes the given float into exactly 4 bytes. * * @param value float value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array */ public static void encode(float value, byte[] dst, int dstOffset) { int bits = Float.floatToIntBits(value); bits ^= (bits < 0) ? 0xffffffff : 0x80000000; dst[dstOffset] = (byte) (bits >> 24); dst[dstOffset + 1] = (byte) (bits >> 16); dst[dstOffset + 2] = (byte) (bits >> 8); dst[dstOffset + 3] = (byte) bits; } /** * Encodes the given Float object into exactly 4 bytes. A non-canonical NaN * value is used to represent null. * * @param value optional Float value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array */ public static void encode(Float value, byte[] dst, int dstOffset) { if (value == null) { encode(0x7fffffff, dst, dstOffset); } else { encode(value.floatValue(), dst, dstOffset); } } /** * Encodes the given double into exactly 8 bytes. * * @param value double value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array */ public static void encode(double value, byte[] dst, int dstOffset) { long bits = Double.doubleToLongBits(value); bits ^= (bits < 0) ? 0xffffffffffffffffL : 0x8000000000000000L; int w = (int) (bits >> 32); dst[dstOffset] = (byte) (w >> 24); dst[dstOffset + 1] = (byte) (w >> 16); dst[dstOffset + 2] = (byte) (w >> 8); dst[dstOffset + 3] = (byte) w; w = (int) bits; dst[dstOffset + 4] = (byte) (w >> 24); dst[dstOffset + 5] = (byte) (w >> 16); dst[dstOffset + 6] = (byte) (w >> 8); dst[dstOffset + 7] = (byte) w; } /** * Encodes the given Double object into exactly 8 bytes. A non-canonical NaN * value is used to represent null. * * @param value optional Double value to encode * @param dst destination for encoded bytes * @param dstOffset offset into destination array */ public static void encode(Double value, byte[] dst, int dstOffset) { if (value == null) { encode(0x7fffffffffffffffL, dst, dstOffset); } else { encode(value.doubleValue(), dst, dstOffset); } } /** * Encodes the given optional BigInteger into a variable amount of bytes. If * the BigInteger is null, exactly 1 byte is written. Otherwise, the amount * written can be determined by calling calculateEncodedLength. * * @param value BigInteger value to encode, may be null * @param dst destination for encoded bytes * @param dstOffset offset into destination array * @return amount of bytes written * @since 1.2 */ public static int encode(BigInteger value, byte[] dst, int dstOffset) { if (value == null) { dst[dstOffset] = NULL_BYTE_HIGH; return 1; } byte[] bytes = value.toByteArray(); // Write the byte array length first, in a variable amount of bytes. int amt = encodeUnsignedVarInt(bytes.length, dst, dstOffset); // Now write the byte array. System.arraycopy(bytes, 0, dst, dstOffset + amt, bytes.length); return amt + bytes.length; } /** * Returns the amount of bytes required to encode the given BigInteger. * * @param value BigInteger value to encode, may be null * @return amount of bytes needed to encode * @since 1.2 */ public static int calculateEncodedLength(BigInteger value) { if (value == null) { return 1; } int byteCount = (value.bitLength() >> 3) + 1; return unsignedVarIntLength(byteCount) + byteCount; } /** * Encodes the given optional BigDecimal into a variable amount of bytes. If * the BigDecimal is null, exactly 1 byte is written. Otherwise, the amount * written can be determined by calling calculateEncodedLength. * * @param value BigDecimal value to encode, may be null * @param dst destination for encoded bytes * @param dstOffset offset into destination array * @return amount of bytes written * @since 1.2 */ public static int encode(BigDecimal value, byte[] dst, int dstOffset) { if (value == null) { dst[dstOffset] = NULL_BYTE_HIGH; return 1; } int amt = encodeSignedVarInt(value.scale(), dst, dstOffset); return amt + encode(value.unscaledValue(), dst, dstOffset + amt); } /** * Returns the amount of bytes required to encode the given BigDecimal. * * @param value BigDecimal value to encode, may be null * @return amount of bytes needed to encode * @since 1.2 */ public static int calculateEncodedLength(BigDecimal value) { if (value == null) { return 1; } return signedVarIntLength(value.scale()) + calculateEncodedLength(value.unscaledValue()); } /** * Encodes the given optional byte array into a variable amount of bytes. If * the byte array is null, exactly 1 byte is written. Otherwise, the amount * written can be determined by calling calculateEncodedLength. * * @param value byte array value to encode, may be null * @param dst destination for encoded bytes * @param dstOffset offset into destination array * @return amount of bytes written */ public static int encode(byte[] value, byte[] dst, int dstOffset) { if (value == null) { dst[dstOffset] = NULL_BYTE_HIGH; return 1; } return encode(value, 0, value.length, dst, dstOffset); } /** * Encodes the given optional byte array into a variable amount of bytes. If * the byte array is null, exactly 1 byte is written. Otherwise, the amount * written can be determined by calling calculateEncodedLength. * * @param value byte array value to encode, may be null * @param valueOffset offset into byte array * @param valueLength length of data in byte array * @param dst destination for encoded bytes * @param dstOffset offset into destination array * @return amount of bytes written */ public static int encode(byte[] value, int valueOffset, int valueLength, byte[] dst, int dstOffset) { if (value == null) { dst[dstOffset] = NULL_BYTE_HIGH; return 1; } // Write the value length first, in a variable amount of bytes. int amt = encodeUnsignedVarInt(valueLength, dst, dstOffset); // Now write the value. System.arraycopy(value, valueOffset, dst, dstOffset + amt, valueLength); return amt + valueLength; } /** * Returns the amount of bytes required to encode the given byte array. * * @param value byte array value to encode, may be null * @return amount of bytes needed to encode */ public static int calculateEncodedLength(byte[] value) { return value == null ? 1 : calculateEncodedLength(value, 0, value.length); } /** * Returns the amount of bytes required to encode the given byte array. * * @param value byte array value to encode, may be null * @param valueOffset offset into byte array * @param valueLength length of data in byte array * @return amount of bytes needed to encode */ public static int calculateEncodedLength(byte[] value, int valueOffset, int valueLength) { return value == null ? 1 : (unsignedVarIntLength(valueLength) + valueLength); } /** * Encodes the given optional String into a variable amount of bytes. The * amount written can be determined by calling calculateEncodedStringLength. * <p> * Strings are encoded in a fashion similar to UTF-8, in that ASCII * characters are written in one byte. This encoding is more efficient than * UTF-8, but it isn't compatible with UTF-8. * * @param value String value to encode, may be null * @param dst destination for encoded bytes * @param dstOffset offset into destination array * @return amount of bytes written */ public static int encode(String value, byte[] dst, int dstOffset) { if (value == null) { dst[dstOffset] = NULL_BYTE_HIGH; return 1; } final int originalOffset = dstOffset; int valueLength = value.length(); // Write the value length first, in a variable amount of bytes. dstOffset += encodeUnsignedVarInt(valueLength, dst, dstOffset); for (int i = 0; i < valueLength; i++) { int c = value.charAt(i); if (c <= 0x7f) { dst[dstOffset++] = (byte) c; } else if (c <= 0x3fff) { dst[dstOffset++] = (byte) (0x80 | (c >> 8)); dst[dstOffset++] = (byte) (c & 0xff); } else { if (c >= 0xd800 && c <= 0xdbff) { // Found a high surrogate. Verify that surrogate pair is // well-formed. Low surrogate must follow high surrogate. if (i + 1 < valueLength) { int c2 = value.charAt(i + 1); if (c2 >= 0xdc00 && c2 <= 0xdfff) { c = 0x10000 + (((c & 0x3ff) << 10) | (c2 & 0x3ff)); i++; } } } dst[dstOffset++] = (byte) (0xc0 | (c >> 16)); dst[dstOffset++] = (byte) ((c >> 8) & 0xff); dst[dstOffset++] = (byte) (c & 0xff); } } return dstOffset - originalOffset; } /** * Returns the amount of bytes required to encode the given String. * * @param value String to encode, may be null */ public static int calculateEncodedStringLength(String value) { if (value == null) { return 1; } int valueLength = value.length(); int encodedLen = unsignedVarIntLength(valueLength); for (int i = 0; i < valueLength; i++) { int c = value.charAt(i); if (c <= 0x7f) { encodedLen++; } else if (c <= 0x3fff) { encodedLen += 2; } else { if (c >= 0xd800 && c <= 0xdbff) { // Found a high surrogate. Verify that surrogate pair is // well-formed. Low surrogate must follow high surrogate. if (i + 1 < valueLength) { int c2 = value.charAt(i + 1); if (c2 >= 0xdc00 && c2 <= 0xdfff) { i++; } } } encodedLen += 3; } } return encodedLen; } private static int encodeUnsignedVarInt(int value, byte[] dst, int dstOffset) { if (value < 128) { dst[dstOffset] = (byte) value; return 1; } else if (value < 16384) { dst[dstOffset++] = (byte) ((value >> 8) | 0x80); dst[dstOffset] = (byte) value; return 2; } else if (value < 2097152) { dst[dstOffset++] = (byte) ((value >> 16) | 0xc0); dst[dstOffset++] = (byte) (value >> 8); dst[dstOffset] = (byte) value; return 3; } else if (value < 268435456) { dst[dstOffset++] = (byte) ((value >> 24) | 0xe0); dst[dstOffset++] = (byte) (value >> 16); dst[dstOffset++] = (byte) (value >> 8); dst[dstOffset] = (byte) value; return 4; } else { dst[dstOffset++] = (byte) 0xf0; dst[dstOffset++] = (byte) (value >> 24); dst[dstOffset++] = (byte) (value >> 16); dst[dstOffset++] = (byte) (value >> 8); dst[dstOffset] = (byte) value; return 5; } } private static int unsignedVarIntLength(int value) { if (value < 128) { return 1; } else if (value < 16384) { return 2; } else if (value < 2097152) { return 3; } else if (value < 268435456) { return 4; } else { return 5; } } private static int encodeSignedVarInt(int value, byte[] dst, int dstOffset) { value = (value < 0 ? (((~value) << 1) | 1) : (value << 1)); if (value < 0) { dst[dstOffset++] = (byte) 0xf0; dst[dstOffset++] = (byte) (value >> 24); dst[dstOffset++] = (byte) (value >> 16); dst[dstOffset++] = (byte) (value >> 8); dst[dstOffset] = (byte) value; return 5; } else { return encodeUnsignedVarInt(value, dst, dstOffset); } } private static int signedVarIntLength(int value) { value = (value < 0 ? ~value : value) << 1; return value < 0 ? 5 : unsignedVarIntLength(value); } /** * Writes a positive length value in up to five bytes. * * @return number of bytes written * @since 1.2 */ public static int writeLength(int valueLength, OutputStream out) throws IOException { if (valueLength < 128) { out.write(valueLength); return 1; } else if (valueLength < 16384) { out.write((valueLength >> 8) | 0x80); out.write(valueLength); return 2; } else if (valueLength < 2097152) { out.write((valueLength >> 16) | 0xc0); out.write(valueLength >> 8); out.write(valueLength); return 3; } else if (valueLength < 268435456) { out.write((valueLength >> 24) | 0xe0); out.write(valueLength >> 16); out.write(valueLength >> 8); out.write(valueLength); return 4; } else { out.write(0xf0); out.write(valueLength >> 24); out.write(valueLength >> 16); out.write(valueLength >> 8); out.write(valueLength); return 5; } } /** * Encodes the given byte array for use when there is only a single * property, whose type is a byte array. The original byte array is returned * if the padding lengths are zero. * * @param prefixPadding amount of extra bytes to allocate at start of * encoded byte array * @param suffixPadding amount of extra bytes to allocate at end of encoded * byte array */ public static byte[] encodeSingle(byte[] value, int prefixPadding, int suffixPadding) { if (prefixPadding <= 0 && suffixPadding <= 0) { return value; } int length = value.length; byte[] dst = new byte[prefixPadding + length + suffixPadding]; System.arraycopy(value, 0, dst, prefixPadding, length); return dst; } /** * Encodes the given byte array for use when there is only a single nullable * property, whose type is a byte array. */ public static byte[] encodeSingleNullable(byte[] value) { return encodeSingleNullable(value, 0, 0); } /** * Encodes the given byte array for use when there is only a single nullable * property, whose type is a byte array. * * @param prefixPadding amount of extra bytes to allocate at start of * encoded byte array * @param suffixPadding amount of extra bytes to allocate at end of encoded * byte array */ public static byte[] encodeSingleNullable(byte[] value, int prefixPadding, int suffixPadding) { if (prefixPadding <= 0 && suffixPadding <= 0) { if (value == null) { return new byte[] { NULL_BYTE_HIGH }; } int length = value.length; if (length == 0) { return new byte[] { NOT_NULL_BYTE_HIGH }; } byte[] dst = new byte[1 + length]; dst[0] = NOT_NULL_BYTE_HIGH; System.arraycopy(value, 0, dst, 1, length); return dst; } if (value == null) { byte[] dst = new byte[prefixPadding + 1 + suffixPadding]; dst[prefixPadding] = NULL_BYTE_HIGH; return dst; } int length = value.length; byte[] dst = new byte[prefixPadding + 1 + length + suffixPadding]; dst[prefixPadding] = NOT_NULL_BYTE_HIGH; System.arraycopy(value, 0, dst, prefixPadding + 1, length); return dst; } }