package FlexibleEncoding.ORC;
/**
adapted from ORC
@author wangmeng
*/
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.math.BigInteger;
public class SerializationUtils {
  /**
   * Licensed to the Apache Software Foundation (ASF) under one
   * or more contributor license agreements. See the NOTICE file
   * distributed with this work for additional information
   * regarding copyright ownership. The ASF licenses this file
   * to you under the Apache License, Version 2.0 (the
   * "License"); you may not use this file except in compliance
   * with the License. You may obtain a copy of the License at
   *
   * http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  // Static helpers only; the private constructor prevents instantiation.
  private SerializationUtils() {}

  /**
   * Write an unsigned long as a variable-length integer: 7 data bits per byte,
   * least significant group first, with the high bit set on every byte except
   * the last.
   * @param output the stream to write to
   * @param value the value to write (treated as unsigned)
   * @throws IOException if the underlying stream fails
   */
  static void writeVulong(OutputStream output, long value) throws IOException {
    while (true) {
      if ((value & ~0x7f) == 0) {
        // everything that is left fits in 7 bits: final byte, no continuation flag
        output.write((byte) value);
        return;
      } else {
        output.write((byte) (0x80 | (value & 0x7f)));
        value >>>= 7;
      }
    }
  }

  /**
   * Write a signed long as a zigzag-encoded variable-length integer.
   * @param output the stream to write to
   * @param value the signed value to write
   * @throws IOException if the underlying stream fails
   */
  static void writeVslong(OutputStream output, long value) throws IOException {
    writeVulong(output, zigzagEncode(value));
  }

  /**
   * Read a variable-length unsigned long as produced by
   * {@link #writeVulong(OutputStream, long)}.
   * @param in the stream to read from
   * @return the decoded value
   * @throws EOFException if the stream ends before the final byte is read
   * @throws IOException if the underlying stream fails
   */
  static long readVulong(InputStream in) throws IOException {
    long result = 0;
    long b;
    int offset = 0;
    do {
      b = in.read();
      if (b == -1) {
        throw new EOFException("Reading Vulong past EOF");
      }
      // b is a long so the shift is done in 64-bit arithmetic
      result |= (0x7f & b) << offset;
      offset += 7;
    } while (b >= 0x80);
    return result;
  }

  /**
   * Read a zigzag-encoded variable-length signed long as produced by
   * {@link #writeVslong(OutputStream, long)}.
   * @param in the stream to read from
   * @return the decoded signed value
   * @throws IOException if the underlying stream fails or ends early
   */
  static long readVslong(InputStream in) throws IOException {
    return zigzagDecode(readVulong(in));
  }

  /**
   * Read a single byte, failing on end-of-stream instead of handing the -1
   * sentinel back to the caller.
   */
  private static int readRequiredByte(InputStream in) throws IOException {
    int b = in.read();
    if (b < 0) {
      throw new EOFException("Read past EOF for " + in);
    }
    return b;
  }

  /**
   * Read a float stored as 4 bytes, least significant byte first.
   * @param in the stream to read from
   * @return the decoded float
   * @throws EOFException if fewer than 4 bytes are available
   * @throws IOException if the underlying stream fails
   */
  static float readFloat(InputStream in) throws IOException {
    int ser = 0;
    for (int shift = 0; shift < 32; shift += 8) {
      ser |= readRequiredByte(in) << shift;
    }
    return Float.intBitsToFloat(ser);
  }

  /**
   * Write a float as 4 bytes, least significant byte first.
   * @param output the stream to write to
   * @param value the value to write
   * @throws IOException if the underlying stream fails
   */
  static void writeFloat(OutputStream output, float value) throws IOException {
    int ser = Float.floatToIntBits(value);
    for (int shift = 0; shift < 32; shift += 8) {
      output.write((ser >> shift) & 0xff);
    }
  }

  /**
   * Read a double stored as 8 bytes, least significant byte first.
   * @param in the stream to read from
   * @return the decoded double
   * @throws EOFException if fewer than 8 bytes are available
   * @throws IOException if the underlying stream fails
   */
  static double readDouble(InputStream in) throws IOException {
    long ser = 0;
    for (int shift = 0; shift < 64; shift += 8) {
      // widen before shifting so the upper four bytes are not lost
      ser |= ((long) readRequiredByte(in)) << shift;
    }
    return Double.longBitsToDouble(ser);
  }

  /**
   * Write a double as 8 bytes, least significant byte first.
   * @param output the stream to write to
   * @param value the value to write
   * @throws IOException if the underlying stream fails
   */
  static void writeDouble(OutputStream output,
                          double value) throws IOException {
    long ser = Double.doubleToLongBits(value);
    for (int shift = 0; shift < 64; shift += 8) {
      output.write(((int) (ser >> shift)) & 0xff);
    }
  }

  /**
   * Write the arbitrarily sized signed BigInteger in vint format.
   *
   * Signed integers are encoded using the low bit as the sign bit using zigzag
   * encoding.
   *
   * Each byte uses the low 7 bits for data and the high bit for stop/continue.
   *
   * Bytes are stored LSB first.
   * @param output the stream to write to
   * @param value the value to output
   * @throws IOException if the underlying stream fails
   */
  static void writeBigInteger(OutputStream output,
                              BigInteger value) throws IOException {
    // encode the signed number as a positive integer
    value = value.shiftLeft(1);
    int sign = value.signum();
    if (sign < 0) {
      value = value.negate();
      value = value.subtract(BigInteger.ONE);
    }
    int length = value.bitLength();
    while (true) {
      long lowBits = value.longValue() & 0x7fffffffffffffffL;
      length -= 63;
      // write out the next 63 bits worth of data (9 bytes of 7 bits each)
      for (int i = 0; i < 9; ++i) {
        // if this is the last byte, leave the high bit off
        if (length <= 0 && (lowBits & ~0x7f) == 0) {
          output.write((byte) lowBits);
          return;
        } else {
          output.write((byte) (0x80 | (lowBits & 0x7f)));
          lowBits >>>= 7;
        }
      }
      value = value.shiftRight(63);
    }
  }

  /**
   * Read the signed arbitrary sized BigInteger in vint format, as produced by
   * {@link #writeBigInteger(OutputStream, BigInteger)}.
   * @param input the stream to read from
   * @return the read BigInteger
   * @throws EOFException if the stream ends before the final byte is read
   * @throws IOException if the underlying stream fails
   */
  static BigInteger readBigInteger(InputStream input) throws IOException {
    BigInteger result = BigInteger.ZERO;
    long work = 0;
    int offset = 0;
    long b;
    do {
      b = input.read();
      if (b == -1) {
        throw new EOFException("Reading BigInteger past EOF from " + input);
      }
      work |= (0x7f & b) << (offset % 63);
      offset += 7;
      // if we've read 63 bits, roll them into the result
      if (offset == 63) {
        result = BigInteger.valueOf(work);
        work = 0;
      } else if (offset % 63 == 0) {
        result = result.or(BigInteger.valueOf(work).shiftLeft(offset - 63));
        work = 0;
      }
    } while (b >= 0x80);
    // fold in whatever is left of a partially filled 63-bit group
    if (work != 0) {
      result = result.or(BigInteger.valueOf(work).shiftLeft((offset / 63) * 63));
    }
    // convert back to a signed number
    boolean isNegative = result.testBit(0);
    if (isNegative) {
      result = result.add(BigInteger.ONE);
      result = result.negate();
    }
    result = result.shiftRight(1);
    return result;
  }

  /**
   * The fixed bit widths that can be encoded; the ordinal of each constant is
   * the encoded form used by {@link #encodeBitWidth(int)} and
   * {@link #decodeBitWidth(int)}.
   */
  enum FixedBitSizes {
    ONE, TWO, THREE, FOUR, FIVE, SIX, SEVEN, EIGHT, NINE, TEN, ELEVEN, TWELVE,
    THIRTEEN, FOURTEEN, FIFTEEN, SIXTEEN, SEVENTEEN, EIGHTEEN, NINETEEN,
    TWENTY, TWENTYONE, TWENTYTWO, TWENTYTHREE, TWENTYFOUR, TWENTYSIX,
    TWENTYEIGHT, THIRTY, THIRTYTWO, FORTY, FORTYEIGHT, FIFTYSIX, SIXTYFOUR;
  }

  /**
   * Count the number of bits required to encode the given value, rounded up to
   * the closest supported fixed bit width. The value is treated as an unsigned
   * 64-bit quantity, so any value with the top bit set (i.e. a negative long)
   * needs 64 bits.
   * @param value the value to measure
   * @return bits required to store value
   */
  static int findClosestNumBits(long value) {
    // position of the highest set bit, 0 when value == 0
    int count = 64 - Long.numberOfLeadingZeros(value);
    return getClosestFixedBits(count);
  }

  /**
   * zigzag encode the given value
   * @param val the signed value
   * @return zigzag encoded value
   */
  static long zigzagEncode(long val) {
    return (val << 1) ^ (val >> 63);
  }

  /**
   * zigzag decode the given value
   * @param val the zigzag encoded value
   * @return zigzag decoded value
   */
  static long zigzagDecode(long val) {
    return (val >>> 1) ^ -(val & 1);
  }

  /**
   * Compute the bits required to represent pth percentile value
   * @param data - array
   * @param p - percentile value (&gt;0.0 to &lt;=1.0)
   * @return pth percentile bits, or -1 when p is outside that range
   */
  static int percentileBits(long[] data, double p) {
    if ((p > 1.0) || (p <= 0.0)) {
      return -1;
    }
    // histogram that stores the encoded bit requirement for each value;
    // one bucket per supported fixed bit width (refer FixedBitSizes)
    int[] hist = new int[FixedBitSizes.values().length];
    // compute the histogram
    for (long l : data) {
      int idx = encodeBitWidth(findClosestNumBits(l));
      hist[idx] += 1;
    }
    int len = data.length;
    // number of widest values allowed to fall outside the percentile
    int perLen = (int) (len * (1.0 - p));
    // return the bits required by pth percentile length
    for (int i = hist.length - 1; i >= 0; i--) {
      perLen -= hist[i];
      if (perLen < 0) {
        return decodeBitWidth(i);
      }
    }
    return 0;
  }

  /**
   * Read n bytes in big endian order and convert to long
   * @param input - stream to read the bytes from
   * @param n - number of bytes to read
   * @return long value
   * @throws IOException if the underlying stream fails
   */
  static long bytesToLongBE(InStream input, int n) throws IOException {
    // NOTE(review): the result of input.read() is not checked for an
    // end-of-stream marker here - confirm InStream's EOF convention.
    long out = 0;
    long val = 0;
    while (n > 0) {
      n--;
      // store it in a long and then shift else integer overflow will occur
      val = input.read();
      out |= (val << (n * 8));
    }
    return out;
  }

  /**
   * Calculate the number of bytes required
   * @param n - number of values
   * @param numBits - bit width
   * @return number of bytes required
   */
  static int getTotalBytesRequired(int n, int numBits) {
    // multiply in 64 bits so n * numBits cannot overflow before the division
    return (int) (((long) n * numBits + 7) / 8);
  }

  /**
   * For a given fixed bit this function will return the closest available fixed
   * bit
   * @param n - requested bit width
   * @return closest valid fixed bit
   */
  static int getClosestFixedBits(int n) {
    if (n == 0) {
      return 1;
    }
    // negative widths match none of the ranges and get the widest size
    if (n < 0 || n > 56) {
      return 64;
    }
    if (n <= 24) {
      return n;
    }
    if (n <= 26) {
      return 26;
    }
    if (n <= 28) {
      return 28;
    }
    if (n <= 30) {
      return 30;
    }
    if (n <= 32) {
      return 32;
    }
    if (n <= 40) {
      return 40;
    }
    if (n <= 48) {
      return 48;
    }
    return 56;
  }

  /**
   * Finds the closest available fixed bit width match and returns its encoded
   * value (ordinal)
   * @param n - fixed bit width to encode
   * @return encoded fixed bit width
   */
  static int encodeBitWidth(int n) {
    // after normalization the width is one of 1..24, 26, 28, 30, 32, 40, 48, 56, 64
    int width = getClosestFixedBits(n);
    if (width <= 24) {
      return width - 1;
    }
    switch (width) {
      case 26:
        return FixedBitSizes.TWENTYSIX.ordinal();
      case 28:
        return FixedBitSizes.TWENTYEIGHT.ordinal();
      case 30:
        return FixedBitSizes.THIRTY.ordinal();
      case 32:
        return FixedBitSizes.THIRTYTWO.ordinal();
      case 40:
        return FixedBitSizes.FORTY.ordinal();
      case 48:
        return FixedBitSizes.FORTYEIGHT.ordinal();
      case 56:
        return FixedBitSizes.FIFTYSIX.ordinal();
      default:
        return FixedBitSizes.SIXTYFOUR.ordinal();
    }
  }

  /**
   * Decodes the ordinal fixed bit value to actual fixed bit width value.
   * Any value that is not a known ordinal decodes to 64.
   * @param n - encoded fixed bit width
   * @return decoded fixed bit width
   */
  static int decodeBitWidth(int n) {
    if (n >= FixedBitSizes.ONE.ordinal()
        && n <= FixedBitSizes.TWENTYFOUR.ordinal()) {
      return n + 1;
    } else if (n == FixedBitSizes.TWENTYSIX.ordinal()) {
      return 26;
    } else if (n == FixedBitSizes.TWENTYEIGHT.ordinal()) {
      return 28;
    } else if (n == FixedBitSizes.THIRTY.ordinal()) {
      return 30;
    } else if (n == FixedBitSizes.THIRTYTWO.ordinal()) {
      return 32;
    } else if (n == FixedBitSizes.FORTY.ordinal()) {
      return 40;
    } else if (n == FixedBitSizes.FORTYEIGHT.ordinal()) {
      return 48;
    } else if (n == FixedBitSizes.FIFTYSIX.ordinal()) {
      return 56;
    } else {
      return 64;
    }
  }

  /**
   * Bitpack and write the input values to underlying output stream. Each value
   * contributes its low {@code bitSize} bits, most significant bit first; a
   * partially filled last byte is padded with zero bits. Nothing is written
   * when the input is null or empty, or when offset, len or bitSize is out of
   * range.
   * @param input - values to write
   * @param offset - index of the first value to write
   * @param len - number of values to write
   * @param bitSize - bit width
   * @param output - output stream
   * @throws IOException if the underlying stream fails
   */
  static void writeInts(long[] input, int offset, int len, int bitSize,
                        OutputStream output) throws IOException {
    if (input == null || input.length < 1 || offset < 0 || len < 1
        || bitSize < 1) {
      return;
    }
    int bitsLeft = 8;
    byte current = 0;
    for (int i = offset; i < (offset + len); i++) {
      long value = input[i];
      int bitsToWrite = bitSize;
      while (bitsToWrite > bitsLeft) {
        // add the bits to the bottom of the current word
        current |= value >>> (bitsToWrite - bitsLeft);
        // subtract out the bits we just added
        bitsToWrite -= bitsLeft;
        // zero out the bits above bitsToWrite
        value &= (1L << bitsToWrite) - 1;
        output.write(current);
        current = 0;
        bitsLeft = 8;
      }
      bitsLeft -= bitsToWrite;
      current |= value << bitsLeft;
      if (bitsLeft == 0) {
        output.write(current);
        current = 0;
        bitsLeft = 8;
      }
    }
    // flush the partially filled last byte, if any
    if (bitsLeft != 8) {
      output.write(current);
    }
  }

  /**
   * Read bitpacked integers from input stream. Each value is assembled from
   * {@code bitSize} consecutive bits, most significant bit first.
   * @param buffer - buffer that receives the decoded values
   * @param offset - index of the first buffer slot to fill
   * @param len - number of values to read
   * @param bitSize - bit width
   * @param input - input stream
   * @throws IOException if the underlying stream fails
   */
  static void readInts(long[] buffer, int offset, int len, int bitSize,
                       InStream input) throws IOException {
    // NOTE(review): the result of input.read() is not checked for an
    // end-of-stream marker here - confirm InStream's EOF convention.
    int bitsLeft = 0;
    int current = 0;
    for (int i = offset; i < (offset + len); i++) {
      long result = 0;
      int bitsLeftToRead = bitSize;
      while (bitsLeftToRead > bitsLeft) {
        // consume everything that remains of the current byte
        result <<= bitsLeft;
        result |= current & ((1 << bitsLeft) - 1);
        bitsLeftToRead -= bitsLeft;
        current = input.read();
        bitsLeft = 8;
      }
      // handle the left over bits
      if (bitsLeftToRead > 0) {
        result <<= bitsLeftToRead;
        bitsLeft -= bitsLeftToRead;
        result |= (current >> bitsLeft) & ((1 << bitsLeftToRead) - 1);
      }
      buffer[i] = result;
    }
  }
}