// SerializationUtils.java example
//
// Explorer
// ----Data---Storage---master
// - src
package FlexibleEncoding.ORC;

/**
 * Adapted from ORC.
 * @author wangmeng
 */


import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.math.BigInteger;

/**
 * Static helpers for serializing numbers to and from byte streams:
 * base-128 variable-length integers (optionally zigzag encoded),
 * little-endian floats and doubles, arbitrarily sized {@link BigInteger}
 * values, and fixed-width bit-packed integers.
 *
 * All methods are static; the class cannot be instantiated.
 */
public final class SerializationUtils {
	/**
	 * Licensed to the Apache Software Foundation (ASF) under one
	 * or more contributor license agreements.  See the NOTICE file
	 * distributed with this work for additional information
	 * regarding copyright ownership.  The ASF licenses this file
	 * to you under the Apache License, Version 2.0 (the
	 * "License"); you may not use this file except in compliance
	 * with the License.  You may obtain a copy of the License at
	 *
	 *     http://www.apache.org/licenses/LICENSE-2.0
	 *
	 * Unless required by applicable law or agreed to in writing, software
	 * distributed under the License is distributed on an "AS IS" BASIS,
	 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	 * See the License for the specific language governing permissions and
	 * limitations under the License.
	 */

	  /** Not instantiable: this class only holds static helpers. */
	  private SerializationUtils() {}

	  /**
	   * Write an unsigned long as a base-128 varint, least significant group
	   * first. Every byte except the last has its high bit set.
	   * @param output the stream to write to
	   * @param value the value to write, interpreted as unsigned
	   * @throws IOException if the underlying stream fails
	   */
	  static void writeVulong(OutputStream output, long value) throws IOException {
	    // keep emitting continuation bytes while bits remain above the low 7
	    while ((value & ~0x7fL) != 0) {
	      output.write((byte) (0x80 | (value & 0x7f)));
	      value >>>= 7;
	    }
	    output.write((byte) value);
	  }

	  /**
	   * Write a signed long as a zigzag encoded base-128 varint.
	   * @param output the stream to write to
	   * @param value the signed value to write
	   * @throws IOException if the underlying stream fails
	   */
	  static void writeVslong(OutputStream output, long value) throws IOException {
	    writeVulong(output, zigzagEncode(value));
	  }

	  /**
	   * Read an unsigned base-128 varint written by
	   * {@link #writeVulong(OutputStream, long)}.
	   * @param in the stream to read from
	   * @return the decoded value
	   * @throws EOFException if the stream ends before the final byte
	   * @throws IOException if the underlying stream fails
	   */
	  static long readVulong(InputStream in) throws IOException {
	    long result = 0;
	    long b;
	    int offset = 0;
	    do {
	      b = in.read();
	      if (b == -1) {
	        throw new EOFException("Reading Vulong past EOF");
	      }
	      result |= (0x7f & b) << offset;
	      offset += 7;
	    } while (b >= 0x80);
	    return result;
	  }

	  /**
	   * Read a zigzag encoded signed varint written by
	   * {@link #writeVslong(OutputStream, long)}.
	   * @param in the stream to read from
	   * @return the decoded signed value
	   * @throws EOFException if the stream ends before the final byte
	   * @throws IOException if the underlying stream fails
	   */
	  static long readVslong(InputStream in) throws IOException {
	    return zigzagDecode(readVulong(in));
	  }

	  /**
	   * Read a single byte, failing loudly instead of returning the -1
	   * end-of-stream marker.
	   * @param in the stream to read from
	   * @param what name of the value being read, used in the error message
	   * @return the byte as an int in the range 0..255
	   * @throws EOFException if the stream is exhausted
	   * @throws IOException if the underlying stream fails
	   */
	  private static int readRequiredByte(InputStream in,
	                                      String what) throws IOException {
	    int b = in.read();
	    if (b == -1) {
	      throw new EOFException("Reading " + what + " past EOF");
	    }
	    return b;
	  }

	  /**
	   * Read a float stored as 4 bytes, least significant byte first.
	   * @param in the stream to read from
	   * @return the decoded float
	   * @throws EOFException if fewer than 4 bytes are available
	   * @throws IOException if the underlying stream fails
	   */
	  static float readFloat(InputStream in) throws IOException {
	    int ser = 0;
	    for (int shift = 0; shift < Integer.SIZE; shift += 8) {
	      ser |= readRequiredByte(in, "float") << shift;
	    }
	    return Float.intBitsToFloat(ser);
	  }

	  /**
	   * Write a float as 4 bytes, least significant byte first.
	   * @param output the stream to write to
	   * @param value the value to write
	   * @throws IOException if the underlying stream fails
	   */
	  static void writeFloat(OutputStream output, float value) throws IOException {
	    int ser = Float.floatToIntBits(value);
	    for (int shift = 0; shift < Integer.SIZE; shift += 8) {
	      output.write((ser >> shift) & 0xff);
	    }
	  }

	  /**
	   * Read a double stored as 8 bytes, least significant byte first.
	   * @param in the stream to read from
	   * @return the decoded double
	   * @throws EOFException if fewer than 8 bytes are available
	   * @throws IOException if the underlying stream fails
	   */
	  static double readDouble(InputStream in) throws IOException {
	    long ser = 0;
	    for (int shift = 0; shift < Long.SIZE; shift += 8) {
	      // widen before shifting so the upper bytes are not lost
	      ser |= ((long) readRequiredByte(in, "double")) << shift;
	    }
	    return Double.longBitsToDouble(ser);
	  }

	  /**
	   * Write a double as 8 bytes, least significant byte first.
	   * @param output the stream to write to
	   * @param value the value to write
	   * @throws IOException if the underlying stream fails
	   */
	  static void writeDouble(OutputStream output,
	                          double value) throws IOException {
	    long ser = Double.doubleToLongBits(value);
	    for (int shift = 0; shift < Long.SIZE; shift += 8) {
	      output.write(((int) (ser >> shift)) & 0xff);
	    }
	  }

	  /**
	   * Write the arbitrarily sized signed BigInteger in vint format.
	   *
	   * Signed integers are encoded using the low bit as the sign bit using zigzag
	   * encoding.
	   *
	   * Each byte uses the low 7 bits for data and the high bit for stop/continue.
	   *
	   * Bytes are stored LSB first.
	   * @param output the stream to write to
	   * @param value the value to output
	   * @throws IOException if the underlying stream fails
	   */
	  static void writeBigInteger(OutputStream output,
	                              BigInteger value) throws IOException {
	    // encode the signed number as a positive integer
	    value = value.shiftLeft(1);
	    int sign = value.signum();
	    if (sign < 0) {
	      value = value.negate();
	      value = value.subtract(BigInteger.ONE);
	    }
	    int length = value.bitLength();
	    while (true) {
	      long lowBits = value.longValue() & 0x7fffffffffffffffL;
	      length -= 63;
	      // write out the next 63 bits worth of data (9 groups of 7 bits)
	      for(int i=0; i < 9; ++i) {
	        // if this is the last byte, leave the high bit off
	        if (length <= 0 && (lowBits & ~0x7f) == 0) {
	          output.write((byte) lowBits);
	          return;
	        } else {
	          output.write((byte) (0x80 | (lowBits & 0x7f)));
	          lowBits >>>= 7;
	        }
	      }
	      value = value.shiftRight(63);
	    }
	  }

	  /**
	   * Read the signed arbitrarily sized BigInteger in vint format, as written
	   * by {@link #writeBigInteger(OutputStream, BigInteger)}.
	   * @param input the stream to read from
	   * @return the read BigInteger
	   * @throws EOFException if the stream ends before the final byte
	   * @throws IOException if the underlying stream fails
	   */
	  static BigInteger readBigInteger(InputStream input) throws IOException {
	    BigInteger result = BigInteger.ZERO;
	    // accumulates up to 63 bits before they are merged into result
	    long work = 0;
	    int offset = 0;
	    long b;
	    do {
	      b = input.read();
	      if (b == -1) {
	        throw new EOFException("Reading BigInteger past EOF from " + input);
	      }
	      work |= (0x7f & b) << (offset % 63);
	      offset += 7;
	      // if we've read 63 bits, roll them into the result
	      if (offset == 63) {
	        result = BigInteger.valueOf(work);
	        work = 0;
	      } else if (offset % 63 == 0) {
	        result = result.or(BigInteger.valueOf(work).shiftLeft(offset-63));
	        work = 0;
	      }
	    } while (b >= 0x80);
	    // merge whatever is left from a partially filled 63 bit group
	    if (work != 0) {
	      result = result.or(BigInteger.valueOf(work).shiftLeft((offset/63)*63));
	    }
	    // convert back to a signed number
	    boolean isNegative = result.testBit(0);
	    if (isNegative) {
	      result = result.add(BigInteger.ONE);
	      result = result.negate();
	    }
	    result = result.shiftRight(1);
	    return result;
	  }

	  /**
	   * The bit widths that can be encoded. The ordinal of each constant is the
	   * encoded form used by {@link #encodeBitWidth(int)} and
	   * {@link #decodeBitWidth(int)}, so the declaration order must not change.
	   */
	  enum FixedBitSizes {
	    ONE, TWO, THREE, FOUR, FIVE, SIX, SEVEN, EIGHT, NINE, TEN, ELEVEN, TWELVE,
	    THIRTEEN, FOURTEEN, FIFTEEN, SIXTEEN, SEVENTEEN, EIGHTEEN, NINETEEN,
	    TWENTY, TWENTYONE, TWENTYTWO, TWENTYTHREE, TWENTYFOUR, TWENTYSIX,
	    TWENTYEIGHT, THIRTY, THIRTYTWO, FORTY, FORTYEIGHT, FIFTYSIX, SIXTYFOUR;
	  }

	  /**
	   * Count the number of bits required to encode the given value, treating it
	   * as unsigned, and round that up to the closest supported fixed bit width.
	   * A value with its top bit set therefore needs 64 bits.
	   * @param value the value to measure
	   * @return closest supported bit width able to store value
	   */
	  static int findClosestNumBits(long value) {
	    // position of the highest set bit; 0 when value is 0
	    int count = Long.SIZE - Long.numberOfLeadingZeros(value);
	    return getClosestFixedBits(count);
	  }

	  /**
	   * zigzag encode the given value
	   * @param val the signed value
	   * @return zigzag encoded value
	   */
	  static long zigzagEncode(long val) {
	    return (val << 1) ^ (val >> 63);
	  }

	  /**
	   * zigzag decode the given value
	   * @param val the zigzag encoded value
	   * @return zigzag decoded value
	   */
	  static long zigzagDecode(long val) {
	    return (val >>> 1) ^ -(val & 1);
	  }

	  /**
	   * Compute the bits required to represent pth percentile value
	   * @param data - array
	   * @param p - percentile value (>0.0 to <=1.0)
	   * @return pth percentile bits, or -1 when p is outside that range
	   */
	  static int percentileBits(long[] data, double p) {
	    if ((p > 1.0) || (p <= 0.0)) {
	      return -1;
	    }

	    // histogram of the encoded bit width needed by each value; there is one
	    // slot per supported width (refer FixedBitSizes)
	    int[] hist = new int[FixedBitSizes.values().length];

	    // compute the histogram
	    for(long l : data) {
	      int idx = encodeBitWidth(findClosestNumBits(l));
	      hist[idx] += 1;
	    }

	    // number of values allowed to exceed the returned width
	    int len = data.length;
	    int perLen = (int) (len * (1.0 - p));

	    // walk down from the widest bucket until the allowance is used up
	    for(int i = hist.length - 1; i >= 0; i--) {
	      perLen -= hist[i];
	      if (perLen < 0) {
	        return decodeBitWidth(i);
	      }
	    }

	    return 0;
	  }

	  /**
	   * Read n bytes in big endian order and convert to long
	   *
	   * NOTE(review): the value returned by input.read() is used as-is; an
	   * end-of-stream marker is not checked here. Presumably InStream.read()
	   * returns -1 at end of stream like java.io.InputStream - confirm.
	   * @param input - stream to read from
	   * @param n - number of bytes to read
	   * @return long value
	   * @throws IOException if the underlying stream fails
	   */
	  static long bytesToLongBE(InStream input, int n) throws IOException {
	    long out = 0;
	    while (n > 0) {
	      n--;
	      // store it in a long and then shift else integer overflow will occur
	      long val = input.read();
	      out |= (val << (n * 8));
	    }
	    return out;
	  }

	  /**
	   * Calculate the number of bytes required
	   * @param n - number of values
	   * @param numBits - bit width
	   * @return number of bytes required
	   */
	  static int getTotalBytesRequired(int n, int numBits) {
	    // multiply in 64 bits so the bit count cannot overflow an int
	    return (int) (((long) n * numBits + 7) / 8);
	  }

	  /**
	   * For a given fixed bit this function will return the closest available fixed
	   * bit
	   * @param n - requested bit width
	   * @return closest valid fixed bit
	   */
	  static int getClosestFixedBits(int n) {
	    if (n == 0) {
	      return 1;
	    }

	    if (n >= 1 && n <= 24) {
	      return n;
	    } else if (n > 24 && n <= 26) {
	      return 26;
	    } else if (n > 26 && n <= 28) {
	      return 28;
	    } else if (n > 28 && n <= 30) {
	      return 30;
	    } else if (n > 30 && n <= 32) {
	      return 32;
	    } else if (n > 32 && n <= 40) {
	      return 40;
	    } else if (n > 40 && n <= 48) {
	      return 48;
	    } else if (n > 48 && n <= 56) {
	      return 56;
	    } else {
	      // everything else, including negative n, maps to the widest size
	      return 64;
	    }
	  }

	  /**
	   * Finds the closest available fixed bit width match and returns its encoded
	   * value (ordinal)
	   * @param n - fixed bit width to encode
	   * @return encoded fixed bit width
	   */
	  static int encodeBitWidth(int n) {
	    // after this call n is one of 1..24, 26, 28, 30, 32, 40, 48, 56, 64
	    n = getClosestFixedBits(n);

	    if (n <= 24) {
	      return n - 1;
	    }
	    switch (n) {
	      case 26:
	        return FixedBitSizes.TWENTYSIX.ordinal();
	      case 28:
	        return FixedBitSizes.TWENTYEIGHT.ordinal();
	      case 30:
	        return FixedBitSizes.THIRTY.ordinal();
	      case 32:
	        return FixedBitSizes.THIRTYTWO.ordinal();
	      case 40:
	        return FixedBitSizes.FORTY.ordinal();
	      case 48:
	        return FixedBitSizes.FORTYEIGHT.ordinal();
	      case 56:
	        return FixedBitSizes.FIFTYSIX.ordinal();
	      default:
	        return FixedBitSizes.SIXTYFOUR.ordinal();
	    }
	  }

	  /**
	   * Decodes the ordinal fixed bit value to actual fixed bit width value
	   * @param n - encoded fixed bit width
	   * @return decoded fixed bit width; any unrecognized n decodes to 64
	   */
	  static int decodeBitWidth(int n) {
	    if (n >= FixedBitSizes.ONE.ordinal()
	        && n <= FixedBitSizes.TWENTYFOUR.ordinal()) {
	      return n + 1;
	    } else if (n == FixedBitSizes.TWENTYSIX.ordinal()) {
	      return 26;
	    } else if (n == FixedBitSizes.TWENTYEIGHT.ordinal()) {
	      return 28;
	    } else if (n == FixedBitSizes.THIRTY.ordinal()) {
	      return 30;
	    } else if (n == FixedBitSizes.THIRTYTWO.ordinal()) {
	      return 32;
	    } else if (n == FixedBitSizes.FORTY.ordinal()) {
	      return 40;
	    } else if (n == FixedBitSizes.FORTYEIGHT.ordinal()) {
	      return 48;
	    } else if (n == FixedBitSizes.FIFTYSIX.ordinal()) {
	      return 56;
	    } else {
	      return 64;
	    }
	  }

	  /**
	   * Bitpack and write the input values to underlying output stream.
	   * Values are packed most significant bit first; a trailing partial byte is
	   * padded with zero bits. Nothing is written when input is null or empty,
	   * offset is negative, or len or bitSize is less than 1.
	   * Each value is expected to fit in bitSize bits; higher bits are not masked
	   * off before packing.
	   * @param input - values to write
	   * @param offset - index of the first value to write
	   * @param len - number of values to write
	   * @param bitSize - bit width of each packed value
	   * @param output - output stream
	   * @throws IOException if the underlying stream fails
	   */
	  static void writeInts(long[] input, int offset, int len, int bitSize,
	                        OutputStream output) throws IOException {
	    if (input == null || input.length < 1 || offset < 0 || len < 1
	        || bitSize < 1) {
	      return;
	    }

	    // free bits remaining in the byte being assembled
	    int bitsLeft = 8;
	    byte current = 0;
	    for(int i = offset; i < (offset + len); i++) {
	      long value = input[i];
	      int bitsToWrite = bitSize;
	      while (bitsToWrite > bitsLeft) {
	        // add the bits to the bottom of the current word
	        current |= value >>> (bitsToWrite - bitsLeft);
	        // subtract out the bits we just added
	        bitsToWrite -= bitsLeft;
	        // zero out the bits above bitsToWrite
	        value &= (1L << bitsToWrite) - 1;
	        output.write(current);
	        current = 0;
	        bitsLeft = 8;
	      }
	      // the remainder fits in the current byte
	      bitsLeft -= bitsToWrite;
	      current |= value << bitsLeft;
	      if (bitsLeft == 0) {
	        output.write(current);
	        current = 0;
	        bitsLeft = 8;
	      }
	    }

	    // flush the partially filled last byte
	    if (bitsLeft != 8) {
	      output.write(current);
	    }
	  }

	  /**
	   * Read bitpacked integers from input stream into
	   * buffer[offset .. offset + len - 1].
	   *
	   * NOTE(review): the value returned by input.read() is used as-is; an
	   * end-of-stream marker is not checked here. Presumably InStream.read()
	   * returns -1 at end of stream like java.io.InputStream - confirm.
	   * @param buffer - destination for the decoded values
	   * @param offset - index of the first slot to fill
	   * @param len - number of values to read
	   * @param bitSize - bit width of each packed value
	   * @param input - input stream
	   * @throws IOException if the underlying stream fails
	   */
	  static void readInts(long[] buffer, int offset, int len, int bitSize,
	                       InStream input) throws IOException {
	    // unread bits remaining in the low end of current
	    int bitsLeft = 0;
	    int current = 0;

	    for(int i = offset; i < (offset + len); i++) {
	      long result = 0;
	      int bitsLeftToRead = bitSize;
	      while (bitsLeftToRead > bitsLeft) {
	        // consume everything left in the current byte, then fetch the next
	        result <<= bitsLeft;
	        result |= current & ((1 << bitsLeft) - 1);
	        bitsLeftToRead -= bitsLeft;
	        current = input.read();
	        bitsLeft = 8;
	      }

	      // handle the left over bits
	      if (bitsLeftToRead > 0) {
	        result <<= bitsLeftToRead;
	        bitsLeft -= bitsLeftToRead;
	        result |= (current >> bitsLeft) & ((1 << bitsLeftToRead) - 1);
	      }
	      buffer[i] = result;
	    }
	  }
}