/**
 * Static helpers for packing several byte arrays into one byte array and for
 * getting them back out again. Two independent wire formats are supported:
 *
 * <ul>
 * <li><b>length-prefixed</b> ({@link #pack(java.util.List)}, {@link #unpack(byte[])},
 * {@link #getElement(byte[], int)}): a 2-byte big-endian item count, followed by
 * one 4-byte big-endian length per item, followed by the item bytes.</li>
 * <li><b>comparable / delimited</b> ({@link #packComparable(java.util.List, byte, byte)},
 * {@link #unpackComparable(byte[], byte, byte)}): items separated by a delimiter
 * byte and closed by a terminal delimiter byte.</li>
 * </ul>
 */
public class PackedByteArray {
  private static final byte DEFAULT_DELIMITER = 1;
  private static final byte DEFAULT_TERMINAL_DELIMITER = 0;
  // Initial buffer capacity used when reading a stream of unknown length; the
  // buffer grows on demand, so this is only a sizing hint.
  private static final int MAGIC_INITIAL_BYTE_ARRAY_SIZE = 256;

  /**
   * Varargs convenience wrapper around {@link #pack(java.util.List)}.
   *
   * @param arrays the byte arrays to pack, in order
   * @return the packed, length-prefixed byte array
   * @throws IllegalArgumentException if more than {@code Short.MAX_VALUE} arrays are given
   * @see #pack(java.util.List)
   */
  public static byte[] pack(byte[]... arrays) {
    return pack(Arrays.asList(arrays));
  }

  /**
   * Reads bytes from a DataInput until the terminal delimiter is encountered
   * and returns the bytes read, not including the delimiter. The delimiter
   * itself is consumed from the input.
   *
   * @param in source to read from
   * @param terminalDelimiter byte value that ends the array
   * @return the bytes that preceded the delimiter (possibly empty)
   * @throws IOException if reading fails; an EOFException is raised by
   *         {@code readByte()} if the input ends before the delimiter is seen
   */
  public static byte[] readByteArray(
    DataInput in, byte terminalDelimiter
  ) throws IOException {
    ByteArrayOutputStream buffer =
      new ByteArrayOutputStream(MAGIC_INITIAL_BYTE_ARRAY_SIZE);
    byte b;

    while ((b = in.readByte()) != terminalDelimiter) {
      buffer.write(b);
    }

    return buffer.toByteArray();
  }

  /**
   * Reads one delimited, terminated record from a DataInput and splits it into
   * its items. The terminal delimiter is consumed from the input.
   *
   * @param in source to read from
   * @param delimiter byte value separating items
   * @param terminalDelimiter byte value that ends the record
   * @return the items of the record, in order
   * @throws IOException if reading fails or the input ends before the
   *         terminal delimiter is seen
   * @see #packComparable(java.util.List, byte, byte) for format
   */
  public static List<byte[]> readByteArrayList(
    DataInput in, byte delimiter, byte terminalDelimiter
  ) throws IOException {
    ByteArrayOutputStream buffer =
      new ByteArrayOutputStream(MAGIC_INITIAL_BYTE_ARRAY_SIZE);
    byte b;

    while ((b = in.readByte()) != terminalDelimiter) {
      buffer.write(b);
    }

    // unpackComparable() only emits the last item when it sees the terminator,
    // so put it back on the end of what was read
    buffer.write(terminalDelimiter);

    return unpackComparable(
      buffer.toByteArray(), delimiter, terminalDelimiter
    );
  }

  /**
   * Varargs convenience wrapper around {@link #packComparable(java.util.List)}.
   *
   * @param arrays the byte arrays to pack, in order
   * @return the packed byte array using the default delimiters
   */
  public static byte[] packComparable(byte[]... arrays) {
    return packComparable(Arrays.asList(arrays));
  }

  /**
   * Packs using the default delimiter (1) and default terminal delimiter (0).
   *
   * @param arrays the byte arrays to pack, in order
   * @return the packed byte array
   * @see #packComparable(java.util.List, byte, byte)
   */
  public static byte[] packComparable(List<byte[]> arrays) {
    return packComparable(
      arrays, DEFAULT_DELIMITER, DEFAULT_TERMINAL_DELIMITER
    );
  }

  /**
   * Packs a list of byte[] into the format
   * <p/>
   * {@code item1,delimiter,item2,delimiter,...,item_n,terminalDelimiter}
   * <p/>
   * note: the values delimiter and terminalDelimiter must not only NOT appear
   * in any byte[], but be less than any other value in the byte arrays. The
   * defaults used in the helper methods are 1 (delimiter) and 0 (terminal
   * delimiter). These values are what make the packed byte[] comparable as an
   * unsigned byte[]. This method does not verify the constraint; items that
   * contain either value pack without error but will not unpack correctly.
   * <p/>
   * A future extension would use duplication/padding (0 -> 00, 00 -> 000, etc)
   * in order to handle this, but it's not needed yet.
   * <p/>
   * In practice, the byte values are for printable ascii chars and binary data
   * may be base64 encoded as long as all values are byte values > 1
   * <p/>
   * An empty list and a list holding a single empty array both pack to just
   * the terminal delimiter, so the two cannot be told apart after packing.
   *
   * @param arrays list of byte[] to pack
   * @param delimiter byte written between items; recommend 1
   * @param terminalDelimiter byte written after the last item; recommend 0
   * @return the packed byte array
   */
  public static byte[] packComparable(
    List<byte[]> arrays, byte delimiter, byte terminalDelimiter
  ) {
    // item1,delim,item2,delim, ..., terminalDelimiter
    int packedSize = 0;

    // counting one delimiter per item overcounts by one, which covers the
    // terminal delimiter
    for (byte[] array : arrays) {
      packedSize += array.length + 1;
    }

    // an empty list still produces the single terminal delimiter byte
    ByteArrayOutputStream out =
      new ByteArrayOutputStream(Math.max(1, packedSize));
    boolean first = true;

    for (byte[] array : arrays) {
      if (!first) {
        out.write(delimiter);
      }

      // the (byte[], int, int) overload of ByteArrayOutputStream does not
      // declare IOException, so no try/catch is needed
      out.write(array, 0, array.length);
      first = false;
    }

    out.write(terminalDelimiter);

    return out.toByteArray();
  }

  /**
   * Unpacks using the default delimiter (1) and default terminal delimiter (0).
   *
   * @param packedArray array produced by {@link #packComparable(java.util.List)}
   * @return the items, in order
   * @see #unpackComparable(byte[], byte, byte)
   */
  public static List<byte[]> unpackComparable(byte[] packedArray) {
    return unpackComparable(
      packedArray, DEFAULT_DELIMITER, DEFAULT_TERMINAL_DELIMITER
    );
  }

  /**
   * Splits a delimited, terminated byte array into its items. Scanning stops
   * at the first terminal delimiter; anything after it is ignored.
   * <p/>
   * If the input contains no terminal delimiter, the bytes after the last
   * delimiter are NOT returned (an item is only emitted when the byte that
   * closes it is seen). A packed array holding only the terminal delimiter
   * yields a list with one empty item.
   *
   * @param packedArray the packed bytes
   * @param delimiter byte value separating items
   * @param terminalDelimiter byte value that ends the record; checked before
   *        delimiter, so it wins if both are the same value
   * @return the items, in order
   */
  public static List<byte[]> unpackComparable(
    byte[] packedArray, byte delimiter, byte terminalDelimiter
  ) {
    List<byte[]> results = new ArrayList<byte[]>();
    // index of the first byte of the item currently being scanned
    int tokenStart = 0;

    for (int i = 0; i < packedArray.length; i++) {
      byte current = packedArray[i];

      if (current == terminalDelimiter) {
        // end of entire byte array
        results.add(Arrays.copyOfRange(packedArray, tokenStart, i));
        break;
      } else if (current == delimiter) {
        // end of an element, store and move to next
        results.add(Arrays.copyOfRange(packedArray, tokenStart, i));
        tokenStart = i + 1;
      }
    }

    return results;
  }

  /**
   * Packs a list of byte[] into a single array in the format
   * <p/>
   * {@code [numItems][len1,len2,...len_n][item1,item2,...item_n]}
   * <p/>
   * where numItems is a 2-byte big-endian short and each len is a 4-byte
   * big-endian int.
   *
   * @param arrays the byte arrays to pack, in order
   * @return packed byte array
   * @throws IllegalArgumentException if there are more than
   *         {@code Short.MAX_VALUE} arrays, since the count would not fit in
   *         the signed 2-byte header that unpack() and getElement() read
   */
  public static byte[] pack(List<byte[]> arrays) {
    if (arrays.size() > Short.MAX_VALUE) {
      throw new IllegalArgumentException(
        String.format(
          "cannot pack %d arrays, max is %d", arrays.size(), Short.MAX_VALUE
        )
      );
    }

    // numItems(short) + len1(int) + len2 + ...
    int packedSize = 2 + 4 * arrays.size();

    for (byte[] array : arrays) {
      packedSize += array.length;
    }

    ByteArrayOutputStream byteArrayOutputStream =
      new ByteArrayOutputStream(packedSize);
    DataOutput output = new DataOutputStream(byteArrayOutputStream);

    try {
      output.writeShort(arrays.size());

      for (byte[] array : arrays) {
        output.writeInt(array.length);
      }

      for (byte[] array : arrays) {
        output.write(array);
      }
    } catch (IOException e) {
      // writing to an in-memory buffer
      throw new RuntimeException("no reason we should see this", e);
    }

    return byteArrayOutputStream.toByteArray();
  }

  /**
   * Unpacks an array packed by pack().
   *
   * @param packedArray array in the format written by pack()
   * @return the original arrays, in order
   * @throws IllegalArgumentException if the item count in the header is negative
   * @throws RuntimeException wrapping the IOException if packedArray is
   *         shorter than its header says it should be
   * @see #pack(byte[]...)
   */
  public static byte[][] unpack(byte[] packedArray) {
    try {
      DataInput input = new DataInputStream(
        new ByteArrayInputStream(packedArray)
      );
      short numItems = input.readShort();

      // explicit check rather than assert: asserts are off by default and a
      // negative count would otherwise surface as NegativeArraySizeException
      if (numItems < 0) {
        throw new IllegalArgumentException(
          String.format("invalid item count %d in packed array", numItems)
        );
      }

      int[] lens = new int[numItems];
      byte[][] arrays = new byte[numItems][];

      for (int i = 0; i < numItems; i++) {
        lens[i] = input.readInt();
      }

      for (int i = 0; i < numItems; i++) {
        arrays[i] = new byte[lens[i]];
        input.readFully(arrays[i]);
      }

      return arrays;
    } catch (IOException e) {
      throw new RuntimeException("shouldn't see this either", e);
    }
  }

  /**
   * Gets a single element from a packed array without unpacking the rest.
   *
   * @param packedArray array in the format written by pack()
   * @param pos zero-based index of the element to return
   * @return a copy of the element's bytes
   * @throws IllegalArgumentException if pos is negative or is not less than
   *         the number of items recorded in the header
   * @see #pack(byte[]...)
   */
  public static byte[] getElement(byte[] packedArray, int pos) {
    // Both header bytes must be masked: a byte is sign-extended when promoted
    // to int, so an unmasked low byte >= 0x80 would turn the count negative.
    // The cast to short matches what DataInput.readShort() does in unpack().
    short numItems =
      (short) (((packedArray[0] & 0xFF) << 8) | (packedArray[1] & 0xFF));

    if (pos < 0) {
      throw new IllegalArgumentException(
        String.format("index %d is negative", pos)
      );
    }

    if (pos > numItems - 1) {
      throw new IllegalArgumentException(
        String.format(
          "index %d is greater than max %d", pos, numItems - 1
        )
      );
    }

    // data starts after numItems(short) + itemLen1 + itemLen2 + ...
    int dataPtr = 2 + numItems * 4;

    // skip over the data of every element that precedes pos
    for (int i = 0; i < pos; i++) {
      int len = byteToInt(packedArray, 2 + (i * 4));

      assert len >= 0;
      dataPtr += len;
    }

    int dataLen = byteToInt(packedArray, 2 + (pos * 4));

    assert dataLen >= 0;

    return Arrays.copyOfRange(packedArray, dataPtr, dataPtr + dataLen);
  }

  /**
   * Not used, but an alternative method to convert byte[4] -> int by reading
   * it through a DataInputStream.
   *
   * @param bytes array containing a 4-byte big-endian integer
   * @param i offset where the 4-byte integer starts
   * @return the decoded int
   * @throws RuntimeException wrapping the IOException if fewer than 4 bytes
   *         are available at the offset
   */
  public static int byteToIntAlt(byte[] bytes, int i) {
    DataInputStream stream = new DataInputStream(
      new ByteArrayInputStream(bytes, i, 4)
    );

    try {
      return stream.readInt();
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Copies a list of boxed bytes into a primitive byte array.
   *
   * @param byteList the bytes to copy; must not contain null
   * @return a new array with the same bytes in the same order
   */
  public static byte[] byteListToArray(List<Byte> byteList) {
    byte[] result = new byte[byteList.size()];
    int i = 0;

    for (Byte b : byteList) {
      result[i++] = b;
    }

    return result;
  }

  /**
   * Decodes a 4-byte big-endian int, the layout DataOutput.writeInt() produces.
   * Copied from java's Bits.getInt() method.
   *
   * @param bytes array containing a 4-byte integer to convert
   * @param offset offset where 4-byte integer starts
   * @return the decoded int
   */
  public static int byteToInt(byte[] bytes, int offset) {
    // the three low bytes are masked to undo sign extension; the high byte
    // needs no mask because the shift by 24 discards the extended bits
    return ((bytes[offset + 3] & 0xFF)) +
      ((bytes[offset + 2] & 0xFF) << 8) +
      ((bytes[offset + 1] & 0xFF) << 16) +
      ((bytes[offset]) << 24);
  }
}