// DataUtils.java example
//
// Explorer
// alien-ofelia-conet-ccnx-master
/*
 * Part of the CCNx Java Library.
 *
 * Copyright (C) 2008-2012 Palo Alto Research Center, Inc.
 *
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License version 2.1
 * as published by the Free Software Foundation.
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details. You should have received
 * a copy of the GNU Lesser General Public License along with this library;
 * if not, write to the Free Software Foundation, Inc., 51 Franklin Street,
 * Fifth Floor, Boston, MA 02110-1301 USA.
 */

package org.ccnx.ccn.impl.support;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.math.BigInteger;
import java.nio.charset.Charset;
import java.nio.charset.UnsupportedCharsetException;
import java.util.ArrayList;

import org.bouncycastle.util.encoders.Base64;
import org.ccnx.ccn.config.SystemConfiguration;

/**
 * Miscellaneous utility routines for CCN, mostly data comparison and conversion.
 *
 * Every member is static; the class holds no per-instance state.
 */
public final class DataUtils {

	public static final int BITS_PER_BYTE = 8;
	public static final String EMPTY = "";
	public static final String LINE_SEPARATOR = System.getProperty("line.separator");

	/**
	 * The UTF-8 charset, looked up once when this class is initialized.
	 * Useful when we move over to 1.6, and can avoid UnsupportedCharsetExceptions this way.
	 *
	 * The field is not declared final only to keep the public interface unchanged;
	 * treat it as a constant and never reassign it.
	 */
	public static Charset UTF8_CHARSET;

	static {
		try {
			UTF8_CHARSET = Charset.forName("UTF-8");
			if (null == UTF8_CHARSET) {
				// This shouldn't happen, but be noisy about it if it does...
				throw new UnsupportedCharsetException("Attempt to retrieve the UTF-8 charset returned null! Significant configuration error!");
			}
		} catch (Exception e) { // Should be UnsupportedCharsetException or IllegalCharsetNameException
			Log.severe("Unknown encoding UTF-8! This is a significant configuration problem.");
			// Keep the original exception as the cause so the root problem is not lost.
			throw new RuntimeException("Cannot find UTF-8 encoding. Significant configuration error", e);
		}
	}

	/**
	 * Null-tolerant comparison of two Comparables. A null value sorts before
	 * any non-null value, and two nulls are equal.
	 *
	 * @param left first value, may be null
	 * @param right second value, may be null
	 * @return < 0 if left comes before right, 0 if they are equal, > 0 if left comes after right
	 */
	public static <T extends Comparable<T>> int compare(T left, T right) {
		if (null == left) {
			// sort nothing before something; null == null
			return (null == right) ? 0 : -1;
		}
		if (null == right) {
			return 1;
		}
		return left.compareTo(right);
	}

	/**
	 * Perform a shortlex comparison of byte arrays in canonical CCN ordering.
	 * Shortlex ordering is ordering by length first, then lexicographically
	 * with every byte treated as an unsigned value (0..255).
	 * A null array sorts before any non-null array.
	 *
	 * MM - This method should really be renamed to "shortlex" or something
	 * other than "compare", unless it is needed for an Override name.
	 *
	 * @param left first array, may be null
	 * @param right second array, may be null
	 * @return < 0 if left comes before right, 0 if they are equal, > 0 if left comes after right
	 */
	public static int compare(byte [] left, byte [] right) {
		if (null == left) {
			return (null == right) ? 0 : -1;
		}
		if (null == right) {
			return 1;
		}
		// The shorter array always comes first.
		if (left.length != right.length) {
			return (left.length < right.length) ? -1 : 1;
		}
		// Equal lengths - compare byte by byte, unsigned.
		for (int i = 0; i < left.length; ++i) {
			int leftByte = left[i] & 0xff;
			int rightByte = right[i] & 0xff;
			if (leftByte != rightByte) {
				return (leftByte < rightByte) ? -1 : 1;
			}
		}
		return 0;
	}

	/**
	 * This is not like compare(byte[], byte[]).  That is shortlex.  This
	 * is an actual lexicographic ordering over the list elements, where each
	 * pair of elements is compared with the shortlex compare(byte[], byte[]).
	 * If one list is a prefix of the other, the shorter list comes first.
	 * A null list sorts before any non-null list.
	 *
	 * @param left first list of components, may be null
	 * @param right second list of components, may be null
	 * @return < 0 if left comes before right, 0 if they are equal, > 0 if left comes after right
	 * @see #compare(byte[], byte[])
	 */
	public static int compare(ArrayList<byte []> left, ArrayList<byte []> right) {
		if (null == left) {
			return (null == right) ? 0 : -1;
		}
		if (null == right) {
			return 1;
		}
		int leftSize = left.size();
		int rightSize = right.size();
		int common = (leftSize < rightSize) ? leftSize : rightSize;
		for (int i = 0; i < common; ++i) {
			int result = compare(left.get(i), right.get(i));
			if (0 != result) {
				return result;
			}
		}
		// Equal up to the shorter length: the shorter list sorts first.
		if (leftSize < rightSize) {
			return -1;
		}
		if (leftSize > rightSize) {
			return 1;
		}
		return 0;
	}

	/**
	 * Used to print non ASCII components for logging, etc.
	 * The bytes are interpreted as one unsigned big-endian number and rendered
	 * in radix SystemConfiguration.DEBUG_RADIX.
	 *
	 * @param bytes the data to print, may be null
	 * @return the data as a BigInteger String, or "" if bytes is null
	 */
	public static String printBytes(byte [] bytes) {
		if (bytes == null) {
			return "";
		}
		BigInteger bi = new BigInteger(1, bytes);
		return bi.toString(SystemConfiguration.DEBUG_RADIX);
	}

	/**
	 * Used to print components to be interpreted as hexadecimal such as segments.
	 * The bytes are interpreted as one unsigned big-endian number, so leading
	 * zero bytes do not show up in the output.
	 *
	 * @param bytes the data to print, may be null
	 * @return the data as a Hexadecimal String, or "<empty>" if bytes is null or empty
	 */
	public static String printHexBytes(byte [] bytes) {
		if ((null == bytes) || (bytes.length == 0)) {
			return "<empty>";
		}
		BigInteger bi = new BigInteger(1, bytes);
		return bi.toString(16);
	}

	/*
	 * A place to centralize interfaces to base64 encoding/decoding, as the classes
	 * we use change depending on what ships with Java.
	 */

	/**
	 * Decode base64 data.
	 *
	 * @param input base64-encoded bytes
	 * @return the decoded bytes
	 * @throws IOException declared as part of the public signature; this wrapper
	 * 	does not throw it itself
	 */
	public static byte [] base64Decode(byte [] input) throws IOException {
		return Base64.decode(input);
	}

	/**
	 * Encode data as base64, without any line wrapping.
	 *
	 * @param input raw bytes
	 * @return the base64-encoded bytes
	 */
	public static byte [] base64Encode(byte [] input) {
		return Base64.encode(input);
	}

	/** Default line length used when wrapping base64 output. */
	public static final int LINELEN = 64;

	/**
	 * Encode data as base64 and wrap the result with lineWrap.
	 *
	 * @param input raw bytes
	 * @param lineLength maximum line length, including the line separator; if null
	 * 	or not positive, LINELEN is used
	 * @return the base64 text, wrapped at the requested line length
	 */
	public static String base64Encode(byte [] input, Integer lineLength) {
		byte [] encodedBytes = base64Encode(input);
		// Honour the caller's line length; previously this argument was ignored.
		int wrapAt = ((null == lineLength) || (lineLength.intValue() <= 0)) ? LINELEN : lineLength.intValue();
		return lineWrap(DataUtils.getUTF8StringFromBytes(encodedBytes), wrapAt);
	}

	/**
	 * Copy input, inserting CR LF after every lineLength input bytes.
	 * The output buffer is sized input.length + 2*(input.length/lineLength) + 3,
	 * so it ends with zero bytes after the copied data.
	 *
	 * @deprecated not used in CCNx, candidate for removal in future release.
	 * @param input the bytes to wrap
	 * @param lineLength number of input bytes per line; must be positive
	 * @return the byte array with added CRLF line-breaks and null termination.
	 */
	@Deprecated
	public static byte [] lineWrapBase64(byte [] input, int lineLength) {
		int finalLen = input.length + 2*(input.length/lineLength) + 3;
		byte output[] = new byte[finalLen];
		// add line breaks
		int outidx = 0;
		int inidx = 0;
		while (inidx < input.length) {
			output[outidx] = input[inidx];
			outidx++;
			inidx++;
			if ((inidx % lineLength) == 0) {
				output[outidx++] = (byte)0x0D;
				output[outidx++] = (byte)0x0A;
			}
		}
		output[outidx]='\0';
		return (output);

	}

	/**
	 * Break a string into lines by inserting LINE_SEPARATOR. Each inserted
	 * separator counts towards the line length, so no output line, separator
	 * included, is longer than lineLength characters.
	 *
	 * @param inputString the text to wrap, may be null
	 * @param lineLength maximum line length including the separator
	 * @return the wrapped text; inputString itself if it is null or already fits on one line
	 * @throws IllegalArgumentException if wrapping is needed but lineLength leaves
	 * 	no room for any character besides the line separator
	 */
	public static String lineWrap(String inputString, int lineLength) {
		if ((null == inputString) || (inputString.length() <= lineLength)) {
			return inputString;
		}

		int sepLen = LINE_SEPARATOR.length();
		if (lineLength <= sepLen) {
			// Without this guard the loop below either never ends or inserts at a negative index.
			throw new IllegalArgumentException("lineLength " + lineLength
					+ " must be greater than the line separator length " + sepLen);
		}

		StringBuilder line = new StringBuilder(inputString);

		// length tracks the size of the buffer as separators are added.
		int length = inputString.length();
		int index = lineLength - sepLen;
		while (index < length - sepLen) {
			line.insert(index, LINE_SEPARATOR);
			index += lineLength;
			length += sepLen;
		}
		return line.toString();
	}

	/**
	 * byte array compare
	 * @param left first array, may be null
	 * @param right second array, may be null
	 * @return true if both are null, or both are non-null with the same length and contents
	 */
	public static boolean arrayEquals(byte[] left, byte[] right) {
		if ((left == null) || (right == null)) {
			// Equal only when both are null.
			return (left == right);
		}
		if (left.length != right.length) {
			return false;
		}
		for (int i = 0; i < left.length; i++) {
			if (left[i] != right[i]) {
				return false;
			}
		}
		return true;
	}

	/**
	 * byte array compare, limited to the first length bytes.
	 * If either array is shorter than length, the arrays must have the same
	 * length to be considered equal.
	 *
	 * @param left first array, may be null
	 * @param right second array, may be null
	 * @param length number of leading bytes to compare
	 * @return true if equal
	 */
	public static boolean arrayEquals(byte[] left, byte[] right, int length) {
		if ((left == null) || (right == null)) {
			// Equal only when both are null.
			return (left == right);
		}

		// If one of left or right is shorter than length, arrays
		// must be same length to be equal.
		boolean eitherShort = (left.length < length) || (right.length < length);
		if (eitherShort && (left.length != right.length)) {
			return false;
		}

		int limit = Math.min(length, Math.min(left.length, right.length));
		for (int i = 0; i < limit; i++) {
			if (left[i] != right[i]) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Check if a byte array starts with a certain prefix.
	 *
	 * Used to check for binary prefixes used to mark certain ContentName components for special purposes.
	 *
	 * @param prefix bytes to look for, if null or empty this method always returns true.
	 * @param data data to inspect. If null (and prefix is non-empty) this method returns false.
	 * @return true if data starts with prefix.
	 */
	public static boolean isBinaryPrefix(byte [] prefix, byte [] data) {
		if ((null == prefix) || (prefix.length == 0)) {
			return true;
		}
		if ((null == data) || (data.length < prefix.length)) {
			return false;
		}
		for (int i = 0; i < prefix.length; ++i) {
			if (prefix[i] != data[i]) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Recursively delete a directory and all its contents.
	 * If given File does not exist, this method returns with no error
	 * but if it exists as a file not a directory, an exception will be thrown.
	 * Similar to org.apache.commons.io.FileUtils.deleteDirectory
	 * but avoids dependency on that library for minimal use.
	 *
	 * Deletion itself is best effort: the results of the individual
	 * File.delete() calls are not checked.
	 *
	 * NOTE(review): File.isDirectory() follows symbolic links, so a link to a
	 * directory is descended into - confirm that is acceptable for callers.
	 *
	 * @param directory the directory to remove
	 * @throws IOException if "directory" is a file, or its contents cannot be listed
	 */
	public static void deleteDirectory(File directory) throws IOException {
		if (!directory.exists()) {
			return;
		}
		if (!directory.isDirectory()) {
			throw new IOException(directory.getPath() + " is not a directory");
		}
		// listFiles() returns null (not an empty array) when an I/O error occurs.
		File [] children = directory.listFiles();
		if (null == children) {
			throw new IOException("Could not list contents of " + directory.getPath());
		}
		for (File child : children) {
			if (child.isDirectory()) {
				deleteDirectory(child);
			} else {
				child.delete();
			}
		}
		directory.delete();
	}

	/**
	 * Read the complete contents of a file into a byte array.
	 * This was used in early content demos; keep it around as it may be generally useful.
	 *
	 * @param file the file to read
	 * @return the file contents
	 * @throws IOException if the file is larger than Integer.MAX_VALUE bytes, cannot
	 * 	be opened or read, or ends before the expected number of bytes was read
	 */
	public static byte[] getBytesFromFile(File file) throws IOException {
		// Get the size of the file
		long length = file.length();

		if (length > Integer.MAX_VALUE) {
			throw new IOException("File is too large: " + file.getName());
		}

		// Create the byte array to hold the data
		byte[] bytes = new byte[(int)length];

		InputStream is = new FileInputStream(file);
		try {
			// Read in the bytes; read() may deliver fewer than requested.
			int offset = 0;
			int numRead = 0;
			while (offset < bytes.length
					&& (numRead=is.read(bytes, offset, bytes.length-offset)) >= 0) {
				offset += numRead;
			}

			// Ensure all the bytes have been read in
			if (offset < bytes.length) {
				throw new IOException("Could not completely read file "+file.getName());
			}
		} finally {
			// Close on every path so a failed read does not leak the file handle.
			is.close();
		}
		return bytes;
	}

	/**
	 * Read a stream (usually small) completely in to a byte array. Used to get all of the
	 * bytes out of one or more content objects for decoding or other processing, where the
	 * content needs to be handed to something else as a unit.
	 * Reading stops at the first read that returns no bytes. The stream is not closed.
	 *
	 * @param input the stream to drain
	 * @return all bytes read from the stream
	 * @throws IOException if reading fails
	 */
	public static byte [] getBytesFromStream(InputStream input) throws IOException {
		ByteArrayOutputStream baos = new ByteArrayOutputStream();
		byte [] buf = new byte[1024];
		int byteCount = input.read(buf);
		while (byteCount > 0) {
			baos.write(buf, 0, byteCount);
			byteCount = input.read(buf);
		}
		return baos.toByteArray();
	}

	/**
	 * Wrap up handling of UTF-8 encoding in one place (as much as possible), because
	 * an UnsupportedEncodingException in response to a request for UTF-8 signals
	 * a significant configuration error; we should catch it and signal a RuntimeException
	 * in one place and let the rest of the code not worry about it.
	 *
	 * @param stringBytes UTF-8 encoded bytes
	 * @return the decoded String
	 */
	public static String getUTF8StringFromBytes(byte [] stringBytes) {
		try {
			// Version taking a Charset not available till 1.6.
			return new String(stringBytes, "UTF-8");
		} catch (UnsupportedEncodingException e) {
			Log.severe("Unknown encoding UTF-8! This is a significant configuration problem.");
			throw new RuntimeException("Unknown encoding UTF-8! This is a significant configuration problem.", e);
		}
	}

	/**
	 * Wrap up handling of UTF-8 encoding in one place (as much as possible), because
	 * an UnsupportedEncodingException in response to a request for UTF-8 signals
	 * a significant configuration error; we should catch it and signal a RuntimeException
	 * in one place and let the rest of the code not worry about it.
	 *
	 * @param stringData the String to encode
	 * @return the UTF-8 encoding of stringData
	 */
	public static byte [] getBytesFromUTF8String(String stringData) {
		try {
			// Version taking a Charset not available till 1.6.
			return stringData.getBytes("UTF-8");
		} catch (UnsupportedEncodingException e) {
			Log.severe("Unknown encoding UTF-8! This is a significant configuration problem.");
			throw new RuntimeException("Unknown encoding UTF-8! This is a significant configuration problem.", e);
		}
	}

	/**
	 * Lexicographically compare two byte ranges, looking at a limited number of bytes.
	 * Bytes are compared as unsigned values (0..255), matching compare(byte[], byte[]).
	 * If the ranges agree but one of them ends before count bytes were inspected,
	 * the range with fewer remaining bytes is the smaller one.
	 *
	 * Note that, unlike the compare methods of this class, a null array sorts
	 * after a non-null one here.
	 *
	 * @param arr1 first array, may be null
	 * @param offset1 index in arr1 at which to start comparing
	 * @param arr2 second array, may be null
	 * @param offset2 index in arr2 at which to start comparing
	 * @param count Maximum number of bytes to inspect.
	 * @return < 0 if arr1 comes before arr2, 0 if they are equal, > 0 if arr1 comes after arr2
	 */
	public static int bytencmp(byte[] arr1, int offset1, byte[] arr2, int offset2, int count) {
		if (null == arr1) {
			return (null == arr2) ? 0 : 1;
		}
		if (null == arr2) {
			return -1;
		}

		int remaining1 = arr1.length - offset1;
		int remaining2 = arr2.length - offset2;
		int cmpcount = Math.min(Math.min(count, remaining1), remaining2);
		// cmpcount is a number of bytes, so iterate relative to both offsets.
		for (int k = 0; k < cmpcount; ++k) {
			int b1 = arr1[offset1 + k] & 0xff;
			int b2 = arr2[offset2 + k] & 0xff;
			if (b1 != b2) {
				return (b1 < b2) ? -1 : 1;
			}
		}
		if (cmpcount == count) {
			return 0;
		}
		// OK, they match up to the end of the shorter range, which is shorter
		// than count. Whichever has fewer bytes left is less.
		if (remaining1 > remaining2) {
			return 1;
		}
		if (remaining1 < remaining2) {
			return -1;
		}
		return 0;
	}

	/**
	 * Lexicographically compare the first count bytes of two arrays.
	 *
	 * @param arr1 first array, may be null
	 * @param arr2 second array, may be null
	 * @param count Maximum number of bytes to inspect.
	 * @return < 0 if arr1 comes before arr2, 0 if they are equal, > 0 if arr1 comes after arr2
	 * @see #bytencmp(byte[], int, byte[], int, int)
	 */
	public static int bytencmp(byte [] arr1, byte [] arr2, int count) {
		return bytencmp(arr1, 0, arr2, 0, count);
	}

	/**
	 * Finds the index of the first occurrence of byteToFind in array starting at given
	 * offset, returns -1 if not found.
	 * @param array array to search, may be null
	 * @param startingOffset offset into array to start at
	 * @param byteToFind byte to seek
	 * @return position in array containing first occurrence of byteToFind at or after
	 * 	startingOffset, or -1 if not found (including when array is null or
	 * 	startingOffset is past the end of the array)
	 */
	public static int byteindex(byte [] array, int startingOffset, byte byteToFind) {
		if (null == array) {
			return -1;
		}
		for (int i = startingOffset; i < array.length; i++) {
			if (array[i] == byteToFind) {
				return i;
			}
		}
		return -1;
	}

	/**
	 * Finds the index of the first occurrence of byteToFind in array, returns -1 if not found.
	 * @param array array to search, may be null
	 * @param byteToFind byte to seek
	 * @return position in array containing first occurrence of byteToFind, or -1 if not found
	 */
	public static int byteindex(byte [] array, byte byteToFind) {
		return byteindex(array, 0, byteToFind);
	}

	/**
	 * Finds the index of the last occurrence of byteToFind in array, searching backwards
	 * from the given offset, returns -1 if not found.
	 * @param array array to search, may be null
	 * @param startingOffset offset into array to start at (inclusive); values past the
	 * 	end of the array are treated as the last index
	 * @param byteToFind byte to seek
	 * @return position in array containing last occurrence of byteToFind at or before
	 * 	startingOffset, or -1 if not found
	 */
	public static int byterindex(byte [] array, int startingOffset, byte byteToFind) {
		if (null == array) {
			return -1;
		}
		// Never start beyond the last valid index.
		int start = Math.min(startingOffset, array.length - 1);
		for (int i = start; i >= 0; i--) {
			if (array[i] == byteToFind) {
				return i;
			}
		}
		return -1;
	}

	/**
	 * Finds the index of the last occurrence of byteToFind in array, returns -1 if not found.
	 * @param array array to search, may be null
	 * @param byteToFind byte to seek
	 * @return position in array containing last occurrence of byteToFind, or -1 if not found
	 */
	public static int byterindex(byte [] array, byte byteToFind) {
		// Start at the last element, not one past it.
		return byterindex(array, (array != null) ? array.length - 1 : -1, byteToFind);
	}


	/**
	 * Count how many times a given byte occurs in a range of an array.
	 *
	 * @param array array to search, may be null
	 * @param startingOffset index of the first byte to inspect
	 * @param length index one past the last byte to inspect (an end index, not a
	 * 	number of bytes); values beyond the end of the array are treated as array.length
	 * @param byteToFind byte to count
	 * @return number of occurrences in the range, 0 if array is null
	 */
	public static int occurcount(byte [] array, int startingOffset, int length, byte byteToFind) {
		if (array == null) {
			return 0;
		}
		int end = Math.min(length, array.length);
		int count = 0;
		for (int i = startingOffset; i < end; ++i) {
			if (array[i] == byteToFind) {
				count++;
			}
		}
		return count;
	}

	/**
	 * Count how many times a given byte occurs in the whole array.
	 *
	 * NOTE: the length argument has never been honoured by this overload; it
	 * always scans the entire array. That behaviour is retained for compatibility.
	 * Use the four-argument form to restrict the range.
	 *
	 * @param array array to search, may be null
	 * @param length ignored
	 * @param byteToFind byte to count
	 * @return number of occurrences in the array, 0 if array is null
	 */
	public static int occurcount(byte [] array, int length, byte byteToFind) {
		return occurcount(array, 0, (null != array) ? array.length : -1, byteToFind);
	}

	/**
	 * Count how many times a given byte occurs in the whole array.
	 *
	 * @param array array to search, may be null
	 * @param byteToFind byte to count
	 * @return number of occurrences in the array, 0 if array is null
	 */
	public static int occurcount(byte [] array, byte byteToFind) {
		return occurcount(array, 0, (null != array) ? array.length : -1, byteToFind);
	}

	/**
	 * Akin to String.split for binary arrays; splits on a given byte value.
	 * Only the part of the array from startingOffset onwards is considered.
	 * The split bytes themselves are not part of any component; adjacent split
	 * bytes, or a split byte at the very end, produce empty components.
	 *
	 * @param array the data to split; if null the result is a single null component
	 * @param startingOffset index at which to start splitting
	 * @param splitValue the separator byte
	 * @return the components. If no separator occurs and startingOffset is 0 the
	 * 	single component is the original array itself (not a copy).
	 */
	public static byte [][] binarySplit(byte [] array, int startingOffset, byte splitValue) {
		if (null == array) {
			return new byte [][]{array};
		}
		// Count separators only in the region that is actually split.
		int count = occurcount(array, startingOffset, array.length, splitValue) + 1;
		if (count == 1) {
			if (startingOffset == 0) {
				// no split values; just return the original array
				return new byte [][]{array};
			}
			// No split values after the offset: the single component is the tail.
			int remaining = Math.max(0, array.length - startingOffset);
			byte [] tail = new byte[remaining];
			System.arraycopy(array, array.length - remaining, tail, 0, remaining);
			return new byte [][]{tail};
		}
		byte [][] components = new byte[count][];
		int lastoffset = startingOffset;
		for (int index = 0; index < count; index++) {
			int offset = byteindex(array, lastoffset, splitValue);
			if (offset < 0) {
				// last one
				offset = array.length;
			}
			components[index] = new byte[offset - lastoffset];
			System.arraycopy(array, lastoffset, components[index], 0, components[index].length);
			// Skip over the separator itself.
			lastoffset = offset + 1;
		}
		return components;
	}

	/**
	 * Split the whole array on a given byte value.
	 *
	 * @param array the data to split
	 * @param splitValue the separator byte
	 * @return the components
	 * @see #binarySplit(byte[], int, byte)
	 */
	public static byte [][] binarySplit(byte [] array, byte splitValue) {
		return binarySplit(array, 0, splitValue);
	}

	/**
	 * Copy a range of an array into a new array.
	 *
	 * @param array source array
	 * @param offset index of the first byte to copy
	 * @param len number of bytes to copy
	 * @return a new array of length len
	 */
	public static byte [] subarray(byte [] array, int offset, int len) {
		byte [] newarray = new byte [len];
		System.arraycopy(array, offset, newarray, 0, len);
		return newarray;
	}

	/**
	 * Convert a BigEndian byte array in to a long assuming unsigned values.
	 * No bounds checking is done on the array -- caller should make sure
	 * it is 8 or fewer bytes.
	 *
	 * Should operate like BigInteger(1, bytes).longValue().
	 *
	 * @param src big-endian bytes
	 * @return the value of the bytes
	 */
	public final static long byteArrayToUnsignedLong(final byte [] src) {
		return byteArrayToUnsignedLong(src, 0);
	}

	/**
	 * Like byteArrayToUnsignedLong, except we begin at byte position @start, not
	 * at position 0.  This is commonly used to skip the 1st byte of a CommandMarker.
	 * If @start is 0, works exactly like byteArrayToUnsignedLong(src).
	 * @param src big-endian bytes
	 * @param start index of the first (most significant) byte to use
	 * @return the value of the bytes from start to the end of src
	 */
	public final static long byteArrayToUnsignedLong(final byte [] src, int start) {
		long value = 0;
		for(int i = start; i < src.length; i++) {
			// Mask with 0xFF so the byte is not sign-extended.
			value = (value << 8) | (src[i] & 0xFF);
		}
		return value;
	}

	/**
	 * Convert a long value to a Big Endian byte array.  Assume
	 * the long is not signed. Leading zero bytes are omitted; the value 0
	 * yields a single 0x00 byte. A fresh array is returned on every call.
	 *
	 * This should be the equivalent of:
	 *		byte [] b = BigInteger.valueOf(toBinaryTimeAsLong()).toByteArray();
	 *		if( 0 == b[0] && b.length > 1 ) {
	 *			byte [] bb = new byte[b.length - 1];
	 *			System.arraycopy(b, 1, bb, 0, bb.length);
	 *			b = bb;
	 *		}
	 *
	 * @param value the value to convert, treated as unsigned
	 * @return the minimal big-endian representation, 1 to 8 bytes long
	 */
	public final static byte [] unsignedLongToByteArray(final long value) {
		if( 0 == value ) {
			// Do not hand out a shared array: callers could modify it.
			return new byte [] {0};
		}
		byte [] out = new byte[significantBytes(value)];
		fillBigEndian(value, out, 0);
		return out;
	}

	/**
	 * Like unsignedLongToByteArray, except we specify what the first byte should be, so the
	 * array is 1 byte longer than normal.  This is used by things that need a CommandMarker.
	 *
	 * If the value is 0, then the array will be 1 byte with only @firstByte.  The 0x00 byte
	 * will not be included.
	 *
	 * @param value the value to convert, treated as unsigned
	 * @param firstByte the byte to place in front of the value
	 * @return firstByte followed by the minimal big-endian representation of value
	 */
	public final static byte [] unsignedLongToByteArray(final long value, final byte firstByte) {
		// significantBytes(0) is 0, so value 0 yields just the marker byte.
		byte [] out = new byte[significantBytes(value) + 1];
		out[0] = firstByte;
		fillBigEndian(value, out, 1);
		return out;
	}

	/** Number of bytes needed to hold value as an unsigned number; 0 for the value 0. */
	private static int significantBytes(long value) {
		int significantBits = 64 - Long.numberOfLeadingZeros(value);
		return (significantBits + (BITS_PER_BYTE - 1)) / BITS_PER_BYTE;
	}

	/** Fill out[from..] with the low-order bytes of value, most significant byte first. */
	private static void fillBigEndian(long value, byte [] out, int from) {
		long remaining = value;
		for (int i = out.length - 1; i >= from; --i) {
			// The cast keeps the low 8 bits.
			out[i] = (byte) remaining;
			remaining >>>= BITS_PER_BYTE;
		}
	}

}