/* * Part of the CCNx Java Library. * * Copyright (C) 2008-2012 Palo Alto Research Center, Inc. * * This library is free software; you can redistribute it and/or modify it * under the terms of the GNU Lesser General Public License version 2.1 * as published by the Free Software Foundation. * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. You should have received * a copy of the GNU Lesser General Public License along with this library; * if not, write to the Free Software Foundation, Inc., 51 Franklin Street, * Fifth Floor, Boston, MA 02110-1301 USA. */ package org.ccnx.ccn.impl.support; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; import java.math.BigInteger; import java.nio.charset.Charset; import java.nio.charset.UnsupportedCharsetException; import java.util.ArrayList; import org.bouncycastle.util.encoders.Base64; import org.ccnx.ccn.config.SystemConfiguration; /** * Miscellaneous utility routines for CCN, mostly data comparison and conversion. */ public final class DataUtils { public static final int BITS_PER_BYTE = 8; public static final String EMPTY = ""; public static final String LINE_SEPARATOR = System.getProperty("line.separator"); /** * Useful when we move over to 1.6, and can avoid UnsupportedCharsetExceptions this way. */ public static Charset UTF8_CHARSET; static { try { UTF8_CHARSET = Charset.forName("UTF-8"); if (null == UTF8_CHARSET) { // This shouldn't happen, but be noisy about it if it does... throw new UnsupportedCharsetException("Attempt to retrieve the UTF-8 charset returned null! Significant configuration error!"); } } catch (Exception e) { // Should be UnsupportedCharsetException or IllegalCharsetNameException Log.severe("Unknown encoding UTF-8! This is a significant configuration problem."); throw new RuntimeException("Cannot find UTF-8 encoding. Significant configuration error"); } } public static <T extends Comparable<T>> int compare(T left, T right) { int result = 0; if (null != left) { if (null == right) return 1; // sort nothing before something result = left.compareTo(right); } else { if (null != right) result = -1; // sort nothing before something // else fall through and compare publishers else result = 0; // null == null } return result; } /** * Perform a shortlex comparison of byte arrays in canonical CCN ordering. * Shortlex ordering is ordering by cardinality, then by lexigraphic. * * MM - This method should really be renamed to "shortlex" or something * other than "compare", unless it is needed for an Override name. * * @param left * @param right * @return < 0 if left comes before right, 0 if they are equal, > 0 if left comes after right */ public static int compare(byte [] left, byte [] right) { if (null != left) { if (null == right) { return (1); } else { int leftLength = left.length; int rightLength = right.length; // If a is shorter than b then a comes before b if (leftLength < rightLength) { return (-1); } else if (leftLength > rightLength) { return (1); } else { // They have equal lengths - compare byte by byte for (int i=0; i < leftLength; ++i) { short leftSubI = (short)(left[i] & 0xff); short rightSubI = (short)(right[i] & 0xff); if (leftSubI < rightSubI) { return (-1); } else if (leftSubI > rightSubI) { return (1); } } } } } else { if (null != right) return (-1); // sort nothing before something // else fall through and compare publishers else return (0); // null == null } return (0); } /** * This is not like compare(byte[], byte[]). That is shortlex. This * is an actual lexigraphic ordering based on the shortlex compare * of each byte array. * @see compare(byte[], byte[]) */ public static int compare(ArrayList<byte []> left, ArrayList<byte []> right) { int result = 0; if (null != left) { if (null == right) { result = 1; } else { // here we have the comparison. int leftSize = left.size(); int rightSize = right.size(); int minlen = (leftSize < rightSize) ? leftSize : rightSize; for (int i=0; i < minlen; ++i) { result = compare(left.get(i), right.get(i)); if (0 != result) break; } if (result == 0) { // ok, they're equal up to the minimum length if (leftSize < rightSize) { result = -1; } else if (leftSize > rightSize) { result = 1; } // else they're equal, result = 0 } } } else { if (null != right) result = -1; // sort nothing before something // else fall through and compare publishers else result = 0; // null == null } return result; } /** * Used to print non ASCII components for logging, etc. * * @param bytes * @return the data as a BigInteger String */ public static String printBytes(byte [] bytes) { if (bytes == null) { return ""; } BigInteger bi = new BigInteger(1, bytes); return bi.toString(SystemConfiguration.DEBUG_RADIX); } /** * Used to print components to be interpreted as hexadecimal such as segments * @param bytes * @return the data as a Hexadecimal String */ public static String printHexBytes(byte [] bytes) { if ((null == bytes) || (bytes.length == 0)) { return "<empty>"; } BigInteger bi = new BigInteger(1, bytes); return bi.toString(16); } /** * A place to centralize interfaces to base64 encoding/decoding, as the classes * we use change depending on what ships with Java. */ public static byte [] base64Decode(byte [] input) throws IOException { return Base64.decode(input); } public static byte [] base64Encode(byte [] input) { return Base64.encode(input); } public static final int LINELEN = 64; public static String base64Encode(byte [] input, Integer lineLength) { byte [] encodedBytes = base64Encode(input); return lineWrap(DataUtils.getUTF8StringFromBytes(encodedBytes), LINELEN); } /** * @deprecated not used in CCNx, candidate for removal in future release. * @param input * @param lineLength * @return the byte array with added CRLF line-breaks and null termination. */ @Deprecated public static byte [] lineWrapBase64(byte [] input, int lineLength) { int finalLen = input.length + 2*(input.length/lineLength) + 3; byte output[] = new byte[finalLen]; // add line breaks int outidx = 0; int inidx = 0; while (inidx < input.length) { output[outidx] = input[inidx]; outidx++; inidx++; if ((inidx % lineLength) == 0) { output[outidx++] = (byte)0x0D; output[outidx++] = (byte)0x0A; } } output[outidx]='\0'; return (output); } /** * @param inputString * @param lineLength * @return */ public static String lineWrap(String inputString, int lineLength) { if ((null == inputString) || (inputString.length() <= lineLength)) { return inputString; } StringBuffer line = new StringBuffer(inputString); int length = inputString.length(); int sepLen = LINE_SEPARATOR.length(); int index = lineLength - sepLen; while (index < length - sepLen) { line.insert(index, LINE_SEPARATOR); index += lineLength; length += sepLen; } return line.toString(); } /** * byte array compare * @param left * @param right * @return true if equal */ public static boolean arrayEquals(byte[] left, byte[] right) { if (left == null) { return ((right == null) ? true : false); } if (right == null) { return ((left == null) ? true : false); } if (left.length != right.length) return false; for (int i = 0; i < left.length; i++) { if (left[i] != right[i]) return false; } return true; } /** * byte array compare * @param left * @param right * @param length * @return true if equal */ public static boolean arrayEquals(byte[] left, byte[] right, int length) { if (left == null) { return ((right == null) ? true : false); } if (right == null) { return ((left == null) ? true : false); } // If one of left or right is shorter than length, arrays // must be same length to be equal. if( left.length < length || right.length < length ) if( left.length != right.length ) return false; int minarray = (left.length < right.length) ? left.length : right.length; int minlen = (length < minarray) ? length : minarray; for (int i = 0; i < minlen; i++) { if (left[i] != right[i]) return false; } return true; } /** * Check if a byte array starts with a certain prefix. * * Used to check for binary prefixes used to mark certain ContentName components for special purposes. * * @param prefix bytes to look for, if null this method always returns true. * @param data data to inspect. If null this method always returns false. * @return true if data starts with prefix. */ public static boolean isBinaryPrefix(byte [] prefix, byte [] data) { if ((null == prefix) || (prefix.length == 0)) return true; if ((null == data) || (data.length < prefix.length)) return false; for (int i=0; i < prefix.length; ++i) { if (prefix[i] != data[i]) return false; } return true; } /** * Recursively delete a directory and all its contents. * If given File does not exist, this method returns with no error * but if it exists as a file not a directory, an exception will be thrown. * Similar to org.apache.commons.io.FileUtils.deleteDirectory * but avoids dependency on that library for minimal use. * @param directory * @throws IOException if "directory" is a file */ public static void deleteDirectory(File directory) throws IOException { if (!directory.exists()) { return; } if (!directory.isDirectory()) { throw new IOException(directory.getPath() + " is not a directory"); } for (File child : directory.listFiles()) { if (child.isDirectory()) { deleteDirectory(child); } else { child.delete(); } } directory.delete(); } /** * This was used in early content demos; keep it around as it may be generally useful. * @param file * @return * @throws IOException */ public static byte[] getBytesFromFile(File file) throws IOException { InputStream is = new FileInputStream(file); // Get the size of the file long length = file.length(); if (length > Integer.MAX_VALUE) { throw new IOException("File is too large: " + file.getName()); } // Create the byte array to hold the data byte[] bytes = new byte[(int)length]; // Read in the bytes int offset = 0; int numRead = 0; while (offset < bytes.length && (numRead=is.read(bytes, offset, bytes.length-offset)) >= 0) { offset += numRead; } // Ensure all the bytes have been read in if (offset < bytes.length) { throw new IOException("Could not completely read file "+file.getName()); } // Close the input stream and return bytes is.close(); return bytes; } /** * Read a stream (usually small) completely in to a byte array. Used to get all of the * bytes out of one or more content objects for decoding or other processing, where the * content needs to be handed to something else as a unit. */ public static byte [] getBytesFromStream(InputStream input) throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); byte [] buf = new byte[1024]; int byteCount = 0; byteCount = input.read(buf); while (byteCount > 0) { baos.write(buf, 0, byteCount); byteCount = input.read(buf); } return baos.toByteArray(); } /** * Wrap up handling of UTF-8 encoding in one place (as much as possible), because * an UnsupportedEncodingException in response to a request for UTF-8 signals * a significant configuration error; we should catch it and signal a RuntimeException * in one place and let the rest of the code not worry about it. */ public static String getUTF8StringFromBytes(byte [] stringBytes) { try { // Version taking a Charset not available till 1.6. return new String(stringBytes, "UTF-8"); } catch (UnsupportedEncodingException e) { Log.severe("Unknown encoding UTF-8! This is a significant configuration problem."); throw new RuntimeException("Unknown encoding UTF-8! This is a significant configuration problem."); } } /** * Wrap up handling of UTF-8 encoding in one place (as much as possible), because * an UnsupportedEncodingException in response to a request for UTF-8 signals * a significant configuration error; we should catch it and signal a RuntimeException * in one place and let the rest of the code not worry about it. */ public static byte [] getBytesFromUTF8String(String stringData) { try { // Version taking a Charset not available till 1.6. return stringData.getBytes("UTF-8"); } catch (UnsupportedEncodingException e) { Log.severe("Unknown encoding UTF-8! This is a significant configuration problem."); throw new RuntimeException("Unknown encoding UTF-8! This is a significant configuration problem."); } } /** * Lexicographically compare two byte arrays, looking at a limited number of bytes. * @param arr1 * @param arr2 * @param count Maximum number of bytes to inspect. * @return < 0 if left comes before right, 0 if they are equal, > 0 if left comes after right */ public static int bytencmp(byte[] arr1, int offset1, byte[] arr2, int offset2, int count) { if (null == arr1) { if (null == arr2) return 0; return 1; } if (null == arr2) return -1; int cmpcount = Math.min(Math.min(count, (arr1.length-offset1)), (arr2.length-offset2)); for (int i=offset1, j=offset2; i < cmpcount; ++i, ++j) { if (arr1[i] < arr2[j]) return -1; if (arr1[i] > arr2[j]) return 1; } if (cmpcount == count) return 0; // OK, they match up to the length of the shortest one, which is shorter // than count. Whichever is shorter is less. if (arr1.length > arr2.length) return 1; if (arr1.length < arr2.length) return -1; return 0; } public static int bytencmp(byte [] arr1, byte [] arr2, int count) { return bytencmp(arr1, 0, arr2, 0, count); } /** * Finds the index of the first occurrence of byteToFind in array starting at given * offset, returns 01 if not found. * @param array array to search * @param startingOffset offset into array to start at * @param byteToFind byte to seek * @return position in array containing first occurrence of byteToFind, or array.length if not found */ public static int byteindex(byte [] array, int startingOffset, byte byteToFind) { int byteindex; for (byteindex = startingOffset; byteindex < array.length; byteindex++) { if (array[byteindex] == byteToFind) break; } return (byteindex == array.length) ? -1 : byteindex; } /** * Finds the index of the first occurrence of byteToFind in array, returns -1 if not found. * @param array array to search * @param byteToFind byte to seek * @return position in array containing first occurrence of byteToFind, or array.length if not found */ public static int byteindex(byte [] array, byte byteToFind) { return byteindex(array, 0, byteToFind); } /** * Finds the index of the last occurrence of byteToFind in array starting at given * offset, returns -1 if not found. * @param array array to search * @param startingOffset offset into array to start at * @param byteToFind byte to seek * @return position in array containing first occurrence of byteToFind, or array.length if not found */ public static int byterindex(byte [] array, int startingOffset, byte byteToFind) { int byteindex; for (byteindex = startingOffset; byteindex >= 0; byteindex--) { if (array[byteindex] == byteToFind) break; } return byteindex; } /** * Finds the last of the first occurrence of byteToFind in array, returns -1 if not found. * @param array array to search * @param byteToFind byte to seek * @return position in array containing first occurrence of byteToFind, or array.length if not found */ public static int byterindex(byte [] array, byte byteToFind) { return byterindex(array, (array != null) ? array.length : 0, byteToFind); } /** * Count how may times a given byte occurs in an array. */ public static int occurcount(byte [] array, int startingOffset, int length, byte byteToFind) { int count = 0; if (array == null) return 0; for (int i=startingOffset; i < length; ++i) { if (array[i] == byteToFind) { count++; } } return count; } public static int occurcount(byte [] array, int length, byte byteToFind) { return occurcount(array, 0, (null != array) ? array.length : -1, byteToFind); } public static int occurcount(byte [] array, byte byteToFind) { return occurcount(array, 0, byteToFind); } /** * Akin to String.split for binary arrays; splits on a given byte value. */ public static byte [][] binarySplit(byte [] array, int startingOffset, byte splitValue) { int index = 0; int offset = 0; int lastoffset = startingOffset; int count = occurcount(array, startingOffset, splitValue) + 1; if (count == 1) { // no split values; just return the original array return new byte [][]{array}; } byte [][] components = new byte[count][]; while (index < count) { offset = byteindex(array, lastoffset, splitValue); if (offset < 0) { // last one offset = array.length; } components[index] = new byte[offset - lastoffset]; System.arraycopy(array, lastoffset, components[index], 0, components[index].length); lastoffset = offset + 1; index++; } return components; } public static byte [][] binarySplit(byte [] array, byte splitValue) { return binarySplit(array, 0, splitValue); } public static byte [] subarray(byte [] array, int offset, int len) { byte [] newarray = new byte [len]; System.arraycopy(array, offset, newarray, 0, len); return newarray; } /** * Convert a BigEndian byte array in to a long assuming unsigned values. * No bounds checking is done on the array -- caller should make sure * it is 8 or fewer bytes. * * Should operate like BigInteger(1, bytes).longValue(). */ public final static long byteArrayToUnsignedLong(final byte [] src) { long value = 0; for(int i = 0; i < src.length; i++) { value = value << 8; // Java will assume the byte is signed, so extend it and trim it. int b = (src[i]) & 0xFF; value |= b; } return value; } /** * Like byteArrayToUnsignedLong, excpet we begin at byte position @start, not * at position 0. This is commonly used to skip the 1st byte of a CommandMarker. * If @start is 0, works exactly like byteArrayToUnsignedLong(src). * @param src * @param start * @return */ public final static long byteArrayToUnsignedLong(final byte [] src, int start) { long value = 0; for(int i = start; i < src.length; i++) { value = value << 8; // Java will assume the byte is signed, so extend it and trim it. int b = (src[i]) & 0xFF; value |= b; } return value; } /** * Convert a long value to a Big Endian byte array. Assume * the long is not signed. * * This should be the equivalent of: * byte [] b = BigInteger.valueOf(toBinaryTimeAsLong()).toByteArray(); if( 0 == b[0] && b.length > 1 ) { byte [] bb = new byte[b.length - 1]; System.arraycopy(b, 1, bb, 0, bb.length); b = bb; } */ private final static byte [] _byte0 = {0}; public final static byte [] unsignedLongToByteArray(final long value) { if( 0 == value ) return _byte0; if( 0 <= value && value <= 0x00FF ) { byte [] bb = new byte[1]; bb[0] = (byte) (value & 0x00FF); return bb; } byte [] out = null; int offset = -1; for(int i = 7; i >=0; --i) { byte b = (byte) ((value >> (i * 8)) & 0xFF); if( out == null && b != 0 ) { out = new byte[i+1]; offset = i; } if( out != null ) out[ offset - i ] = b; } return out; } /** * Like unsignedLongToByteArray, except we specify what the first byte should be, so the * array is 1 byte longer than normal. This is used by things that need a CommandMarker. * * If the value is 0, then the array will be 1 byte with only @fistByte. The 0x00 byte * will not be included. */ public final static byte [] unsignedLongToByteArray(final long value, final byte firstByte) { // A little bit of unwinding for common cases. // These hit a lot of the SegmentationProfile cases if( 0 == value ) { byte [] bb = new byte[1]; bb[0] = firstByte; return bb; } if( 0 <= value && value <= 0x00FF ) { byte [] bb = new byte[2]; bb[0] = firstByte; bb[1] = (byte) (value & 0x00FF); return bb; } if( 0 <= value && value <= 0x0000FFFFL ) { byte [] bb = new byte[3]; bb[0] = firstByte; bb[1] = (byte) ((value >>> 8) & 0x00FF); bb[2] = (byte) (value & 0x00FF); return bb; } byte [] out = null; int offset = -1; for(int i = 7; i >=0; --i) { byte b = (byte) ((value >> (i * 8)) & 0xFF); if( out == null && b != 0 ) { out = new byte[i+2]; offset = i; } if( out != null ) out[ offset - i + 1 ] = b; } out[0] = firstByte; return out; } }