package edu.stanford.nlp.util; import edu.stanford.nlp.util.logging.Redwood; import java.lang.reflect.Array; import java.util.*; import java.util.function.Function; import java.util.function.Predicate; /** * Static utility methods for operating on arrays. * * Note: You can also find some methods for printing arrays that are tables in * StringUtils. (Search for makeTextTable, etc.) * * @author Huy Nguyen (htnguyen@cs.stanford.edu) * @author Michel Galley (mgalley@stanford.edu) */ public class ArrayUtils { /** A logger for this class */ private static Redwood.RedwoodChannels log = Redwood.channels(ArrayUtils.class); /** * Should not be instantiated */ private ArrayUtils() {} public static byte[] gapEncode(int[] orig) { List<Byte> encodedList = gapEncodeList(orig); byte[] arr = new byte[encodedList.size()]; int i = 0; for (byte b : encodedList) { arr[i++] = b; } return arr; } public static List<Byte> gapEncodeList(int[] orig) { for (int i = 1; i < orig.length; i++) { if (orig[i] < orig[i-1]) { throw new IllegalArgumentException("Array must be sorted!"); } } List<Byte> bytes = new ArrayList<>(); int index = 0; int prevNum = 0; byte currByte = 0 << 8; for (int f : orig) { String n = (f == prevNum ? "" : Integer.toString(f-prevNum, 2)); for (int ii = 0; ii < n.length(); ii++) { if (index == 8) { bytes.add(currByte); currByte = 0 << 8; index = 0; } currByte <<= 1; currByte++; index++; } if (index == 8) { bytes.add(currByte); currByte = 0 << 8; index = 0; } currByte <<= 1; index++; for (int i = 1; i < n.length(); i++) { if (index == 8) { bytes.add(currByte); currByte = 0 << 8; index = 0; } currByte <<= 1; if (n.charAt(i) == '1') { currByte++; } index++; } prevNum = f; } while (index > 0 && index < 9) { if (index == 8) { bytes.add(currByte); break; } currByte <<= 1; currByte++; index++; } return bytes; } public static int[] gapDecode(byte[] gapEncoded) { return gapDecode(gapEncoded, 0, gapEncoded.length); } public static int[] gapDecode(byte[] gapEncoded, int startIndex, int endIndex) { List<Integer> ints = gapDecodeList(gapEncoded, startIndex, endIndex); int[] arr = new int[ints.size()]; int index = 0; for (int i : ints) { arr[index++] = i; } return arr; } public static List<Integer> gapDecodeList(byte[] gapEncoded) { return gapDecodeList(gapEncoded, 0, gapEncoded.length); } public static List<Integer> gapDecodeList(byte[] gapEncoded, int startIndex, int endIndex) { boolean gettingSize = true; int size = 0; List<Integer> ints = new ArrayList<>(); int gap = 0; int prevNum = 0; for (int i = startIndex; i < endIndex; i++) { byte b = gapEncoded[i]; for (int index = 7; index >= 0; index--) { boolean value = ((b >> index) & 1) == 1; if (gettingSize) { if (value) { size++; } else { if (size == 0) { ints.add(prevNum); } else if (size == 1) { prevNum++; ints.add(prevNum); size = 0; } else { gettingSize = false; gap = 1; size--; } } } else { gap <<= 1; if (value) { gap++; } size--; if (size == 0) { prevNum += gap; ints.add(prevNum); gettingSize = true; } } } } return ints; } public static byte[] deltaEncode(int[] orig) { List<Byte> encodedList = deltaEncodeList(orig); byte[] arr = new byte[encodedList.size()]; int i = 0; for (byte b : encodedList) { arr[i++] = b; } return arr; } public static List<Byte> deltaEncodeList(int[] orig) { for (int i = 1; i < orig.length; i++) { if (orig[i] < orig[i-1]) { throw new IllegalArgumentException("Array must be sorted!"); } } List<Byte> bytes = new ArrayList<>(); int index = 0; int prevNum = 0; byte currByte = 0 << 8; for (int f : orig) { String n = (f == prevNum ? "" : Integer.toString(f-prevNum, 2)); String n1 = (n.isEmpty() ? "" : Integer.toString(n.length(), 2)); for (int ii = 0; ii < n1.length(); ii++) { if (index == 8) { bytes.add(currByte); currByte = 0 << 8; index = 0; } currByte <<= 1; currByte++; index++; } if (index == 8) { bytes.add(currByte); currByte = 0 << 8; index = 0; } currByte <<= 1; index++; for (int i = 1; i < n1.length(); i++) { if (index == 8) { bytes.add(currByte); currByte = 0 << 8; index = 0; } currByte <<= 1; if (n1.charAt(i) == '1') { currByte++; } index++; } for (int i = 1; i < n.length(); i++) { if (index == 8) { bytes.add(currByte); currByte = 0 << 8; index = 0; } currByte <<= 1; if (n.charAt(i) == '1') { currByte++; } index++; } prevNum = f; } while (index > 0 && index < 9) { if (index == 8) { bytes.add(currByte); break; } currByte <<= 1; currByte++; index++; } return bytes; } public static int[] deltaDecode(byte[] deltaEncoded) { return deltaDecode(deltaEncoded, 0, deltaEncoded.length); } public static int[] deltaDecode(byte[] deltaEncoded, int startIndex, int endIndex) { List<Integer> ints = deltaDecodeList(deltaEncoded); int[] arr = new int[ints.size()]; int index = 0; for (int i : ints) { arr[index++] = i; } return arr; } public static List<Integer> deltaDecodeList(byte[] deltaEncoded) { return deltaDecodeList(deltaEncoded, 0, deltaEncoded.length); } public static List<Integer> deltaDecodeList(byte[] deltaEncoded, int startIndex, int endIndex) { boolean gettingSize1 = true; boolean gettingSize2 = false; int size1 = 0; List<Integer> ints = new ArrayList<>(); int gap = 0; int size2 = 0; int prevNum = 0; for (int i = startIndex; i < endIndex; i++) { byte b = deltaEncoded[i]; for (int index = 7; index >= 0; index--) { boolean value = ((b >> index) & 1) == 1; if (gettingSize1) { if (value) { size1++; } else { if (size1 == 0) { ints.add(prevNum); } else if (size1 == 1) { prevNum++; ints.add(prevNum); size1 = 0; } else { gettingSize1 = false; gettingSize2 = true; size2 = 1; size1--; } } } else if (gettingSize2) { size2 <<= 1; if (value) { size2++; } size1--; if (size1 == 0) { gettingSize2 = false; gap = 1; size2--; } } else { gap <<= 1; if (value) { gap++; } size2--; if (size2 == 0) { prevNum += gap; ints.add(prevNum); gettingSize1 = true; } } } } return ints; } /** helper for gap encoding. */ private static byte[] bitSetToByteArray(BitSet bitSet) { while (bitSet.length() % 8 != 0) { bitSet.set(bitSet.length(), true); } byte[] array = new byte[bitSet.length()/8]; for (int i = 0; i < array.length; i++) { int offset = i * 8; int index = 0; for (int j = 0; j < 8; j++) { index <<= 1; if (bitSet.get(offset+j)) { index++; } } array[i] = (byte)(index - 128); } return array; } /** helper for gap encoding. */ private static BitSet byteArrayToBitSet(byte[] array) { BitSet bitSet = new BitSet(); int index = 0; for (byte b : array) { int b1 = ((int)b) + 128; bitSet.set(index++, ((b1 >> 7) & 1) == 1); bitSet.set(index++, ((b1 >> 6) & 1) == 1); bitSet.set(index++, ((b1 >> 5) & 1) == 1); bitSet.set(index++, ((b1 >> 4) & 1) == 1); bitSet.set(index++, ((b1 >> 3) & 1) == 1); bitSet.set(index++, ((b1 >> 2) & 1) == 1); bitSet.set(index++, ((b1 >> 1) & 1) == 1); bitSet.set(index++, (b1 & 1) == 1); } return bitSet; } // for (int i = 1; i < orig.length; i++) { // if (orig[i] < orig[i-1]) { throw new RuntimeException("Array must be sorted!"); } // StringBuilder bits = new StringBuilder(); // int prevNum = 0; // for (int f : orig) { // StringBuilder bits1 = new StringBuilder(); // log.info(f+"\t"); // String n = Integer.toString(f-prevNum, 2); // String n1 = Integer.toString(n.length(), 2); // for (int ii = 0; ii < n1.length(); ii++) { // bits1.append("1"); // } // bits1.append("0"); // bits1.append(n1.substring(1)); // bits1.append(n.substring(1)); // log.info(bits1+"\t"); // bits.append(bits1); // prevNum = f; // } public static double[] flatten(double[][] array) { int size = 0; for (double[] a : array) { size += a.length; } double[] newArray = new double[size]; int i = 0; for (double[] a : array) { for (double d : a) { newArray[i++] = d; } } return newArray; } public static double[][] to2D(double[] array, int dim1Size) { int dim2Size = array.length/dim1Size; return to2D(array, dim1Size, dim2Size); } public static double[][] to2D(double[] array, int dim1Size, int dim2Size) { double[][] newArray = new double[dim1Size][dim2Size]; int k = 0; for (int i = 0; i < newArray.length; i++) { for (int j = 0; j < newArray[i].length; j++) { newArray[i][j] = array[k++]; } } return newArray; } /** * Removes the element at the specified index from the array, and returns * a new array containing the remaining elements. If <tt>index</tt> is * invalid, returns <tt>array</tt> unchanged. */ public static double[] removeAt(double[] array, int index) { if (array == null) { return null; } if (index < 0 || index >= array.length) { return array; } double[] retVal = new double[array.length - 1]; for (int i = 0; i < array.length; i++) { if (i < index) { retVal[i] = array[i]; } else if (i > index) { retVal[i - 1] = array[i]; } } return retVal; } /** * Removes the element at the specified index from the array, and returns * a new array containing the remaining elements. If <tt>index</tt> is * invalid, returns <tt>array</tt> unchanged. Uses reflection to determine * the type of the array and returns an array of the appropriate type. */ public static Object[] removeAt(Object[] array, int index) { if (array == null) { return null; } if (index < 0 || index >= array.length) { return array; } Object[] retVal = (Object[]) Array.newInstance(array[0].getClass(), array.length - 1); for (int i = 0; i < array.length; i++) { if (i < index) { retVal[i] = array[i]; } else if (i > index) { retVal[i - 1] = array[i]; } } return retVal; } public static String toString(int[][] a) { StringBuilder result = new StringBuilder("["); for (int i = 0; i < a.length; i++) { result.append(Arrays.toString(a[i])); if(i < a.length-1) result.append(','); } result.append(']'); return result.toString(); } /** * Tests two int[][] arrays for having equal contents. * @return true iff for each i, <code>equalContents(xs[i],ys[i])</code> is true */ public static boolean equalContents(int[][] xs, int[][] ys) { if(xs ==null) return ys == null; if(ys == null) return false; if(xs.length != ys.length) return false; for(int i = xs.length-1; i >= 0; i--) { if(! equalContents(xs[i],ys[i])) return false; } return true; } /** * Tests two double[][] arrays for having equal contents. * @return true iff for each i, <code>equals(xs[i],ys[i])</code> is true */ public static boolean equals(double[][] xs, double[][] ys) { if(xs == null) return ys == null; if(ys == null) return false; if(xs.length != ys.length) return false; for(int i = xs.length-1; i >= 0; i--) { if(!Arrays.equals(xs[i],ys[i])) return false; } return true; } /** * tests two int[] arrays for having equal contents * @return true iff xs and ys have equal length, and for each i, <code>xs[i]==ys[i]</code> */ public static boolean equalContents(int[] xs, int[] ys) { if(xs.length != ys.length) return false; for(int i = xs.length-1; i >= 0; i--) { if(xs[i] != ys[i]) return false; } return true; } /** * Tests two boolean[][] arrays for having equal contents. * @return true iff for each i, <code>Arrays.equals(xs[i],ys[i])</code> is true */ @SuppressWarnings("null") public static boolean equals(boolean[][] xs, boolean[][] ys) { if(xs == null && ys != null) return false; if(ys == null) return false; if(xs.length != ys.length) return false; for(int i = xs.length-1; i >= 0; i--) { if(! Arrays.equals(xs[i],ys[i])) return false; } return true; } /** Returns true iff object o equals (not ==) some element of array a. */ public static <T> boolean contains(T[] a, T o) { for (T item : a) { if (item.equals(o)) return true; } return false; } // from stackoverflow // http://stackoverflow.com/questions/80476/how-to-concatenate-two-arrays-in-java /** * Concatenates two arrays and returns the result */ public static <T> T[] concatenate(T[] first, T[] second) { T[] result = Arrays.copyOf(first, first.length + second.length); System.arraycopy(second, 0, result, first.length, second.length); return result; } /** * Returns an array with only the elements accepted by <code>filter</code> * <br> * Implementation notes: creates two arrays, calls <code>filter</code> * once for each element, does not alter <code>original</code> */ public static <T> T[] filter(T[] original, Predicate<? super T> filter) { T[] result = Arrays.copyOf(original, original.length); // avoids generic array creation compile error int size = 0; for (T value : original) { if (filter.test(value)) { result[size] = value; size++; } } if (size == original.length) { return result; } return Arrays.copyOf(result, size); } /** Return a Set containing the same elements as the specified array. */ public static <T> Set<T> asSet(T[] a) { return Generics.newHashSet(Arrays.asList(a)); } /** Return an immutable Set containing the same elements as the specified * array. Arrays with 0 or 1 elements are special cased to return the * efficient small sets from the Collections class. */ public static <T> Set<T> asImmutableSet(T[] a) { if (a.length == 0) { return Collections.emptySet(); } else if (a.length == 1) { return Collections.singleton(a[0]); } else { return Collections.unmodifiableSet(Generics.newHashSet(Arrays.asList(a))); } } public static void fill(double[][] d, double val) { for (double[] aD : d) { Arrays.fill(aD, val); } } public static void fill(double[][][] d, double val) { for (double[][] aD : d) { fill(aD, val); } } public static void fill(double[][][][] d, double val) { for (double[][][] aD : d) { fill(aD, val); } } public static void fill(boolean[][] d, boolean val) { for (boolean[] aD : d) { Arrays.fill(aD, val); } } public static void fill(boolean[][][] d, boolean val) { for (boolean[][] aD : d) { fill(aD, val); } } public static void fill(boolean[][][][] d, boolean val) { for (boolean[][][] aD : d) { fill(aD, val); } } /** * Casts to a double array */ public static double[] toDouble(float[] a) { double[] d = new double[a.length]; for (int i = 0; i < a.length; i++) { d[i] = a[i]; } return d; } /** * Casts to a double array. */ public static double[] toDouble(int[] array) { double[] rv = new double[array.length]; for (int i = 0; i < array.length; i++) { rv[i] = array[i]; } return rv; } /** needed because Arrays.asList() won't to autoboxing, * so if you give it a primitive array you get a * singleton list back with just that array as an element. */ public static List<Integer> asList(int[] array) { List<Integer> l = new ArrayList<>(); for (int i : array) { l.add(i); } return l; } public static double[] asPrimitiveDoubleArray(Collection<Double> d) { double[] newD = new double[d.size()]; int i = 0; for (Double j : d) { newD[i++] = j; } return newD; } public static int[] asPrimitiveIntArray(Collection<Integer> d) { int[] newI = new int[d.size()]; int i = 0; for (Integer j : d) { newI[i++] = j; } return newI; } public static long[] copy(long[] arr) { if (arr == null) { return null; } long[] newArr = new long[arr.length]; System.arraycopy(arr, 0, newArr, 0, arr.length); return newArr; } public static int[] copy(int[] i) { if (i == null) { return null; } int[] newI = new int[i.length]; System.arraycopy(i, 0, newI, 0, i.length); return newI; } public static int[][] copy(int[][] i) { if (i == null) { return null; } int[][] newI = new int[i.length][]; for (int j = 0; j < newI.length; j++) { newI[j] = copy(i[j]); } return newI; } public static double[] copy(double[] d) { if (d == null) { return null; } double[] newD = new double[d.length]; System.arraycopy(d, 0, newD, 0, d.length); return newD; } public static double[][] copy(double[][] d) { if (d == null) { return null; } double[][] newD = new double[d.length][]; for (int i = 0; i < newD.length; i++) { newD[i] = copy(d[i]); } return newD; } public static double[][][] copy(double[][][] d) { if (d == null) { return null; } double[][][] newD = new double[d.length][][]; for (int i = 0; i < newD.length; i++) { newD[i] = copy(d[i]); } return newD; } public static float[] copy(float[] d) { if (d == null) { return null; } float[] newD = new float[d.length]; System.arraycopy(d, 0, newD, 0, d.length); return newD; } public static float[][] copy(float[][] d) { if (d == null) { return null; } float[][] newD = new float[d.length][]; for (int i = 0; i < newD.length; i++) { newD[i] = copy(d[i]); } return newD; } public static float[][][] copy(float[][][] d) { if (d == null) { return null; } float[][][] newD = new float[d.length][][]; for (int i = 0; i < newD.length; i++) { newD[i] = copy(d[i]); } return newD; } public static String toString(double[][] b) { StringBuilder result = new StringBuilder("["); for (int i = 0; i < b.length; i++) { result.append(Arrays.toString(b[i])); if(i < b.length-1) result.append(','); } result.append(']'); return result.toString(); } public static String toString(boolean[][] b) { StringBuilder result = new StringBuilder("["); for (int i = 0; i < b.length; i++) { result.append(Arrays.toString(b[i])); if(i < b.length-1) result.append(','); } result.append(']'); return result.toString(); } public static long[] toPrimitive(Long[] in) { return toPrimitive(in,0L); } public static int[] toPrimitive(Integer[] in) { return toPrimitive(in,0); } public static short[] toPrimitive(Short[] in) { return toPrimitive(in,(short)0); } public static char[] toPrimitive(Character[] in) { return toPrimitive(in,(char)0); } public static double[] toPrimitive(Double[] in) { return toPrimitive(in,0.0); } public static long[] toPrimitive(Long[] in, long valueForNull) { if (in == null) return null; final long[] out = new long[in.length]; for (int i = 0; i < in.length; i++) { Long b = in[i]; out[i] = (b == null ? valueForNull : b); } return out; } public static int[] toPrimitive(Integer[] in, int valueForNull) { if (in == null) return null; final int[] out = new int[in.length]; for (int i = 0; i < in.length; i++) { Integer b = in[i]; out[i] = (b == null ? valueForNull : b); } return out; } public static short[] toPrimitive(Short[] in, short valueForNull) { if (in == null) return null; final short[] out = new short[in.length]; for (int i = 0; i < in.length; i++) { Short b = in[i]; out[i] = (b == null ? valueForNull : b); } return out; } public static char[] toPrimitive(Character[] in, char valueForNull) { if (in == null) return null; final char[] out = new char[in.length]; for (int i = 0; i < in.length; i++) { Character b = in[i]; out[i] = (b == null ? valueForNull : b); } return out; } public static double[] toDoubleArray(String[] in) { double[] ret = new double[in.length]; for (int i = 0; i < in.length; i++) ret[i] = Double.parseDouble(in[i]); return ret; } public static double[] toPrimitive(Double[] in, double valueForNull) { if (in == null) return null; final double[] out = new double[in.length]; for (int i = 0; i < in.length; i++) { Double b = in[i]; out[i] = (b == null ? valueForNull : b); } return out; } /** * Provides a consistent ordering over arrays. First compares by the * first element. If that element is equal, the next element is * considered, and so on. This is the array version of * {@link edu.stanford.nlp.util.CollectionUtils#compareLists} * and uses the same logic when the arrays are of different lengths. */ public static <T extends Comparable<T>> int compareArrays(T[] first, T[] second) { List<T> firstAsList = Arrays.asList(first); List<T> secondAsList = Arrays.asList(second); return CollectionUtils.compareLists(firstAsList, secondAsList); } /* -- This is an older more direct implementation of the above, but not necessary unless for performance public static <C extends Comparable<C>> int compareArrays(C[] a1, C[] a2) { int len = Math.min(a1.length, a2.length); for (int i = 0; i < len; i++) { int comparison = a1[i].compareTo(a2[i]); if (comparison != 0) return comparison; } // one is a prefix of the other, or they're identical if (a1.length < a2.length) return -1; if (a1.length > a2.length) return 1; return 0; } */ public static List<Integer> getSubListIndex(Object[] tofind, Object[] tokens){ return getSubListIndex(tofind, tokens, (o1) -> o1.first().equals(o1.second())); } /** * If tofind is a part of tokens, it finds the ****starting index***** of tofind in tokens * If tofind is not a sub-array of tokens, then it returns null * note that tokens sublist should have the exact elements and order as in tofind * @param tofind array you want to find in tokens * @param tokens * @param matchingFunction function that takes (tofindtoken, token) pair and returns whether they match * @return starting index of the sublist */ public static List<Integer> getSubListIndex(Object[] tofind, Object[] tokens, Function<Pair, Boolean> matchingFunction){ if(tofind.length > tokens.length) return null; List<Integer> allIndices = new ArrayList<>(); boolean matched = false; int index = -1; int lastUnmatchedIndex = 0; for(int i = 0 ; i < tokens.length;){ for(int j = 0; j < tofind.length ;){ if(matchingFunction.apply(new Pair(tofind[j], tokens[i]))){ index = i; i++; j++; if(j == tofind.length) { matched = true; break; } }else{ j = 0; i = lastUnmatchedIndex +1; lastUnmatchedIndex = i; index = -1; if(lastUnmatchedIndex == tokens.length) break; } if(i >= tokens.length){ index = -1; break; } } if(i == tokens.length || matched){ if(index >= 0) //index = index - l1.length + 1; allIndices.add(index - tofind.length + 1); matched = false; lastUnmatchedIndex = index; //break; } } //get starting point return allIndices; } /** Returns a new array which has the numbers in the input array * L1-normalized. * * @param ar Input array * @return New array that has L1 normalized form of input array */ public static double[] normalize(double[] ar) { double[] ar2 = new double[ar.length]; double total = 0.0; for (double d : ar) { total += d; } for (int i = 0; i < ar.length; i++) { ar2[i] = ar[i]/total; } return ar2; } public static Object[] subArray(Object[] arr, int startindexInclusive, int endindexExclusive){ if(arr == null) return arr; Class type = arr.getClass().getComponentType(); if(endindexExclusive < startindexInclusive || startindexInclusive > arr.length -1 ) return (Object[]) Array.newInstance(type, 0); if(endindexExclusive > arr.length) endindexExclusive = arr.length; if(startindexInclusive < 0) startindexInclusive = 0; Object[] b = (Object[]) Array.newInstance(type, endindexExclusive - startindexInclusive); System.arraycopy(arr, startindexInclusive, b, 0, endindexExclusive - startindexInclusive); return b; } public static int compareBooleanArrays(boolean[] a1, boolean[] a2) { int len = Math.min(a1.length, a2.length); for (int i = 0; i < len; i++) { if (!a1[i] && a2[i]) return -1; if (a1[i] && !a2[i]) return 1; } // one is a prefix of the other, or they're identical if (a1.length < a2.length) return -1; if (a1.length > a2.length) return 1; return 0; } public static String toString(double[] doubles, String glue) { String s = ""; for(int i = 0; i < doubles.length; i++){ if(i==0) s = String.valueOf(doubles[i]); else s+= glue + String.valueOf(doubles[i]); } return s; } }