/*
 * File:                CollectionUtil.java
 * Authors:             Justin Basilico
 * Company:             Sandia National Laboratories
 * Project:             Cognitive Foundry
 *
 * Copyright March 25, 2008, Sandia Corporation.
 * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
 * license for use of this work by or on behalf of the U.S. Government. Export
 * of this program may require a license from the United States Government.
 * See CopyrightHistory.txt for complete details.
 *
 */

package gov.sandia.cognition.collection;

import gov.sandia.cognition.annotation.CodeReview;
import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationType;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;

/**
 * The {@code CollectionUtil} class implements static methods for dealing with
 * {@code Collection} and {@code Iterable} objects. They are both put into the
 * same utility class so that they can be interchanged without changing the
 * method call.
 *
 * @author  Justin Basilico
 * @since   2.1
 */
@CodeReview(
    reviewer = "Kevin R. Dixon",
    date = "2008-12-02",
    changesNeeded = false,
    comments = "Looks good."
)
public class CollectionUtil
    extends Object
{

    /**
     * The default load factor for a hash map is {@value}.
     */
    private static final float DEFAULT_LOAD_FACTOR = 0.75f;

    /**
     * Returns true if the given collection is null or empty.
     *
     * @param   collection
     *      The collection to determine if it is null or empty.
     * @return
     *      True if the given collection is null or empty.
     */
    public static boolean isEmpty(
        final Collection<?> collection)
    {
        return collection == null || collection.isEmpty();
    }

    /**
     * Returns true if the given iterable is null or empty.
     *
     * @param   iterable
     *      The iterable to determine if it is null or empty.
     * @return
     *      True if the given iterable is null or empty.
     */
    public static boolean isEmpty(
        final Iterable<?> iterable)
    {
        if (iterable == null)
        {
            // It is null, so it is empty.
            return true;
        }
        else if (iterable instanceof Collection)
        {
            // Collections can answer directly without creating an iterator.
            return ((Collection<?>) iterable).isEmpty();
        }
        else
        {
            return !iterable.iterator().hasNext();
        }
    }

    /**
     * Returns the given data as an ArrayList. If the data already is an
     * ArrayList, that same instance is returned (no copy is made). If it is
     * some other Collection, it is copied using the ArrayList copy
     * constructor. Otherwise, the iterable is walked exactly once and each
     * element is added to a new ArrayList.
     *
     * @param   <DataType>
     *      Type of data in the Collection.
     * @param   data
     *      Collection to return as an ArrayList.
     * @return
     *      ArrayList of the given data, or null if the data is null.
     */
    public static <DataType> ArrayList<DataType> asArrayList(
        Iterable<DataType> data)
    {
        if (data == null)
        {
            return null;
        }
        else if (data instanceof ArrayList)
        {
            return (ArrayList<DataType>) data;
        }
        else if (data instanceof Collection)
        {
            return new ArrayList<DataType>((Collection<? extends DataType>) data);
        }
        else
        {
            // Only iterate once: counting the elements first would walk the
            // whole iterable a second time and would break iterables that can
            // only be traversed a single time.
            final ArrayList<DataType> retval = new ArrayList<DataType>();
            for (DataType value : data)
            {
                retval.add(value);
            }
            return retval;
        }
    }

    /**
     * Determines the size of the given collection, checking for null.
     *
     * @param   collection
     *      The collection to get the size of.
     * @return
     *      The size of the collection. If it is null, zero is returned.
     */
    public static int size(
        final Collection<?> collection)
    {
        if (collection == null)
        {
            return 0;
        }
        else
        {
            return collection.size();
        }
    }

    /**
     * Determines the size of the given iterable. If it is null, zero is
     * returned. If it is a {@code Collection}, then the size method is used.
     * Otherwise, the iterable is iterated over to get the size.
     *
     * @param   iterable
     *      The iterable to determine the size of.
     * @return
     *      The size of the given iterable.
     */
    public static int size(
        final Iterable<?> iterable)
    {
        if (iterable == null)
        {
            // The size is zero.
            return 0;
        }
        else if (iterable instanceof Collection)
        {
            // Get the size from the collection. This cast is justified by
            // not having to loop over all the elements.
            return ((Collection<?>) iterable).size();
        }
        else
        {
            // Count up the elements in the iterable.
            int counter = 0;
            final Iterator<?> iterator = iterable.iterator();
            while (iterator.hasNext())
            {
                iterator.next();
                counter++;
            }
            return counter;
        }
    }

    /**
     * Gets the first element from an iterable. If the iterable is null or
     * empty, null is returned.
     *
     * @param   <T>
     *      The type of element.
     * @param   iterable
     *      The iterable to get the first element from.
     * @return
     *      The first element from the iterable, if one exists. Otherwise,
     *      null.
     */
    public static <T> T getFirst(
        final Iterable<? extends T> iterable)
    {
        if (iterable == null)
        {
            // No first element.
            return null;
        }

        final Iterator<? extends T> iterator = iterable.iterator();
        if (iterator.hasNext())
        {
            return iterator.next();
        }
        else
        {
            // No first element.
            return null;
        }
    }

    /**
     * Gets the first element of the list. If the list is null or empty, null
     * is returned.
     *
     * @param   <T>
     *      The type of element in the list.
     * @param   list
     *      The list to get the first element from.
     * @return
     *      The first element from the list, if one exists. Otherwise, null.
     */
    public static <T> T getFirst(
        final List<? extends T> list)
    {
        if (list == null || list.isEmpty())
        {
            return null;
        }
        else
        {
            return list.get(0);
        }
    }

    /**
     * Gets the last element of the list. If the list is null or empty, null is
     * returned.
     *
     * @param   <T>
     *      The type of element in the list.
     * @param   list
     *      The list to get the last element from.
     * @return
     *      The last element from the list, if one exists. Otherwise, null.
     */
    public static <T> T getLast(
        final List<? extends T> list)
    {
        if (list == null || list.isEmpty())
        {
            return null;
        }
        else
        {
            return list.get(list.size() - 1);
        }
    }

    /**
     * Check if two collections return exactly the same objects in the same
     * order. This means that both collections must return the same number of
     * objects. Two null elements at the same position are considered equal.
     *
     * @param   <T>
     *      The type of element in the collections.
     * @param   data1
     *      The first collection to compare. Cannot be null.
     * @param   data2
     *      The second collection to compare. Cannot be null.
     * @return
     *      {@code true} if simultaneous iteration of {@code data1} and
     *      {@code data2} always returns objects for which
     *      {@link Object#equals(java.lang.Object)} is {@code true} (or which
     *      are both null); {@code false} otherwise
     * @throws  NullPointerException
     *      If either collection is null.
     */
    public static <T> boolean equals(
        Collection<? extends T> data1,
        Collection<? extends T> data2)
    {
        // Don't waste time iterating if sizes are different
        return data1.size() == data2.size()
            ? equals((Iterable<? extends T>) data1,
                (Iterable<? extends T>) data2)
            : false;
    }

    /**
     * Check if two iterables return exactly the same objects in the same
     * order. This means that both iterables must return the same number of
     * objects. Two null elements at the same position are considered equal.
     *
     * @param   <T>
     *      The type of element in the iterables.
     * @param   data1
     *      The first iterable to compare. Cannot be null.
     * @param   data2
     *      The second iterable to compare. Cannot be null.
     * @return
     *      {@code true} if simultaneous iteration of {@code data1} and
     *      {@code data2} always returns objects for which
     *      {@link Object#equals(java.lang.Object)} is {@code true} (or which
     *      are both null); {@code false} otherwise
     * @throws  NullPointerException
     *      If either iterable is null.
     */
    public static <T> boolean equals(
        Iterable<? extends T> data1,
        Iterable<? extends T> data2)
    {
        boolean equal = true;
        Iterator<? extends T> iterator1 = data1.iterator(),
            iterator2 = data2.iterator();

        // Ensure equality of all items, stopping at the first mismatch.
        while (equal && iterator1.hasNext() && iterator2.hasNext())
        {
            final T val1 = iterator1.next();
            final T val2 = iterator2.next();

            // Null-safe comparison: a null element only equals another null.
            equal = (val1 == null) ? (val2 == null) : val1.equals(val2);
        }

        if (iterator1.hasNext() || iterator2.hasNext())
        {
            // Ensure same size
            equal = false;
        }
        return equal;
    }

    /**
     * Returns the set of indices of the data array such that
     * data[return[0..k-1]] ≤ data[return[k]] ≤ data[return[k+1...N-1]].
     * This algorithm will partition the data set in O(N) time. This is faster
     * than the typical sort and split time of O(N*log(N)). Note that the
     * subsets data[return[0..k-1]] and data[return[k+1..N-1]] are themselves
     * unsorted. Because of this, NRC calls "Selection is sorting's austere
     * sister."
     * <p>
     * Note that, despite the method name, the postcondition above means that
     * position k of the result refers to the element of zero-based rank k in
     * ascending comparator order.
     *
     * @param   <ComparableType>
     *      Type of data to compare to.
     * @param   k
     *      kth largest value to split upon.
     * @param   data
     *      Data to partition, left unchanged by this method.
     * @param   comparator
     *      Comparator used to determine if two values are greater than, less
     *      than, or equal to each other.
     * @return
     *      Indices into data so that data[return[0..k-1]] ≤
     *      data[return[k]] ≤ data[return[k+1...N-1]].
     */
    @PublicationReference(
        author = {
            "William H Press",
            "Saul A. Teukolsky",
            "William T. Vetterling",
            "Brian P. Flannery"
        },
        title = "Numerical Recipes, Third Edition",
        type = PublicationType.Book,
        year = 2007,
        pages = 1104,
        notes = "Loosely based on the selecti() function"
    )
    public static <ComparableType> int[] findKthLargest(
        int k,
        ArrayList<? extends ComparableType> data,
        Comparator<? super ComparableType> comparator)
    {
        // Work on an index permutation so that the data itself is untouched.
        final int num = data.size();
        final int[] indices = new int[num];
        for (int i = 0; i < num; i++)
        {
            indices[i] = i;
        }

        int leftIndex = 0;
        int rightIndex = num - 1;
        while (true)
        {
            if (rightIndex <= leftIndex + 1)
            {
                // The active range has at most two elements left: order them
                // (if there are two) and we are done.
                if (rightIndex == leftIndex + 1)
                {
                    swapIfAGreaterThanB(leftIndex, rightIndex,
                        indices, data, comparator);
                }
                return indices;
            }
            else
            {
                // Overflow-safe midpoint of the active range.
                final int mid = leftIndex + (rightIndex - leftIndex) / 2;

                // Move the middle element next to the left edge and order the
                // three probe positions so that
                // [leftIndex] <= [leftIndex + 1] <= [rightIndex]. The values
                // at the edges then act as sentinels for the scans below.
                swapArrayValues(mid, leftIndex + 1, indices);
                swapIfAGreaterThanB(leftIndex, rightIndex,
                    indices, data, comparator);
                swapIfAGreaterThanB(leftIndex + 1, rightIndex,
                    indices, data, comparator);
                swapIfAGreaterThanB(leftIndex, leftIndex + 1,
                    indices, data, comparator);

                // The partitioning value is the one at leftIndex + 1.
                int i = leftIndex + 1;
                final int originali = indices[i];
                int j = rightIndex;
                ComparableType valueOriginali = data.get(originali);
                while (true)
                {
                    // Find from the left a value that is >= valueOriginali
                    do
                    {
                        i++;
                    }
                    while (comparator.compare(
                        data.get(indices[i]), valueOriginali) < 0);

                    // Find from the right a value that is <= valueOriginali
                    do
                    {
                        j--;
                    }
                    while (comparator.compare(
                        data.get(indices[j]), valueOriginali) > 0);

                    if (j < i)
                    {
                        // The scans have crossed, partitioning is complete.
                        break;
                    }

                    swapArrayValues(i, j, indices);
                }

                // Put the partitioning element into its final position j.
                indices[leftIndex + 1] = indices[j];
                indices[j] = originali;

                // Keep only the side of the partition that contains k.
                if (j >= k)
                {
                    rightIndex = j - 1;
                }
                if (j <= k)
                {
                    leftIndex = i;
                }
            }
        }
    }

    /**
     * Swaps the indices "a" and "b" in the array "indices" if the
     * corresponding data values data[indices[a]] is greater than
     * data[indices[b]].
     *
     * @param   <ComparableType>
     *      Type of data to compare to.
     * @param   a
     *      first index
     * @param   b
     *      second index
     * @param   indices
     *      array of indices to index into "data", which is modified by this
     *      method.
     * @param   data
     *      ArrayList of values, unchanged.
     * @param   comparator
     *      Comparator used to determine if two values are greater than, less
     *      than, or equal to each other.
     * @return
     *      True if swapped, false if left alone.
     */
    private static <ComparableType> boolean swapIfAGreaterThanB(
        int a,
        int b,
        int[] indices,
        ArrayList<? extends ComparableType> data,
        Comparator<? super ComparableType> comparator)
    {
        final boolean doSwap = comparator.compare(
            data.get(indices[a]), data.get(indices[b])) > 0;
        if (doSwap)
        {
            swapArrayValues(a, b, indices);
        }
        return doSwap;
    }

    /**
     * Swaps the two indexed values in the indices array.
     *
     * @param   i1
     *      First index
     * @param   i2
     *      Second index
     * @param   indices
     *      Array of indices to swap
     */
    private static void swapArrayValues(
        int i1,
        int i2,
        int[] indices)
    {
        int temp = indices[i1];
        indices[i1] = indices[i2];
        indices[i2] = temp;
    }

    /**
     * Creates a partition of the given data into "numPartition" roughly equal
     * sets, preserving their pre-existing sequential ordering, with the
     * nonzero remainder elements going into the final partition. If the data
     * is a {@code List}, this delegates to the {@code List} overload;
     * otherwise the elements are copied into new lists.
     *
     * @param   <DataType>
     *      Type of data to partition.
     * @param   data
     *      Collection of data to partition
     * @param   numPartitions
     *      Number of partitions to create. Must be positive.
     * @return
     *      List of Lists of size data.size()/numPartitions, with the
     *      remainder of data elements going into the final partition.
     * @throws  IllegalArgumentException
     *      If numPartitions is not positive.
     */
    public static <DataType> ArrayList<List<? extends DataType>> createSequentialPartitions(
        Iterable<? extends DataType> data,
        int numPartitions)
    {
        if (numPartitions <= 0)
        {
            // Fail clearly instead of with a division by zero below.
            throw new IllegalArgumentException(
                "numPartitions must be positive: " + numPartitions);
        }

        if (data instanceof List<?>)
        {
            @SuppressWarnings("unchecked")
            final List<? extends DataType> list =
                (List<? extends DataType>) data;
            return createSequentialPartitions(list, numPartitions);
        }

        final int numData = CollectionUtil.size(data);
        final int numEach = numData / numPartitions;

        ArrayList<List<? extends DataType>> retval =
            new ArrayList<List<? extends DataType>>(numPartitions);
        int index = 0;
        Iterator<? extends DataType> iterator = data.iterator();
        for (int n = 0; n < numPartitions; n++)
        {
            // The remainder goes into the final partition
            int numThis = (n < (numPartitions - 1))
                ? numEach : (numData - index);
            ArrayList<DataType> partition = new ArrayList<DataType>(numThis);
            for (int i = 0; i < numThis; i++)
            {
                partition.add(iterator.next());
                index++;
            }
            retval.add(partition);
        }

        return retval;
    }

    /**
     * Creates a partition of the given data into "numPartition" roughly equal
     * sets, preserving their pre-existing sequential ordering, with the
     * nonzero remainder elements going into the final partition. The
     * partitions are created with {@link List#subList(int, int)} on the given
     * list rather than by copying elements.
     *
     * @param   <DataType>
     *      Type of data to partition.
     * @param   data
     *      Collection of data to partition
     * @param   numPartitions
     *      Number of partitions to create. Must be positive.
     * @return
     *      List of Lists of size data.size()/numPartitions, with the
     *      remainder of data elements going into the final partition.
     * @throws  IllegalArgumentException
     *      If numPartitions is not positive.
     */
    public static <DataType> ArrayList<List<? extends DataType>> createSequentialPartitions(
        List<? extends DataType> data,
        int numPartitions)
    {
        if (numPartitions <= 0)
        {
            // Fail clearly instead of with a division by zero below.
            throw new IllegalArgumentException(
                "numPartitions must be positive: " + numPartitions);
        }

        final int numData = CollectionUtil.size(data);
        final int numEach = numData / numPartitions;

        ArrayList<List<? extends DataType>> result =
            new ArrayList<List<? extends DataType>>(numPartitions);
        int beginIndex = 0;
        int endIndex = beginIndex + numEach;
        for (int i = 0; i < numPartitions; i++)
        {
            if (i == numPartitions - 1)
            {
                // The remainder goes into the final partition.
                endIndex = numData;
            }

            result.add(data.subList(beginIndex, endIndex));
            beginIndex = endIndex;
            endIndex += numEach;
        }

        return result;
    }

    /**
     * Returns the indexed value into the {@code Iterable}. It first checks to
     * see if the {@code Iterable} is a {@code List}, and if so calls the get
     * method. Otherwise, it walks the {@code Iterable} to get to the element.
     *
     * @param   <DataType>
     *      The type of data.
     * @param   iterable
     *      The iterable to pull the value from.
     * @param   index
     *      The 0-based index to pull from the iterable.
     * @return
     *      The value at the given spot in the iterable.
     * @throws  IndexOutOfBoundsException
     *      If the index is less than zero or greater than or equal to the
     *      number of elements in the iterable.
     */
    public static <DataType> DataType getElement(
        final Iterable<DataType> iterable,
        int index)
    {
        if (iterable instanceof List<?>)
        {
            return ((List<DataType>) iterable).get(index);
        }
        else
        {
            if (index < 0)
            {
                // Bad index.
                throw new IndexOutOfBoundsException("index must be >= 0");
            }

            // Count the index down while walking; zero marks the target.
            for (DataType v : iterable)
            {
                if (index == 0)
                {
                    return v;
                }

                index--;
            }

            // Bad index.
            throw new IndexOutOfBoundsException("index >= iterable size");
        }
    }

    /**
     * Removes and returns the indexed value into the {@code Iterable}. It
     * first checks to see if the {@code Iterable} is a {@code List}, and if so
     * calls the remove method. Otherwise, it walks the {@code Iterable} to get
     * to the element and remove it. This only works on {@code Iterable}s that
     * are {@code List}s or whose {@code Iterator} implements the optional
     * {@code remove} method.
     *
     * @param   <DataType>
     *      The type of data.
     * @param   iterable
     *      The iterable to remove the value from.
     * @param   index
     *      The 0-based index to remove from the iterable.
     * @return
     *      The value removed from the given index in the iterable.
     * @throws  IndexOutOfBoundsException
     *      If the index is less than zero or greater than or equal to the
     *      number of elements in the iterable.
     * @throws  UnsupportedOperationException
     *      If the iterable does not support remove.
     */
    public static <DataType> DataType removeElement(
        final Iterable<DataType> iterable,
        int index)
    {
        if (iterable instanceof List<?>)
        {
            return ((List<DataType>) iterable).remove(index);
        }
        else
        {
            if (index < 0)
            {
                // Bad index.
                throw new IndexOutOfBoundsException("index must be >= 0");
            }

            // An explicit iterator is needed so that remove can be called.
            Iterator<DataType> iterator = iterable.iterator();
            while (iterator.hasNext())
            {
                DataType value = iterator.next();

                if (index == 0)
                {
                    iterator.remove();
                    return value;
                }

                index--;
            }

            // Bad index.
            throw new IndexOutOfBoundsException("index >= iterable size");
        }
    }

    /**
     * Performs a toString on each element given iterable with a given
     * delimiter between elements.
     *
     * @param   list
     *      The list to call toString on each element for.
     * @param   delimiter
     *      The delimiter.
     * @return
     *      A string with the toString on each element in the list called
     *      with a given delimiter between elements. If null is given, then
     *      "null" is returned. If an empty list is given, "" is returned.
     */
    public static String toStringDelimited(
        final Iterable<?> list,
        final String delimiter)
    {
        if (list == null)
        {
            return "null";
        }

        // The builder never leaves this method, so the synchronization of
        // StringBuffer is not needed.
        final StringBuilder result = new StringBuilder();
        final Iterator<?> iterator = list.iterator();

        // The first element is not preceded by a delimiter.
        if (iterator.hasNext())
        {
            result.append(iterator.next());
        }

        while (iterator.hasNext())
        {
            result.append(delimiter);
            result.append(iterator.next());
        }

        return result.toString();
    }

    /**
     * Creates a new ArrayList from the given pair of values.
     *
     * @param   <DataType>
     *      The data type.
     * @param   first
     *      The first value.
     * @param   second
     *      The second value.
     * @return
     *      A new array list with the two elements in it.
     */
    public static <DataType> ArrayList<DataType> createArrayList(
        final DataType first,
        final DataType second)
    {
        final ArrayList<DataType> result = new ArrayList<DataType>(2);
        result.add(first);
        result.add(second);
        return result;
    }

    /**
     * Creates a new {@link HashMap} with the given expected size. It uses the
     * default load factor (0.75) to estimate the proper number of elements for
     * the data structure to avoid a rehash or resize when the given number of
     * elements are added.
     *
     * @param   <KeyType>
     *      The type for the key of the map.
     * @param   <ValueType>
     *      The type of the value in the map.
     * @param   size
     *      The size. Must be positive.
     * @return
     *      A new hash map with the given expected size.
     */
    public static <KeyType, ValueType> HashMap<KeyType, ValueType> createHashMapWithSize(
        final int size)
    {
        final int initialCapacity =
            (int) Math.ceil(size / DEFAULT_LOAD_FACTOR);
        return new HashMap<KeyType, ValueType>(
            initialCapacity, DEFAULT_LOAD_FACTOR);
    }

    /**
     * Creates a new {@link LinkedHashMap} with the given expected size. It
     * uses the default load factor (0.75) to estimate the proper number of
     * elements for the data structure to avoid a rehash or resize when the
     * given number of elements are added.
     *
     * @param   <KeyType>
     *      The type for the key of the map.
     * @param   <ValueType>
     *      The type of the value in the map.
     * @param   size
     *      The size. Must be positive.
     * @return
     *      A new linked hash map with the given expected size.
     */
    public static <KeyType, ValueType> LinkedHashMap<KeyType, ValueType> createLinkedHashMapWithSize(
        final int size)
    {
        final int initialCapacity =
            (int) Math.ceil(size / DEFAULT_LOAD_FACTOR);
        return new LinkedHashMap<KeyType, ValueType>(
            initialCapacity, DEFAULT_LOAD_FACTOR);
    }

    /**
     * Creates a new {@link HashSet} with the given expected size. It uses the
     * default load factor (0.75) to estimate the proper number of elements for
     * the data structure to avoid a rehash or resize when the given number of
     * elements are added.
     *
     * @param   <ValueType>
     *      The type of the value in the set.
     * @param   size
     *      The size. Must be positive.
     * @return
     *      A new hash set with the given expected size.
     */
    public static <ValueType> HashSet<ValueType> createHashSetWithSize(
        final int size)
    {
        final int initialCapacity =
            (int) Math.ceil(size / DEFAULT_LOAD_FACTOR);
        return new HashSet<ValueType>(initialCapacity, DEFAULT_LOAD_FACTOR);
    }

    /**
     * Creates a new {@link LinkedHashSet} with the given expected size. It
     * uses the default load factor (0.75) to estimate the proper number of
     * elements for the data structure to avoid a rehash or resize when the
     * given number of elements are added.
     *
     * @param   <ValueType>
     *      The type of the value in the set.
     * @param   size
     *      The size. Must be positive.
     * @return
     *      A new linked hash set with the given expected size.
     */
    public static <ValueType> LinkedHashSet<ValueType> createLinkedHashSetWithSize(
        final int size)
    {
        final int initialCapacity =
            (int) Math.ceil(size / DEFAULT_LOAD_FACTOR);
        return new LinkedHashSet<ValueType>(
            initialCapacity, DEFAULT_LOAD_FACTOR);
    }

}