/*
VectorUtils.java
Convenience methods for working with Lists; provides efficient
Union, Intersection, and Difference methods.
Created: 21 July 1998
Module By: Jonathan Abbey, jonabbey@arlut.utexas.edu
-----------------------------------------------------------------------
Directory Directory Management System
Copyright (C) 1996-2012
The University of Texas at Austin
Ganymede is a registered trademark of The University of Texas at Austin
Contact information
Web site: http://www.arlut.utexas.edu/gash2
Author Email: ganymede_author@arlut.utexas.edu
Email mailing list: ganymede@arlut.utexas.edu
US Mail:
Computer Science Division
Applied Research Laboratories
The University of Texas at Austin
PO Box 8029, Austin TX 78713-8029
Telephone: (512) 835-3200
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package arlut.csd.Util;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;
/*------------------------------------------------------------------------------
class
VectorUtils
------------------------------------------------------------------------------*/
/**
 * <p>Convenience methods for working with Lists and Vectors: set-style
 * union, intersection and difference, duplicate detection, and simple
 * joining / splitting of strings.</p>
 *
 * <p>All methods are static and keep no state between calls.  Methods
 * that build their result through a hash set return their elements
 * in an unspecified order.</p>
 */
public class VectorUtils {
  /**
   * union() uses a linear scan when the combined input size is
   * strictly below this value, and a hash set otherwise.  The value
   * is a heuristic, not a measured optimum.
   */
  private static final int SMALL_UNION_LIMIT = 10;

  /**
   * overlaps() builds a hash set when the combined input size is
   * strictly above this value.  Heuristic.
   */
  private static final int OVERLAPS_HASH_LIMIT = 20;

  /**
   * difference() scans vectB directly when the combined input size is
   * strictly below this value, and hashes vectB otherwise.  Heuristic.
   */
  private static final int SMALL_DIFFERENCE_LIMIT = 5;

  /**
   * <p>This method returns a Vector containing the union of the objects
   * contained in vectA and vectB.  The resulting Vector will not
   * contain any duplicates, even if vectA or vectB themselves contain
   * repeated items.</p>
   *
   * <p>For small inputs the result keeps first-occurrence order (vectA
   * first, then vectB); for larger inputs the order is unspecified.</p>
   *
   * <p>This method will always return a new, non-null Vector, even if
   * vectA and/or vectB are null.</p>
   *
   * @param vectA first list, may be null
   * @param vectB second list, may be null
   * @return a new Vector holding each distinct element once
   */
  public static <E> Vector<E> union(List<E> vectA, List<E> vectB)
  {
    int combinedSize = vectSize(vectA) + vectSize(vectB);

    if (combinedSize < SMALL_UNION_LIMIT)
      {
        Vector<E> result = new Vector<E>(combinedSize);

        // Both inputs go through the same duplicate check, so that
        // repeated items inside vectA are dropped as well.
        addAllAbsent(result, vectA);
        addAllAbsent(result, vectB);

        result.trimToSize();

        return result;
      }

    // With enough elements, a hash set gives better scalability for
    // the membership tests than scanning the result Vector.

    Set<E> workSet = new HashSet<E>(combinedSize);

    if (vectA != null)
      {
        workSet.addAll(vectA);
      }

    if (vectB != null)
      {
        workSet.addAll(vectB);
      }

    return new Vector<E>(workSet);
  }

  /**
   * Appends to target every element of source (null elements
   * included) that target does not already contain.  A null source
   * is treated as empty.
   */
  private static <E> void addAllAbsent(Vector<E> target, List<E> source)
  {
    if (source == null)
      {
        return;
      }

    for (E item: source)
      {
        if (!target.contains(item))
          {
            target.add(item);
          }
      }
  }

  /**
   * <p>This method adds obj to vect if and only if vect does not
   * already contain obj.</p>
   *
   * <p>A null obj is silently ignored and never added.</p>
   *
   * @param vect the list to add to; must not be null when obj is non-null
   * @param obj the element to add, may be null
   */
  public static <E> void unionAdd(List<E> vect, E obj)
  {
    if (obj == null || vect.contains(obj))
      {
        return;
      }

    vect.add(obj);
  }

  /**
   * <p>Returns true if vectA and vectB have any elements in
   * common.</p>
   *
   * @param vectA first list, may be null
   * @param vectB second list, may be null
   * @return false if either list is null or empty, or if no element
   * of one list is contained in the other
   */
  public static <E> boolean overlaps(List<E> vectA, List<E> vectB)
  {
    if (vectSize(vectA) == 0 || vectSize(vectB) == 0)
      {
        return false;
      }

    List<E> probe;           // the list we walk
    Collection<E> lookup;    // the collection we test membership in

    if (vectA.size() + vectB.size() > OVERLAPS_HASH_LIMIT)
      {
        lookup = new HashSet<E>(vectA);
        probe = vectB;
      }
    else if (vectA.size() > vectB.size())
      {
        probe = vectA;
        lookup = vectB;
      }
    else
      {
        probe = vectB;
        lookup = vectA;
      }

    for (E item: probe)
      {
        if (lookup.contains(item))
          {
            return true;
          }
      }

    return false;
  }

  /**
   * <p>This method returns a Vector containing the intersection of the
   * objects contained in vectA and vectB.  The result contains no
   * duplicates, and its order is unspecified.</p>
   *
   * <p>This method will always return a new, non-null Vector, even if
   * vectA and/or vectB are null.</p>
   *
   * @param vectA first list, may be null
   * @param vectB second list, may be null
   * @return a new Vector of the distinct elements present in both lists
   */
  public static <E> Vector<E> intersection(List<E> vectA, List<E> vectB)
  {
    if (vectA == null || vectB == null)
      {
        return new Vector<E>();
      }

    Set<E> common = new HashSet<E>(vectA);

    // one pass is enough: anything left in common is in both inputs
    common.retainAll(new HashSet<E>(vectB));

    return new Vector<E>(common);
  }

  /**
   * <p>This method returns a Vector containing the set of objects
   * contained in vectA that are not contained in vectB.</p>
   *
   * <p>NB: This method differs from minus(), which explicitly handles
   * lists with duplicate members and returns an algebraic count of
   * the non-duplicates.  By contrast, difference() returns a Vector
   * with no duplicates, in unspecified order.</p>
   *
   * <p>This method will always return a new, non-null Vector, even if
   * vectA and/or vectB are null.</p>
   *
   * @param vectA the list to take elements from, may be null
   * @param vectB the list of elements to exclude, may be null
   * @return a new Vector of the distinct elements of vectA not in vectB
   */
  public static <E> Vector<E> difference(List<E> vectA, List<E> vectB)
  {
    Vector<E> result = new Vector<E>();

    if (vectSize(vectA) == 0)
      {
        return result;
      }

    // hashing vectA removes its duplicates
    Set<E> uniqueA = new HashSet<E>(vectA);

    if (vectSize(vectB) == 0)
      {
        return new Vector<E>(uniqueA);
      }

    Collection<E> excluded;

    if (vectA.size() + vectB.size() < SMALL_DIFFERENCE_LIMIT)
      {
        excluded = vectB;       // tiny input, a linear scan is fine
      }
    else
      {
        excluded = new HashSet<E>(vectB);
      }

    for (E item: uniqueA)
      {
        if (!excluded.contains(item))
          {
            result.add(item);
          }
      }

    return result;
  }

  /**
   * <p>This method returns true if vectA and vectB contain the same
   * elements, in whatever order.</p>
   *
   * <p>Repeated elements are significant: each distinct element must
   * occur the same number of times in both lists.  A null list is
   * treated as an empty list.</p>
   *
   * @param vectA first list, may be null
   * @param vectB second list, may be null
   * @return true if one list is a reordering of the other
   */
  public static <E> boolean equalMembers(List<E> vectA, List<E> vectB)
  {
    int size = vectSize(vectA);

    if (size != vectSize(vectB))
      {
        return false;
      }

    if (size == 0)
      {
        return true;            // covers null lists as well
      }

    // Count the occurrences in vectA, then cancel them against vectB.

    Map<E, Integer> pending = new HashMap<E, Integer>();

    for (E item: vectA)
      {
        Integer count = pending.get(item);

        pending.put(item, Integer.valueOf(count == null ? 1 : count.intValue() + 1));
      }

    for (E item: vectB)
      {
        Integer count = pending.get(item);

        if (count == null)
          {
            return false;       // vectB has more of item than vectA does
          }

        if (count.intValue() == 1)
          {
            pending.remove(item);
          }
        else
          {
            pending.put(item, Integer.valueOf(count.intValue() - 1));
          }
      }

    return pending.isEmpty();
  }

  /**
   * <p>This method returns a Vector of items that appeared in the
   * vector parameter more than once.  Each such item is listed once,
   * in the order in which its first repetition was encountered.</p>
   *
   * <p>If no duplicates are found or if vector is null, this method
   * returns null.</p>
   *
   * <p>A repeated null element is never listed in the result; if
   * null is the only repeated element, an empty Vector is
   * returned.</p>
   *
   * @param vector the list to examine, may be null
   * @return the repeated items, or null if nothing was repeated
   */
  public static <E> Vector<E> duplicates(List<E> vector)
  {
    if (vector == null)
      {
        return null;
      }

    Vector<E> result = null;
    Set<E> seen = new HashSet<E>();
    Set<E> reported = new HashSet<E>();

    for (E item: vector)
      {
        if (seen.add(item))
          {
            continue;           // first sighting of this item
          }

        if (result == null)
          {
            result = new Vector<E>();
          }

        // the reported set keeps the already-listed check constant time
        if (item != null && reported.add(item))
          {
            result.add(item);
          }
      }

    return result;
  }

  /**
   * <p>This method returns a Vector containing the elements of vectA minus
   * the elements of vectB.</p>
   *
   * <p>NB: This method is not a synonym for difference().  With
   * minus(), if vectA has an element in the Vector 5 times and vectB
   * has it 3 times, the result will have it two times.  difference()
   * always returns a Vector without duplicates.</p>
   *
   * <p>This method will always return a new, non-null Vector, even if
   * vectA and/or vectB are null.</p>
   *
   * @param vectA the list to start from, may be null
   * @param vectB the elements to remove, one occurrence each, may be null
   * @return a new Vector with the remaining elements in vectA's order
   */
  public static <E> Vector<E> minus(List<E> vectA, List<E> vectB)
  {
    if (vectA == null)
      {
        return new Vector<E>(); // empty
      }

    Vector<E> result = new Vector<E>(vectA);

    if (vectB != null)
      {
        for (E item: vectB)
          {
            // remove(Object): drops the first matching occurrence only
            result.remove(item);
          }
      }

    return result;
  }

  /**
   * <p>This method returns a string containing all the elements in vec
   * concatenated together, comma separated.</p>
   *
   * @param vec the elements to join, may be null
   * @return the joined string, or "" if vec is null or empty
   */
  public static String vectorString(Collection<?> vec)
  {
    return VectorUtils.vectorString(vec, ",");
  }

  /**
   * <p>This method returns a string containing all the elements in vec
   * concatenated together, with separator placed between each pair of
   * adjacent elements.</p>
   *
   * @param vec the elements to join, in iteration order; may be null
   * @param separator the text to place between elements
   * @return the joined string, or "" if vec is null or empty
   */
  public static String vectorString(Collection<?> vec, String separator)
  {
    if (vec == null || vec.size() == 0)
      {
        return "";
      }

    StringBuilder joined = new StringBuilder();
    Iterator<?> elements = vec.iterator();

    joined.append(elements.next());

    while (elements.hasNext())
      {
        joined.append(separator);
        joined.append(elements.next());
      }

    return joined.toString();
  }

  /**
   * <p>This method takes a sepChars-separated string and converts it to
   * a vector of fields.  i.e., "gomod,jonabbey" -> a vector whose
   * elements are "gomod" and "jonabbey".</p>
   *
   * <p>NOTE: this method will omit 'degenerate' fields from the output
   * vector.  That is, if input is "gomod,,, jonabbey" and sepChars
   * is ", ", then the result vector will still only have "gomod"
   * and "jonabbey" as elements, even though one might wish to
   * explicitly know about the blanks between commas.  This method
   * is intended mostly for creating email list vectors, rather than
   * general file-parsing vectors.</p>
   *
   * @param input the sepChars-separated string to split.  A null
   * input yields an empty Vector.
   *
   * @param sepChars a string containing a list of characters which
   * may occur as field separators.  Any two fields in the input may
   * be separated by one or many of the characters present in sepChars.
   *
   * @return a new Vector of the non-empty fields, as Strings, in
   * input order
   */
  public static Vector stringVector(String input, String sepChars)
  {
    Vector<String> results = new Vector<String>();

    if (input == null)
      {
        return results;
      }

    int length = input.length();
    int fieldStart = 0;

    while (true)
      {
        // skip any leading field-separator chars

        while (fieldStart < length && sepChars.indexOf(input.charAt(fieldStart)) != -1)
          {
            fieldStart++;
          }

        if (fieldStart == length)
          {
            break;
          }

        int fieldEnd = findNextSep(input, fieldStart, sepChars);

        if (fieldEnd == -1)
          {
            // last field runs to the end of the input
            results.add(input.substring(fieldStart));
            break;
          }

        results.add(input.substring(fieldStart, fieldEnd));
        fieldStart = fieldEnd + 1;
      }

    return results;
  }

  /**
   * <p>findNextSep() takes a string, a starting position, and a string of
   * characters to be considered field separators, and returns the
   * first index at or after startDex whose char is in sepChars.</p>
   *
   * <p>If there are no chars in sepChars at or past startDex in input,
   * findNextSep() returns -1.</p>
   */
  private static int findNextSep(String input, int startDex, String sepChars)
  {
    // A single forward scan stops at the first separator, rather than
    // searching to the end of the input once per separator char.

    for (int i = Math.max(startDex, 0); i < input.length(); i++)
      {
        if (sepChars.indexOf(input.charAt(i)) != -1)
          {
            return i;
          }
      }

    return -1;
  }

  /**
   * Null-safe size: returns 0 for a null list, x.size() otherwise.
   */
  private static int vectSize(List<?> x)
  {
    return (x == null) ? 0 : x.size();
  }

  // debug rig.

  public static void main(String[] args)
  {
    // String testString = "jon, beth ross,,,darren,anna";
    String testString = "jon, beth ross,,,darren,anna,,,,,";

    Vector results = stringVector(testString, ", ");

    for (int i = 0; i < results.size(); i++)
      {
        System.out.println(i + ": " + results.get(i));
      }
  }
}