package edu.stanford.nlp.util;
import java.io.*;
import java.util.*;
/**
* An Index is a collection that maps between an Object vocabulary and a
* contiguous non-negative integer index series beginning (inclusively) at 0.
* It supports constant-time lookup in
* both directions (via <code>get(int)</code> and <code>indexOf(E)</code>.
* The <code>indexOf(E)</code> method compares objects by
* <code>equals</code>, as other Collections.
* <p/>
* The typical usage would be:
* <p><code>Index index = new Index(collection);</code>
* <p> followed by
* <p><code>int i = index.indexOf(object);</code>
* <p> or
* <p><code>Object o = index.get(i);</code>
* <p>The source contains a concrete example of use as the main method.
* <p>An Index can be locked or unlocked: a locked index cannot have new
* items added to it.
*
* @author <a href="mailto:klein@cs.stanford.edu">Dan Klein</a>
* @version 1.0
* @see AbstractCollection
* @since 1.0
* @author <a href="mailto:yeh1@stanford.edu">Eric Yeh</a> (added write to/load from buffer)
*/
public class Index<E> extends AbstractCollection<E> implements Serializable, RandomAccess, IndexInterface<E> {
// these variables are also used in IntArrayIndex
ArrayList<E> objects = new ArrayList<E>();
HashMap<E,Integer> indexes = new HashMap<E,Integer>();
boolean locked; // = false;
/**
* Clears this Index.
*/
@Override
public void clear() {
objects.clear();
indexes.clear();
}
/**
* Returns the index of each elem in a List.
* @param elems The list of items
* @return An array of indices
*/
public int[] indices(Collection<E> elems) {
int[] indices = new int[elems.size()];
int i = 0;
for (E elem : elems) {
indices[i++] = indexOf(elem);
}
return indices;
}
/**
* Looks up the objects corresponding to an array of indices, and returns them in a {@link Collection}.
* This collection is not a copy, but accesses the data structures of the Index.
* @param indices An array of indices
* @return a {@link Collection} of the objects corresponding to the indices argument.
*/
public Collection<E> objects(final int[] indices) {
return new AbstractList<E>() {
@Override
public E get(int index) {
return objects.get(indices[index]);
}
@Override
public int size() {
return indices.length;
}
};
}
/**
* Returns the number of indexed objects.
* @return the number of indexed objects.
*/
@Override
public int size() {
return objects.size();
}
/**
* Gets the object whose index is the integer argument.
* @param i the integer index to be queried for the corresponding argument
* @return the object whose index is the integer argument.
*/
public E get(int i) {
return objects.get(i);
}
/**
* Returns a complete {@link List} of indexed objects, in the order of their indices. <b>DANGER!</b>
* The current implementation returns the actual index list, not a defensive copy. Messing with this List
* can seriously screw up the state of the Index. (perhaps this method needs to be eliminated? I don't think it's
* ever used in ways that we couldn't use the Index itself for directly. --Roger, 12/29/04)
* @return a complete {@link List} of indexed objects
*/
public List<E> objectsList() {
return objects;
}
/**
* Queries the Index for whether it's locked or not.
* @return whether or not the Index is locked
*/
public boolean isLocked() {
return locked;
}
/** Locks the Index. A locked index cannot have new elements added to it (calls to {@link #add} will
* leave the Index unchanged and return <code>false</code>).*/
public void lock() {
locked = true;
}
/** Unlocks the Index. A locked index cannot have new elements added to it (calls to {@link #add} will
* leave the Index unchanged and return <code>false</code>).*/
public void unlock() {
locked = false;
}
/**
* Returns the integer index of the Object in the Index or -1 if the Object is not already in the Index.
* @param o the Object whose index is desired.
* @return the index of the Object argument. Returns -1 if the object is not in the index.
*/
public int indexOf(E o) {
return indexOf(o, false);
}
/**
* Takes an Object and returns the integer index of the Object,
* perhaps adding it to the index first.
* Returns -1 if the Object is not in the Index.
* (Note: indexOf(x, true) is the direct replacement for the number(x)
* method in the old Numberer class.)
*
* @param o the Object whose index is desired.
* @param add Whether it is okay to add new items to the index
* @return the index of the Object argument. Returns -1 if the object is not in the index.
*/
public int indexOf(E o, boolean add) {
Integer index = indexes.get(o);
if (index == null) {
if (add) {
add(o);
index = indexes.get(o);
} else {
return -1;
}
}
return index;
}
// TODO: delete this because we can leach off of Abstract Collection
/**
* Adds every member of Collection to the Index. Does nothing for members already in the Index.
*
* @return true if some item was added to the index and false if no
* item was already in the index or if the index is locked
*/
@Override
public boolean addAll(Collection<? extends E> c) {
boolean changed = false;
for (E element: c) {
changed |= add(element);
//changed &= add(element);
}
return changed;
}
/**
* Adds an object to the Index. If it was already in the Index,
* then nothing is done. If it is not in the Index, then it is
* added iff the Index hasn't been locked.
*
* @return true if the item was added to the index and false if the
* item was already in the index or if the index is locked
*/
@Override
public boolean add(E o) {
Integer index = indexes.get(o);
if (index == null && ! locked) {
index = objects.size();
objects.add(o);
indexes.put(o, index);
return true;
}
return false;
}
/**
* Checks whether an Object already has an index in the Index
* @param o the object to be queried.
* @return true iff there is an index for the queried object.
*/
@Override
public boolean contains(Object o) {
return indexes.containsKey(o);
}
/**
* Creates a new Index.
*/
public Index() {
super();
}
/**
* Creates a new Index.
* @param capacity Initial capacity of Index.
*/
public Index(int capacity) {
super();
objects = new ArrayList<E>(capacity);
indexes = new HashMap<E,Integer>(capacity);
}
/**
* Creates a new Index and adds every member of c to it.
* @param c A collection of objects
*/
public Index(Collection<? extends E> c) {
this();
addAll(c);
}
public void saveToFilename(String file) {
BufferedWriter bw = null;
try {
bw = new BufferedWriter(new FileWriter(file));
for (int i = 0, sz = size(); i < sz; i++) {
bw.write(i + "=" + get(i) + "\n");
}
bw.close();
} catch (IOException e) {
e.printStackTrace();
} finally {
if (bw != null) {
try {
bw.close();
} catch (IOException ioe) {
// give up
}
}
}
}
public static Index<String> loadFromFilename(String file) {
Index<String> index = new Index<String>();
BufferedReader br = null;
try {
br = new BufferedReader(new FileReader(file));
for (String line; (line = br.readLine()) != null; ) {
int start = line.indexOf('=');
if (start == -1 || start == line.length() - 1) {
continue;
}
index.add(line.substring(start + 1));
}
br.close();
} catch (Exception e) {
e.printStackTrace();
} finally {
if (br != null) {
try {
br.close();
} catch (IOException ioe) {
// forget it
}
}
}
return index;
}
/**
* This saves the contents of this index into string form, as part of a larger
* text-serialization. This is not intended to act as a standalone routine,
* instead being called from the text-serialization routine for a component
* that makes use of an Index, so everything can be stored in one file. This is
* similar to <code>saveToFileName</code>.
* NOTE: adds an extra newline at the end of the sequence.
* @param bw Writer to save to.
* @throws IOException Exception thrown if cannot save.
*/
public void saveToWriter(Writer bw) throws IOException {
for (int i = 0, sz = size(); i < sz; i++) {
bw.write(i + "=" + get(i) + "\n");
}
bw.write("\n");
}
/**
* This is the analogue of <code>loadFromFilename</code>, and is intended to be included in a routine
* that unpacks a text-serialized form of an object that incorporates an Index.
* NOTE: presumes that the next readLine() will read in the first line of the
* portion of the text file representing the saved Index. Currently reads until it
* encounters a blank line, consuming that line and returning the Index.
* TODO: figure out how best to terminate: currently a blank line is considered to be a terminator.
* @param br The Reader to read the index from
* @return An Index read from a file
*/
public static Index<String> loadFromReader(BufferedReader br) throws Exception {
Index<String> index = new Index<String>();
String line = br.readLine();
// terminate if EOF reached, or if a blank line is encountered.
while ((line != null) && (line.length() > 0)) {
int start = line.indexOf('=');
if (start == -1 || start == line.length() - 1) {
continue;
}
index.add(line.substring(start + 1));
line = br.readLine();
}
return index;
}
/** Returns a readable version of the Index contents
*
* @return A String showing the full index contents
*/
@Override
public String toString() {
return toString(Integer.MAX_VALUE);
}
/** Returns a readable version of at least part of the Index contents.
*
* @param n Show the first <i>n</i> items in the Index
* @return A String rshowing some of the index contents
*/
public String toString(int n) {
StringBuilder buff = new StringBuilder("[");
int sz = objects.size();
if (n > sz) {
n = sz;
}
int i;
for (i = 0; i < n; i++) {
E e = objects.get(i);
buff.append(i).append("=").append(e);
if (i < (sz-1)) buff.append(",");
}
if (i < sz) buff.append("...");
buff.append("]");
return buff.toString();
}
public static void main(String[] args) {
List<String> list = new ArrayList<String>();
list.add("A");
list.add("B");
list.add("A");
list.add("C");
Index<String> index = new Index<String>(list);
System.out.println("Index size: " + index.size());
System.out.println("Index has A? : " + index.contains("A"));
System.out.println("Index of A: " + index.indexOf("A"));
System.out.println("Index of B: " + index.indexOf("B"));
System.out.println("Index of C: " + index.indexOf("C"));
System.out.println("Object 0: " + index.get(0));
index = index.unmodifiableView();
System.out.println("Index size: " + index.size());
System.out.println("Index has A? : " + index.contains("A"));
System.out.println("Index of A: " + index.indexOf("A"));
System.out.println("Index of B: " + index.indexOf("B"));
System.out.println("Index of C: " + index.indexOf("C"));
System.out.println("Object 0: " + index.get(0));
}
private static final long serialVersionUID = 5398562825928375260L;
/**
* Returns an iterator over the elements of the collection.
* @return An iterator over the objects indexed
*/
@Override
public Iterator<E> iterator() {
return objects.iterator();
}
/**
* Removes an object from the index, if it exists (otherwise nothing
* happens). Note, the indices of other
* elements will not be changed, so indices will no longer necessarily
* be contiguous
* @param o the object to remove
* @return whether anything was removed
*/
@Override
public boolean remove(Object o) {
Integer oldIndex = indexes.remove(o);
if (oldIndex == null) {
return false;
}
objects.set(oldIndex, null);
return true;
}
/**
* Returns an unmodifiable view of the Index. It is just
* a locked index that cannot be unlocked, so if you
* try to add something, nothing will happen (it won't throw
* an exception). Trying to unlock it will throw an
* UnsupportedOperationException. If the
* underlying Index is modified, the change will
* "write-through" to the view.
*
* @return An unmodifiable view of the Index
*/
public Index<E> unmodifiableView() {
Index<E> newIndex = new Index<E>() {
@Override
public void unlock() { throw new UnsupportedOperationException("This is an unmodifiable view!"); }
private static final long serialVersionUID = 3415903369787491736L;
};
newIndex.objects = objects;
newIndex.indexes = indexes;
newIndex.lock();
return newIndex;
}
}