/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
http://www.cs.umass.edu/~mccallum/mallet
This software is provided under the terms of the Common Public License,
version 1.0, as published by http://www.opensource.org. For further
information, see the file `LICENSE' included with this distribution. */
/**
@author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a>
*/
package edu.nd.nina.types;
import gnu.trove.map.hash.TObjectIntHashMap;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.rmi.dgc.VMID;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
* A mapping between integers and objects where the mapping in each direction is
* efficient. Integers are assigned consecutively, starting at zero, as objects
* are added to the Alphabet. Objects can not be deleted from the Alphabet and
* thus the integers are never reused.
* <p>
* The most common use of an alphabet is as a dictionary of feature names
* associated with a {@link cc.mallet.types.FeatureVector} in an
* {@link cc.mallet.types.Instance}. In a simple document classification usage,
* each unique word in a document would be a unique entry in the Alphabet with a
* unique integer associated with it. FeatureVectors rely on the integer part of
* the mapping to efficiently represent the subset of the Alphabet present in
* the FeatureVector.
*
* @see FeatureVector
* @see Instance
*/
public class Alphabet {
    /** Maps each entry to the index it was assigned when it was first added. */
    TObjectIntHashMap<Object> map;
    /** Entries in index order: {@code entries.get(i)} is the object with index {@code i}. */
    ArrayList<Object> entries;
    /** While {@code true}, lookups never add new entries. */
    boolean growthStopped = false;
    /** Class shared by all entries, or {@code null} while it is not yet known. */
    Class<Object> entryClass = null;
    // Per the original comment: used in readResolve to identify persistent
    // instances (no readResolve is defined in this class). Exposed through
    // getInstanceId()/setInstanceId().
    VMID instanceId = new VMID();

    /**
     * Creates an empty Alphabet.
     *
     * @param capacity
     *            initial capacity of the underlying map and entry list
     * @param entryClass
     *            the class every entry must have, or {@code null} to adopt the
     *            class of the first entry that is added
     */
    public Alphabet(int capacity, Class<Object> entryClass) {
        this.map = new TObjectIntHashMap<Object>(capacity);
        this.entries = new ArrayList<Object>(capacity);
        this.entryClass = entryClass;
    }

    /** Creates an empty Alphabet with an initial capacity of 8 and the given entry class. */
    public Alphabet(Class<Object> entryClass) {
        this(8, entryClass);
    }

    /** Creates an empty Alphabet with the given initial capacity and no fixed entry class. */
    public Alphabet(int capacity) {
        this(capacity, null);
    }

    /** Creates an empty Alphabet with an initial capacity of 8 and no fixed entry class. */
    public Alphabet() {
        this(8, null);
    }

    /**
     * Creates an Alphabet containing the given entries, indexed in array order
     * (an entry that occurs more than once keeps the index of its first
     * occurrence).
     *
     * @param entries
     *            the initial entries; none may be {@code null}
     */
    public Alphabet(Object[] entries) {
        this(entries.length);
        for (Object entry : entries) {
            this.lookupIndex(entry);
        }
    }

    /**
     * Returns the index of {@code entry}, optionally adding it first.
     *
     * @param entry
     *            the object to look up; must not be {@code null}
     * @param addIfNotPresent
     *            if {@code true} and growth has not been stopped, an absent
     *            entry is appended and its new index is returned
     * @return the index of the entry, or -1 if it is not present and was not
     *         added
     * @throws IllegalArgumentException
     *             if {@code entry} is {@code null}, or if its class differs
     *             from the entry class of this Alphabet
     */
    @SuppressWarnings("unchecked")
    public int lookupIndex(Object entry, boolean addIfNotPresent) {
        if (entry == null) {
            throw new IllegalArgumentException(
                    "Can't lookup \"null\" in an Alphabet.");
        }
        // Insist that all entries in the Alphabet are of the same class. This
        // may not be strictly necessary, but will catch a bunch of easily-made
        // errors.
        if (entryClass != null && entry.getClass() != entryClass) {
            throw new IllegalArgumentException("Non-matching entry class, "
                    + entry.getClass() + ", was " + entryClass);
        }
        // containsKey is checked first rather than relying on the value
        // get() returns for a missing key.
        if (map.containsKey(entry)) {
            return map.get(entry);
        }
        if (growthStopped || !addIfNotPresent) {
            return -1;
        }
        // Adopt the entry class only when an entry is really inserted, so that
        // a read-only lookup cannot pin the class to a never-added object.
        if (entryClass == null) {
            entryClass = (Class<Object>) entry.getClass();
        }
        int newIndex = entries.size();
        map.put(entry, newIndex);
        entries.add(entry);
        return newIndex;
    }

    /**
     * Returns the index of {@code entry}, adding it if it is absent and growth
     * has not been stopped. Equivalent to {@code lookupIndex(entry, true)}.
     */
    public int lookupIndex(Object entry) {
        return lookupIndex(entry, true);
    }

    /**
     * Returns the entry stored at {@code index}.
     *
     * @throws IndexOutOfBoundsException
     *             if {@code index} is negative or not less than {@link #size()}
     */
    public Object lookupObject(int index) {
        return entries.get(index);
    }

    /** Returns a new array holding all entries in index order. */
    public Object[] toArray() {
        return entries.toArray();
    }

    /**
     * Returns an array containing all the entries in the Alphabet. The runtime
     * type of the returned array is the runtime type of in. If in is large
     * enough to hold everything in the alphabet, then it is used. The returned
     * array is such that for all entries <tt>obj</tt>,
     * <tt>ret[lookupIndex(obj)] = obj</tt> .
     */
    public Object[] toArray(Object[] in) {
        return entries.toArray(in);
    }

    /**
     * Returns a read-only iterator over the entries in index order. Its
     * {@code remove} method throws {@link UnsupportedOperationException},
     * because removing from the entry list alone would leave the
     * object-to-index map out of sync with it.
     */
    public Iterator<Object> iterator() {
        return java.util.Collections.unmodifiableList(entries).iterator();
    }

    /**
     * Returns the entries stored at the given indices, in the same order.
     *
     * @param indices
     *            the indices to look up
     * @return a new list with one entry per element of {@code indices}
     */
    public List<Object> lookupObjects(int[] indices) {
        List<Object> found = new ArrayList<Object>(indices.length);
        for (int index : indices) {
            found.add(entries.get(index));
        }
        return found;
    }

    /**
     * Stores the entries corresponding to {@code indices} into {@code buf}.
     *
     * @param indices
     *            An array of indices to look up
     * @param buf
     *            An array to store the returned objects in; it must have at
     *            least {@code indices.length} elements.
     * @return {@code buf}, with {@code buf[i]} set to the entry at
     *         {@code indices[i]}
     */
    public Object[] lookupObjects(int[] indices, Object[] buf) {
        for (int i = 0; i < indices.length; i++) {
            buf[i] = entries.get(indices[i]);
        }
        return buf;
    }

    /**
     * Looks up the index of every object in {@code objects}.
     *
     * @param objects
     *            the objects to look up
     * @param addIfNotPresent
     *            passed on to {@link #lookupIndex(Object, boolean)} for each
     *            object
     * @return an array whose i-th element is the index of {@code objects[i]},
     *         or -1 where the object is absent and was not added
     */
    public int[] lookupIndices(Object[] objects, boolean addIfNotPresent) {
        int[] indices = new int[objects.length];
        for (int i = 0; i < objects.length; i++) {
            indices[i] = lookupIndex(objects[i], addIfNotPresent);
        }
        return indices;
    }

    /** Returns whether {@code entry} is present; never adds it. */
    public boolean contains(Object entry) {
        return map.contains(entry);
    }

    /** Returns the number of entries. */
    public int size() {
        return entries.size();
    }

    /** Prevents lookups from adding new entries until {@link #startGrowth()} is called. */
    public void stopGrowth() {
        growthStopped = true;
    }

    /** Allows lookups to add new entries again. */
    public void startGrowth() {
        growthStopped = false;
    }

    /** Returns whether growth is currently stopped. */
    public boolean growthStopped() {
        return growthStopped;
    }

    /** Returns the class of the entries, or {@code null} if it is not yet known. */
    public Class<Object> entryClass() {
        return entryClass;
    }

    /**
     * Return String representation of all Alphabet entries, each separated by a
     * newline.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        for (Object entry : entries) {
            sb.append(entry.toString());
            sb.append('\n');
        }
        return sb.toString();
    }

    /** Prints every entry to {@code System.out} as {@code index => entry}. */
    public void dump() {
        dump(System.out);
    }

    /** Prints every entry to {@code out} as {@code index => entry}, one per line. */
    public void dump(PrintStream out) {
        // Auto-flushing writer, so each println reaches the stream.
        dump(new PrintWriter(new OutputStreamWriter(out), true));
    }

    /** Prints every entry to {@code out} as {@code index => entry}, one per line. */
    public void dump(PrintWriter out) {
        for (int i = 0; i < entries.size(); i++) {
            out.println(i + " => " + entries.get(i));
        }
    }

    /**
     * Convenience method that can often implement alphabetsMatch in classes
     * that implement the AlphabetsCarrying interface.
     * <p>
     * The two objects match when their alphabet lists have the same length and
     * each pair of alphabets at the same position is either the same reference
     * (including both {@code null}) or equal according to {@code equals}. This
     * class does not override {@code equals}, so unless a subclass does, that
     * comparison is by identity.
     */
    public static boolean alphabetsMatch(AlphabetCarrying object1,
            AlphabetCarrying object2) {
        List<Alphabet> a1 = object1.getAlphabets();
        List<Alphabet> a2 = object2.getAlphabets();
        if (a1.size() != a2.size()) {
            return false;
        }
        for (int i = 0; i < a1.size(); i++) {
            Alphabet first = a1.get(i);
            Alphabet second = a2.get(i);
            if (first == second) {
                continue;
            }
            if (first == null || second == null) {
                return false; // One is null, but the other isn't
            }
            if (!first.equals(second)) {
                return false;
            }
        }
        return true;
    }

    /** Returns the identifier of this instance (for debugging). */
    public VMID getInstanceId() {
        return instanceId;
    }

    /** Replaces the identifier of this instance. */
    public void setInstanceId(VMID id) {
        this.instanceId = id;
    }
}