/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
http://www.cs.umass.edu/~mccallum/mallet
This software is provided under the terms of the Common Public License,
version 1.0, as published by http://www.opensource.org. For further
information, see the file `LICENSE' included with this distribution. */
/**
Simple, in-memory inverted index that stores a list of instances having each feature, but not
a value associated with each. Currently only works with FeatureVectors.
@author Andrew McCallum <a href="mailto:mccallum@cs.umass.edu">mccallum@cs.umass.edu</a>
*/
package cc.mallet.types;
import java.util.*;
/**
 * Simple, in-memory inverted index over an {@link InstanceList}: for every feature index
 * it keeps the list of instances whose {@link FeatureVector} holds a non-zero value for
 * that feature. Only membership is stored, not the feature value itself.
 *
 * <p>The index is built once, in the constructor; nothing in this class updates it
 * afterwards. Only instances whose data is a {@code FeatureVector} are supported.
 */
public class InvertedIndex
{
	/** The instance list this index was built from. */
	InstanceList ilist;

	/**
	 * Postings table indexed by feature index. A slot stays {@code null} until the first
	 * instance with that feature is added, so {@code null} means "no instances".
	 */
	ArrayList[] ii;

	/**
	 * Builds the index by scanning every instance of {@code ilist}.
	 *
	 * @param ilist the instances to index; each instance's data must be a {@code FeatureVector}
	 * @throws IllegalArgumentException if an instance's data is not a {@code FeatureVector}
	 */
	public InvertedIndex (InstanceList ilist)
	{
		// bug fix from Georgios Paltoglou (G.Paltoglou@wlv.ac.uk)
		this.ilist = ilist;
		// The table is sized from the alphabet as it is right now; it is never resized.
		int numFeatures = ilist.getDataAlphabet().size();
		ii = new ArrayList[numFeatures];
		for (int i = 0; i < ilist.size(); i++) {
			Instance inst = ilist.get(i);
			if (!(inst.getData() instanceof FeatureVector)) {
				throw new IllegalArgumentException (this.getClass().getName() +
						" currently only handles FeatureVector data");
			}
			FeatureVector fv = (FeatureVector) inst.getData ();
			for (int fl = 0; fl < fv.numLocations(); fl++) {
				// Locations holding an explicit zero do not count as "having" the feature.
				if (fv.valueAtLocation(fl) != 0) {
					addEntry (fv.indexAtLocation(fl), inst);
				}
			}
		}
	}

	/** Appends {@code instance} to the postings list of {@code featureIndex}, creating the list on first use. */
	private void addEntry (int featureIndex, Instance instance)
	{
		if (ii[featureIndex] == null) {
			ii[featureIndex] = new ArrayList(2);
		}
		ii[featureIndex].add (instance);
	}

	/**
	 * Resolves {@code feature} to a position in the postings table.
	 *
	 * @throws IllegalArgumentException if the alphabet does not know the feature, or if it
	 *         reports an index outside the table allocated at construction time
	 */
	private int lookupFeatureIndex (Object feature)
	{
		int index = ilist.getDataAlphabet().lookupIndex (feature, false);
		// index >= ii.length presumably means the alphabet gained entries after this index
		// was built; such a feature is not in the index, so report it the same way as an
		// unknown feature instead of failing with ArrayIndexOutOfBoundsException.
		if (index < 0 || index >= ii.length) {
			throw new IllegalArgumentException ("Feature "+feature+" not contained in InvertedIndex");
		}
		return index;
	}

	/** Returns the instance list this index was built from. */
	public InstanceList getInstanceList () { return ilist; }

	/**
	 * Returns the instances recorded for a feature index.
	 *
	 * @param featureIndex position of the feature in the postings table
	 * @return the internal list of instances (not a copy), or {@code null} if no instance
	 *         was recorded for this feature
	 * @throws ArrayIndexOutOfBoundsException if {@code featureIndex} is outside the table
	 */
	public ArrayList getInstancesWithFeature (int featureIndex)
	{
		return ii[featureIndex];
	}

	/**
	 * Returns the instances recorded for a feature, looked up through the data alphabet.
	 *
	 * @param feature the alphabet entry to look up
	 * @return the internal list of instances (not a copy), or {@code null} if no instance
	 *         was recorded for this feature
	 * @throws IllegalArgumentException if the feature is not contained in this index
	 */
	public ArrayList getInstancesWithFeature (Object feature)
	{
		return getInstancesWithFeature (lookupFeatureIndex (feature));
	}

	/**
	 * Returns how many instances were recorded for a feature index.
	 *
	 * @param featureIndex position of the feature in the postings table
	 * @return the number of instances, 0 if none were recorded
	 * @throws ArrayIndexOutOfBoundsException if {@code featureIndex} is outside the table
	 */
	public int getCountWithFeature (int featureIndex)
	{
		ArrayList a = ii[featureIndex];
		return a == null ? 0 : a.size();
	}

	/**
	 * Returns how many instances were recorded for a feature, looked up through the data alphabet.
	 *
	 * @param feature the alphabet entry to look up
	 * @return the number of instances, 0 if none were recorded
	 * @throws IllegalArgumentException if the feature is not contained in this index
	 */
	public int getCountWithFeature (Object feature)
	{
		return getCountWithFeature (lookupFeatureIndex (feature));
	}
}