package edu.cmu.minorthird.classify; import java.util.Iterator; /** * @author Edoardo Airoldi * Date: Nov 24, 2003 */ public class BasicFeatureIndex extends DatasetIndex implements FeatureIndex{ static final long serialVersionUID=20071015L; public BasicFeatureIndex(){ super(); } public BasicFeatureIndex(Dataset data){ this(); for(Iterator<Example> i=data.iterator();i.hasNext();){ addExample(i.next()); } } /** Number of examples with label l containing non-zero values for feature f. */ public int size(Feature f,String label){ int size=0; for(int j=0;j<size(f);j++){ if(label.equals(getExample(f,j).getLabel().bestClassName())){ size+=1; } } return size; } /** Get counts of feature f in i-th example containing feature f */ @Override public double getCounts(Feature f,int i){ return (featureIndex(f).get(i)).getWeight(f); } /** Get counts of feature f in examples with label l */ public double getCounts(Feature f,String label){ double total=0.0; for(int j=0;j<size(f);j++){ //System.out.println( getExample(f,j).getLabel().bestClassName() ); if(label.equals(getExample(f,j).getLabel().bestClassName())){ total+=(featureIndex(f).get(j)).getWeight(f); } } return total; } /** Get number of documents which contain feature f with label l */ public double getDocCounts(Feature f,String label){ double total=0.0; for(int j=0;j<size(f);j++){ //System.out.println( getExample(f,j).getLabel().bestClassName() ); if(label.equals(getExample(f,j).getLabel().bestClassName())){ total+=1.0; } } return total; } @Override public String toString(){ StringBuilder buf=new StringBuilder("[index"); for(Iterator<Feature> i=featureIterator();i.hasNext();){ Feature f=i.next(); buf.append("\n"+f+":"); for(int j=0;j<size(f);j++){ buf.append("\n\t"+getExample(f,j).toString()); //buf.append("\n\t"+"feature:"+f+" counts:"+getCounts(f,j)); //buf.append(" label:"+getExample(f,j).getLabel().bestClassName()); } buf.append("\n\t"+"feature:"+f+" posCounts:"+getCounts(f,"POS")+" negCouns:"+getCounts(f,"NEG")); } buf.append("\nindex]"); return buf.toString(); } public static void main(String[] args){ System.out.println(new BasicFeatureIndex(SampleDatasets.sampleData("bayes",false))); } }