/* Copyright 2003-2004, Carnegie Mellon, All Rights Reserved */ package edu.cmu.minorthird.classify; import it.unimi.dsi.fastutil.objects.Object2ObjectArrayMap; import it.unimi.dsi.fastutil.objects.ObjectArrayList; import java.io.Serializable; import java.util.Arrays; import java.util.Iterator; import java.util.SortedSet; import java.util.TreeSet; import edu.cmu.minorthird.classify.multi.MultiExample; /** * Creates Features, and maintains a mapping between Features and * numeric ids. Also ensures that only a single feature instance * exists with a particular name. */ public class FeatureFactory implements Serializable{ static private final long serialVersionUID=20080115L; // maps features to canonical features private Object2ObjectArrayMap<Feature, Feature> canonicalMap = new Object2ObjectArrayMap<>(); // maps feature ID's to canonical features private ObjectArrayList<Feature> idFeatureMap = new ObjectArrayList<>(); public int getMaxFeatureIndex(){ return idFeatureMap.size()-1; } public boolean contains(Feature f){ return canonicalMap.containsKey(f); } // get the canonical version of the feature public Feature getFeature(Feature f){ Feature canonical=canonicalMap.get(f); if(canonical==null){ // not yet stored, so create a canonical version canonical=new Feature(f.getName(),idFeatureMap.size()); canonicalMap.put(f,canonical); idFeatureMap.add(canonical); } return canonical; } public Feature getFeature(String fullName){ Feature f=new Feature(fullName); return getFeature(f); } public Feature getFeature(String[] name){ Feature f=new Feature(name); return getFeature(f); } public Feature getFeature(int id){ if(id<0||id>=idFeatureMap.size()){ return null; } else{ return idFeatureMap.get(id); } } public int getID(Feature feature){ return idFeatureMap.indexOf(feature); } /** * Return a version of the instance in which all features have been * translated to canonical versions from the feature factory */ public CompactInstance compress(Instance instance){ if(instance instanceof CompactInstance&&((CompactInstance)instance).getFactory()==this){ return (CompactInstance)instance; } else{ return new CompactInstance(instance); } } /** * Return a version of the Example in which all features have been * translated to canonical versions from the feature factory. */ public Example compress(Example example){ return new Example(compress(example.asInstance()),example.getLabel(),example.getWeight()); } /** * Return a version of the MultiExample in which all features have been * translated to canonical versions from the feature factory. */ public MultiExample compress(MultiExample example){ return new MultiExample(compress(example.asInstance()),example.getMultiLabel(),example.getWeight()); } @Override public String toString(){ StringBuilder b=new StringBuilder(); b.append(super.toString()).append(" : ["); for(int i=0;i<idFeatureMap.size();i++){ b.append(i).append("=").append(idFeatureMap.get(i)); if(i<idFeatureMap.size()-1){ b.append(" "); } } b.append("]"); return b.toString(); } /** * A compact but immutable implementation of an instance. * @author wcohen, ksteppe */ protected class CompactInstance extends AbstractInstance implements Serializable{ static final long serialVersionUID=20071015L; private Feature[] binaryFeatures; private Feature[] numericFeatures; private double[] weights; /** * Create a compact instance from some other instance * @param instance Instance object to generate from */ public CompactInstance(Instance instance){ // copy over the source and subpopulation id this.source=instance.getSource(); this.subpopulationId=instance.getSubpopulationId(); // create a sorted set for holding and sorting the features SortedSet<Feature> set=new TreeSet<Feature>(); // iterate over binary features and store in array for(Iterator<Feature> it=instance.binaryFeatureIterator();it.hasNext();){ set.add(getFeature(it.next())); } binaryFeatures=set.toArray(new Feature[set.size()]); set.clear(); // iterate over numeric features and store in array for(Iterator<Feature> it=instance.numericFeatureIterator();it.hasNext();){ set.add(getFeature(it.next())); } numericFeatures=set.toArray(new Feature[set.size()]); // store numeric feature weights weights=new double[numericFeatures.length]; for(int i=0;i<numericFeatures.length;i++){ weights[i]=instance.getWeight(numericFeatures[i]); } } // returns the factory that compressed this instance public FeatureFactory getFactory(){return FeatureFactory.this;} // using binary search to find feature weight; should it be more efficient? @Override public double getWeight(Feature f){ // search through binary features first if(Arrays.binarySearch(binaryFeatures,f)>-1){ return 1; } // then search through numeric features int index=Arrays.binarySearch(numericFeatures,f); if(index>-1){ return weights[index]; } else{ return 0; } } @Override public Iterator<Feature> binaryFeatureIterator(){ return new FeatureArrayIterator(binaryFeatures); } @Override public Iterator<Feature> numericFeatureIterator(){ return new FeatureArrayIterator(numericFeatures); } @Override public Iterator<Feature> featureIterator(){ return new UnionFeatureArrayIterator(binaryFeatures,numericFeatures); } @Override public int numFeatures(){ return binaryFeatures.length+numericFeatures.length; } @Override public String toString(){ StringBuilder b=new StringBuilder(); b.append("[compact instance/").append(subpopulationId).append(":"); for(int i=0;i<binaryFeatures.length;i++){ b.append(" ").append(binaryFeatures[i]); } for(int i=0;i<numericFeatures.length;i++){ b.append(" ").append(numericFeatures[i]).append(":").append(getWeight(numericFeatures[i])); } b.append("]"); return b.toString(); } /** a looper over a feature array with Feature.Looper type */ public class FeatureArrayIterator implements Iterator<Feature>{ private int current; private Feature[] features; public FeatureArrayIterator(Feature[] features){ current=0; this.features=features; } @Override public boolean hasNext(){ return current<features.length; } @Override public Feature next(){ return features[current++]; } @Override public void remove(){ throw new Error("method CompactInstance.FeatureArrayLooper: remove not implemented."); } } /** sequential composite of n FeatureArrayLoopers (constructor for 2) */ public class UnionFeatureArrayIterator extends FeatureArrayIterator{ public UnionFeatureArrayIterator(Feature[] features,Feature[] moreFeatures){ super(combine(features,moreFeatures)); } } } private static Feature[] combine(Feature[] a1,Feature[] a2){ Feature[] combined=new Feature[a1.length+a2.length]; for(int i=0;i<a1.length;i++){ combined[i]=a1[i]; } for(int i=0;i<a2.length;i++){ combined[a1.length+i]=a2[i]; } return combined; } }