// SpanFE.java example

// Explorer
// MinorThird-master
package edu.cmu.minorthird.text.learn;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Serializable;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import edu.cmu.minorthird.classify.Feature;
import edu.cmu.minorthird.classify.Instance;
import edu.cmu.minorthird.classify.MutableInstance;
import edu.cmu.minorthird.text.AnnotatorLoader;
import edu.cmu.minorthird.text.EmptyLabels;
import edu.cmu.minorthird.text.Span;
import edu.cmu.minorthird.text.StopWords;
import edu.cmu.minorthird.text.TextLabels;
import edu.cmu.minorthird.text.Token;

/**
 * A Feature Extractor which converts a Span to an Instance.
 * 
 * <p>
 * Typical use of this would be something like the following: <code><pre>
 * SpanFE fe=new SpanFE(){
 * 
 * 	public void extractFeatures(TextLabels labels,Span span){
 * 		from(span).tokens().emit();
 * 		from(span).left().subSpan(-2,2).emit();
 * 		from(span).right().subSpan(0,2).emit();
 * 		from(span).right().contains("obj").emit();
 * 	}
 * };
 * 
 * Instance inst=fe.extractInstance(labels,span);
 * </pre></code> Generally, to use this class, one subclasses it and implements the
 * extractFeatures method, using a chain of feature-extracting actions which
 * starts with 'from' and ends with 'emit'.
 * <p>
 * The methods tokens(), subSpan(), and so on are defined in subclasses of
 * SpanFE.Result, and are summarized here.
 * <ul>
 * <li> result.trace() - prints some stuff to stdout by calling
 * SpanFE.trace(result). SpanFE.trace can be overridden for different behavior.
 * <li> result.emit() - ends a feature extraction pipeline by calling
 * SpanFE.emit(result), which can be overridden.
 * <li> result.left() - if result contains a single span, find the left context
 * of that span (a span containing all tokens before it).
 * <li> result.right() - if result contains a single span, find the right
 * context of that span (all tokens after it).
 * <li> result.contains(String type) - if result contains a single span, find
 * the set of all spans of given type contained by that span.
 * <li> result.subSpan(int lo,int len) - if result contains a single span, find
 * the appropriate subspan of that span.
 * <li> result.tokens() - if result contains a single span, find the set of all
 * tokens contained in that span (a 'bag of words'). Extends to a set of spans
 * as well.
 * <li> result.token(int i) - if result contains a single span, construct the
 * set containing the i-th token only.
 * <li> result.first(), result.last() - return the first/last element of a set
 * of Spans.
 * <li> result.eq() - for a set of tokens, construct a set of features of the
 * form 'x y z eq v' where v is the value of the token and 'x y z' is the path
 * of feature extraction steps needed to get to set of tokens.
 * </ul>
 * 
 * 
 * @author William Cohen
 */

abstract public class SpanFE implements SpanFeatureExtractor,MixupCompatible,
		Serializable{

	// for serialization
	static private final long serialVersionUID=20080306L;

	/**
	 * Store features as binary, whenever possible, even if occurrence counts are
	 * ignored.
	 */
	static public final int STORE_AS_BINARY=1;

	/** Store features as numeric counts, whenever possible */
	static public final int STORE_AS_COUNTS=2;

	/**
	 * Store features as binary or counts, trying to reduce storage while
	 * maintaining information.
	 */
	static public final int STORE_COMPACTLY=3;

	private int featureStoragePolicy=STORE_AS_COUNTS;

	// buffers for intermediate results & inputs in feature extraction
	transient protected MutableInstance instance;

	transient private TextLabels textLabels=new EmptyLabels();

	protected String requiredAnnotation=null;

	protected String requiredAnnotationFileToLoad=null;

	protected AnnotatorLoader annotatorLoader=null;

	/**
	 * Creates a feature extractor. All configuration fields keep their declared
	 * initial values; use the setters to change them.
	 */
	public SpanFE(){
		super();
	}

	//
	// getters and setters
	// 

	/**
	 * Chooses how emitted features are stored in the instance being built.
	 * <p>
	 * The value is stored as given; this method does not validate it.
	 * 
	 * @param p
	 *          one of SpanFE.STORE_AS_BINARY, SpanFE.STORE_AS_COUNTS, or
	 *          SpanFE.STORE_COMPACTLY
	 */
	public void setFeatureStoragePolicy(int p){
		featureStoragePolicy=p;
	}

	/**
	 * Convenience setter that records both the annotation required before
	 * feature generation and the mixup file or class that provides it.
	 * 
	 * @param requiredAnnotation
	 *          name of the annotation that must be present
	 * @param annotationProvider
	 *          mixup file name or class name that generates the annotation
	 */
	public void setRequiredAnnotation(String requiredAnnotation,
			String annotationProvider){
		// delegate to the single-property setters, annotation first
		this.setRequiredAnnotation(requiredAnnotation);
		this.setAnnotationProvider(annotationProvider);
	}

	//
	// simpler getter-setter interface, e.g. for GUI configuration
	//

	/**
	 * Records the name of the annotation that must be present before features
	 * are generated.
	 * 
	 * @param requiredAnnotation
	 *          the annotation name; may be null to clear it
	 */
	@Override
	public void setRequiredAnnotation(String requiredAnnotation){
		final String annotation=requiredAnnotation;
		this.requiredAnnotation=annotation;
	}

	/**
	 * Returns the name of the required annotation.
	 * 
	 * @return the configured annotation name, or the empty string if none has
	 *         been set
	 */
	@Override
	public String getRequiredAnnotation(){
		if(requiredAnnotation!=null){
			return requiredAnnotation;
		}
		return "";
	}

	/**
	 * Records the mixup file or java class that provides the required
	 * annotation.
	 * 
	 * @param classNameOrMixupFileName
	 *          class name or mixup file name; may be null to clear it
	 */
	public void setAnnotationProvider(String classNameOrMixupFileName){
		final String provider=classNameOrMixupFileName;
		this.requiredAnnotationFileToLoad=provider;
	}

	/**
	 * Returns the mixup file or class name that provides the required
	 * annotation.
	 * 
	 * @return the configured provider, or the empty string if none has been set
	 */
	public String getAnnotationProvider(){
		if(requiredAnnotationFileToLoad!=null){
			return requiredAnnotationFileToLoad;
		}
		return "";
	}

	/**
	 * Records the loader that is handed to TextLabels.require() by
	 * requireMyAnnotation().
	 * 
	 * @param newLoader
	 *          the loader to use; may be null
	 */
	@Override
	public void setAnnotatorLoader(AnnotatorLoader newLoader){
		annotatorLoader=newLoader;
	}

	//
	// preprocessing for extraction
	//

	/**
	 * Asks the given labels to make sure the configured annotation is present,
	 * passing along the configured provider and annotator loader.
	 * 
	 * @param labels
	 *          the labels that must carry the required annotation
	 */
	public void requireMyAnnotation(TextLabels labels){
		final String annotation=requiredAnnotation;
		final String provider=requiredAnnotationFileToLoad;
		labels.require(annotation,provider,annotatorLoader);
	}

	//
	// extraction
	//

//	/** @deprecated Use extractInstance(TextLabels labels,Span s) */
//	final public Instance extractInstance(Span span){
//		instance=new MutableInstance(span,span.getDocumentGroupId());
//		extractFeatures(span);
//		return instance;
//	}

	/**
	 * Builds an Instance for a span by running the extraction pipelines defined
	 * in extractFeatures(labels,span).
	 * <p>
	 * The instance under construction and the labels are kept in fields of this
	 * extractor while the pipelines run, so each call overwrites the state left
	 * by the previous one.
	 * 
	 * @param labels
	 *          labels consulted by pipeline steps such as contains() and prop()
	 * @param span
	 *          the span to convert
	 * @return the instance holding the emitted features
	 */
	@Override
	final public Instance extractInstance(TextLabels labels,Span span){
		this.instance=new MutableInstance(span,span.getDocumentGroupId());
		this.textLabels=labels;
		this.extractFeatures(labels,span);
		return this.instance;
	}

	/**
	 * Starts a 'pipeline' of extraction steps whose features are added to the
	 * instance being built by this extractor.
	 * <p>
	 * As an example: <code>fe.from(s).tokens().eq().emit()</code> adds
	 * bag-of-words type features.
	 * 
	 * @param s
	 *          the span the pipeline starts from
	 * @return a SpanResult with an empty name path, bound to this extractor
	 */
	final public SpanResult from(Span s){
		String[] emptyPath=new String[0];
		return new SpanResult(emptyPath,this,s);
	}

	/**
	 * Starts a 'pipeline' of extraction steps whose features are delivered to
	 * the given buffer instead of to a SpanFE subclass instance.
	 * 
	 * <p>
	 * This is intended to be used as an alternative to using the SpanFE class to
	 * build an Span2Instance converter, eg
	 * 
	 * <pre><code>
	 * fe=new Span2Instance(){
	 * 
	 * 	public Instance extractInstance(Span s){
	 * 		FeatureBuffer buf=new FeatureBuffer(s);
	 * 		SpanFE.from(s,buf).tokens().emit();
	 * 		SpanFE.from(s,buf).left().subSpan(-2,2).emit();
	 * 		SpanFE.from(s,buf).right().subSpan(0,2).emit();
	 * 		return buf.getInstance();
	 * 	}
	 * }
	 * </code></pre>
	 * 
	 * @param s
	 *          the span the pipeline starts from
	 * @param buffer
	 *          the extractor that receives the emitted features
	 * @return a SpanResult with an empty name path, bound to the buffer
	 */
	final static public SpanResult from(Span s,FeatureBuffer buffer){
		String[] emptyPath=new String[0];
		return new SpanResult(emptyPath,buffer,s);
	}

	/**
	 * Called by some SpanFE.Result subclasses when a 'pipeline' of extraction
	 * steps is ended with a StringBagResult.
	 * <p>
	 * For every string produced by the bag's iterator, a Feature is created
	 * whose name is the result's name path extended with that string. How it is
	 * added to the instance depends on the feature storage policy:
	 * <ul>
	 * <li>STORE_AS_BINARY: always as a binary feature;
	 * <li>STORE_COMPACTLY: as a binary feature when the string's count is 1,
	 * otherwise as a numeric feature holding the count;
	 * <li>any other value: as a numeric feature holding the count.
	 * </ul>
	 */
	public void emit(StringBagResult result){
		Iterator<String> values=result.asBag().iterator();
		while(values.hasNext()){
			String value=values.next();
			Feature feature=new Feature(result.extend(value));
			if(featureStoragePolicy==STORE_AS_BINARY){
				instance.addBinary(feature);
				continue;
			}
			int count=result.asBag().getCount(value);
			boolean singleOccurrence=(count==1);
			if(singleOccurrence&&featureStoragePolicy==STORE_COMPACTLY){
				// a count of one carries no more information than a binary flag
				instance.addBinary(feature);
			}else{
				instance.addNumeric(feature,count);
			}
		}
	}

	/**
	 * Called by some SpanFE.Result subclass when a 'pipeline' of extraction steps
	 * is ended with a TokenSetResult. The tokens are converted to their string
	 * values with eq() and emitted as a StringBagResult.
	 */
	public void emit(TokenSetResult result){
		StringBagResult tokenValues=result.eq();
		emit(tokenValues);
	}

	/**
	 * Called by some SpanFE.Result subclass when a 'pipeline' of extraction steps
	 * is ended with a SpanSetResult. The spans are converted to their tokens
	 * with tokens() and emitted as a TokenSetResult.
	 */
	public void emit(SpanSetResult result){
		TokenSetResult spanTokens=result.tokens();
		emit(spanTokens);
	}

	/**
	 * Called by some SpanFE.Result subclass when a 'pipeline' of extraction steps
	 * is ended with a SpanResult. The span is converted to its tokens with
	 * tokens() and emitted as a TokenSetResult.
	 */
	public void emit(SpanResult result){
		TokenSetResult spanTokens=result.tokens();
		emit(spanTokens);
	}

	/**
	 * Single-argument variant of extractFeatures. Unless a subclass overrides
	 * it, this implementation always fails: pipelines should be defined in
	 * extractFeatures(labels,span), each typically starting with 'from(span)'
	 * and ending with 'emit()'.
	 * 
	 * @throws IllegalStateException
	 *           always, in this implementation
	 */
	public void extractFeatures(Span span){
		String message=
				"you probably meant to use extractFeatures(labels,span) instead";
		throw new IllegalStateException(message);
	}

	/**
	 * Implement this with a specific set of SpanFE 'pipelines'. Each pipeline
	 * will typically start with 'from(span)' and end with 'emit()'. It is called
	 * by extractInstance(labels,span) after the instance buffer has been reset.
	 * 
	 * @param labels
	 *          the labels passed to extractInstance
	 * @param span
	 *          the span being converted to an instance
	 */
	public abstract void extractFeatures(TextLabels labels,Span span);

	/**
	 * Prints the result's name path, each step preceded by a space, followed by
	 * " -> " and the result itself, on one line of standard output. Subclass
	 * this to change the tracing behavior.
	 */
	public void trace(Result result){
		for(String step:result.getName()){
			System.out.print(" "+step);
		}
		System.out.println(" -> "+result);
	}

	//
	// SpanFE.Result classes
	//

	/**
	 * Encodes an intermediate result of the SpanFE process: the path of step
	 * names that produced it plus the extractor that receives trace() and emit()
	 * calls.
	 */
	public abstract static class Result{

		/** Names of the extraction steps that led to this result. */
		protected String[] name;

		/** The extractor this result reports to; never null. */
		protected SpanFE fe;

		/**
		 * @param name
		 *          path of step names for this result
		 * @param fe
		 *          the owning extractor
		 * @throws IllegalArgumentException
		 *           if fe is null
		 */
		public Result(String[] name,SpanFE fe){
			if(fe==null){
				throw new IllegalArgumentException("null fe");
			}
			this.name=name;
			this.fe=fe;
		}

		/** Returns this result's name path with one more step appended. */
		public String[] extend(String addition){
			return extend(name,addition);
		}

		/**
		 * Returns a new array holding the elements of partial followed by
		 * addition; partial itself is not modified.
		 */
		public String[] extend(String[] partial,String addition){
			int length=partial.length;
			String[] extension=new String[length+1];
			System.arraycopy(partial,0,extension,0,length);
			extension[length]=addition;
			return extension;
		}

		/** Hands this result to the extractor's trace() and returns it. */
		protected Result doTrace(){
			fe.trace(this);
			return this;
		}

		/** Returns the name path (the internal array, not a copy). */
		public String[] getName(){
			return name;
		}

		/** Terminates a feature extraction pipeline by actually emitting features. */
		public abstract void emit();
	}

	/**
	 * An intermediate result of a SpanFE process where the object being operated
	 * on is a sorted set of elements.
	 */
	public abstract static class SetResult<T> extends Result{

		/** The elements of this result; never null. */
		protected SortedSet<T> set;

		/**
		 * @throws IllegalArgumentException
		 *           if fe or set is null
		 */
		public SetResult(String[] name,SpanFE fe,SortedSet<T> set){
			super(name,fe);
			if(set==null){
				throw new IllegalArgumentException("null set");
			}
			this.set=set;
		}

		/**
		 * Returns the elements as a plain Set (the internal set, not a copy).
		 */
		public Set<T> asSet(){
			return set;
		}

		/**
		 * Collects, in a new TreeSet, the elements accepted by a user-defined
		 * filter.
		 */
		protected SortedSet<T> applyFilter(Filter f){
			SortedSet<T> accepted=new TreeSet<T>();
			for(T element:set){
				if(f.match(element)){
					accepted.add(element);
				}
			}
			return accepted;
		}

		/**
		 * Collects, in a new TreeSet, the result of applying a user-defined
		 * function to each element.
		 */
		protected SortedSet<T> mapFunction(Function f){
			SortedSet<T> mapped=new TreeSet<T>();
			for(T element:set){
				mapped.add(f.apply(element));
			}
			return mapped;
		}
	}

	/**
	 * An intermediate result of an SpanFE process where a single span is being
	 * processed.
	 */
	static public class SpanResult extends Result{

		/** The span this result wraps. */
		private Span s;

		public SpanResult(String[] name,SpanFE fe,Span s){
			super(name,fe);
			this.s=s;
		}

		/** Passes this result to the extractor's trace() and returns it. */
		public SpanResult trace(){
			return (SpanResult)doTrace();
		}

		@Override
		public void emit(){
			fe.emit(this);
		}

		@Override
		public String toString(){
			return "[SpanResult: "+s+"]";
		}

		public Span getSpan(){
			return s;
		}

		/**
		 * Move to the span consisting of all tokens in the same document that
		 * precede the current span.
		 */
		public SpanResult left(){
			Span lSpan=s.documentSpan().subSpan(0,s.documentSpanStartIndex());
			return new SpanResult(extend("left"),fe,lSpan);
		}

		/**
		 * Move to the span consisting of all tokens in the same document that
		 * follow the current span.
		 */
		public SpanResult right(){
			// index, within the document span, of the first token after this span
			int afterEnd=s.documentSpanStartIndex()+s.size();
			Span rSpan=
					s.documentSpan().subSpan(afterEnd,
							s.documentSpan().size()-afterEnd);
			return new SpanResult(extend("right"),fe,rSpan);
		}

		/**
		 * Move to the document containing this span.
		 */
		public SpanResult doc(){
			Span docSpan=s.documentSpan();
			return new SpanResult(extend("doc"),fe,docSpan);
		}

		/**
		 * Move to a set of all spans of the named type that are contained by the
		 * current span. Candidates are taken from the extractor's current labels,
		 * restricted to this span's document.
		 */
		public SpanSetResult contains(String type){
			SortedSet<Span> set=new TreeSet<Span>();
			for(Iterator<Span> i=
					fe.textLabels.instanceIterator(type,s.getDocumentId());i.hasNext();){
				Span other=i.next();
				if(s.contains(other)){
					set.add(other);
				}
			}
			return new SpanSetResult(extend("contains_"+type),fe,set);
		}

		/**
		 * Move to the specified subspan of the current span. Invalid indices will
		 * be trimmed to a valid size. Negative indices mean to extract a subspan
		 * from the end of the current span, e.g., subSpan(-2,2) means to extract a
		 * span containing the last two tokens.
		 * <p>
		 * An empty span is returned unchanged, without extending the name path.
		 * Otherwise the step name is built from the trimmed lo and len values
		 * ("subspan_lo_len" for lo&gt;=0, "subspanNeg_lo_len" for lo&lt;0).
		 * <p>
		 * NOTE(review): because the trimmed values are used, the step name for the
		 * same arguments can differ between spans of different sizes - confirm
		 * this is intended before changing it, since feature names depend on it.
		 * NOTE(review): a negative len is passed through to Span.subSpan
		 * untrimmed - confirm how Span handles it.
		 */
		public SpanResult subSpan(int lo,int len){
			if(s.size()==0)
				return this;
			if(lo>=0){
				lo=Math.min(lo,s.size()-1);
				len=Math.min(s.size()-lo,len);
				return new SpanResult(extend("subspan_"+lo+"_"+len),fe,s
						.subSpan(lo,len));
			}
			// lo<0: translate the offset from the end into an index from the start
			lo=Math.max(s.size()+lo,0);
			len=Math.min(s.size()-lo,len);
			return new SpanResult(extend("subspanNeg_"+lo+"_"+len),fe,s.subSpan(lo,
					len));
		}

		/** Move to the set of all tokens contained by this span. */
		public TokenSetResult tokens(){
			SortedSet<Token> set=new TreeSet<Token>();
			for(int i=0;i<s.size();i++){
				set.add(s.getToken(i));
			}
			return new TokenSetResult(extend("tokens"),fe,set);
		}

		/**
		 * Move to the specified token inside the span. A negative index means to
		 * count from the end. An invalid index will result in an empty
		 * TokenSetResult.
		 */
		public TokenSetResult token(int index){
			String namex;
			int index1;
			if(index<0){
				index1=s.size()+index;
				namex="tokenNeg_"+(-index);
			}else{
				index1=index;
				namex="token_"+index;
			}
			SortedSet<Token> set=new TreeSet<Token>();
			if(index1>=0&&index1<s.size()){
				set.add(s.getToken(index1));
			}
			return new TokenSetResult(extend(namex),fe,set);
		}

		/** Move to the string value of the span. */
		public StringBagResult eq(){
			Bag<String> stringBag=new Bag<String>();
			stringBag.add(s.asString());
			return new StringBagResult(extend("eq"),fe,stringBag);
		}

		/**
		 * Make length of the span a feature. Eg feature is #tokens=3 for a 3-token
		 * span. The name path is not extended by this step.
		 */
		public StringBagResult size(){
			Bag<String> stringBag=new Bag<String>();
			stringBag.add("#tokens",s.size());
			return new StringBagResult(name,fe,stringBag);
		}

		/**
		 * Make exact length of span a feature. Eg, feature is #tokens_3=1 for a
		 * 3-token span, #tokens_2=1 for a two-token span. The name path is not
		 * extended by this step.
		 */
		public StringBagResult exactSize(){
			Bag<String> stringBag=new Bag<String>();
			stringBag.add("#tokens_"+s.size());
			return new StringBagResult(name,fe,stringBag);
		}
	}

	/**
	 * An intermediate result of a SpanFE process where the object being operated
	 * on is a set of spans.
	 */
	public static class SpanSetResult extends SetResult<Span>{

		public SpanSetResult(String[] name,SpanFE fe,SortedSet<Span> set){
			super(name,fe,set);
		}

		/** Passes this result to the extractor's trace() and returns it. */
		public SpanSetResult trace(){
			Result traced=doTrace();
			return (SpanSetResult)traced;
		}

		@Override
		public void emit(){
			fe.emit(this);
		}

		@Override
		public String toString(){
			return "[SpanSetResult: "+set+"]";
		}

		/**
		 * Move to the first span in the set; the new set is empty if this one is.
		 */
		public SpanSetResult first(){
			SortedSet<Span> firstOnly=new TreeSet<Span>();
			if(!set.isEmpty()){
				firstOnly.add(set.first());
			}
			return new SpanSetResult(extend("first"),fe,firstOnly);
		}

		/**
		 * Move to the last span in the set; the new set is empty if this one is.
		 */
		public SpanSetResult last(){
			SortedSet<Span> lastOnly=new TreeSet<Span>();
			if(!set.isEmpty()){
				lastOnly.add(set.last());
			}
			return new SpanSetResult(extend("last"),fe,lastOnly);
		}

		/**
		 * Find the set of all tokens contained by any span in the set.
		 */
		public TokenSetResult tokens(){
			SortedSet<Token> allTokens=new TreeSet<Token>();
			for(Span span:set){
				// reuse the single-span logic for each member
				TokenSetResult spanTokens=new SpanResult(name,fe,span).tokens();
				allTokens.addAll(spanTokens.asSet());
			}
			return new TokenSetResult(extend("tokens"),fe,allTokens);
		}

		/** Move to a bag of the string values of all spans in the set. */
		public StringBagResult eq(){
			Bag<String> spanStrings=new Bag<String>();
			for(Span span:set){
				spanStrings.add(span.asString());
			}
			return new StringBagResult(extend("eq"),fe,spanStrings);
		}

		/** Keep only the spans accepted by the filter. */
		public SpanSetResult filter(Filter f){
			String[] path=extend("filter_"+f.getName());
			return new SpanSetResult(path,fe,applyFilter(f));
		}

		/** Replace each span by the result of applying the function to it. */
		public SpanSetResult map(Function f){
			String[] path=extend("map_"+f.getName());
			return new SpanSetResult(path,fe,mapFunction(f));
		}
	}

	/**
	 * An intermediate result of a SpanFE process where the object being operated
	 * on is a set of tokens.
	 */
	public static class TokenSetResult extends SetResult<Token>{

		public TokenSetResult(String[] name,SpanFE fe,SortedSet<Token> set){
			super(name,fe,set);
		}

		/** Passes this result to the extractor's trace() and returns it. */
		public TokenSetResult trace(){
			Result traced=doTrace();
			return (TokenSetResult)traced;
		}

		@Override
		public void emit(){
			fe.emit(this);
		}

		@Override
		public String toString(){
			return "[TokenSetResult: "+set+"]";
		}

		/** Collect the string values of all tokens in this set. */
		public StringBagResult eq(){
			Bag<String> values=new Bag<String>();
			for(Token token:set){
				values.add(token.getValue());
			}
			return new StringBagResult(extend("eq"),fe,values);
		}

		/**
		 * Collect the non-null values of the given property over all tokens in
		 * this set, as reported by the extractor's current labels.
		 */
		public StringBagResult prop(String property){
			Bag<String> values=new Bag<String>();
			for(Token token:set){
				String value=fe.textLabels.getProperty(token,property);
				if(value==null){
					continue;
				}
				values.add(value);
			}
			return new StringBagResult(extend(property),fe,values);
		}

		/** Keep only the tokens for which the property has a non-null value. */
		public TokenSetResult hasProp(String property){
			SortedSet<Token> kept=new TreeSet<Token>();
			for(Token token:set){
				if(fe.textLabels.getProperty(token,property)!=null){
					kept.add(token);
				}
			}
			return new TokenSetResult(extend("hasProp_"+property),fe,kept);
		}

		/**
		 * Keep only the tokens whose property equals some particular value. A
		 * targetValue of 'null' keeps the tokens for which the property is null;
		 * the step name then uses "NULL" in place of the value.
		 */
		public TokenSetResult hasProp(String property,String targetValue){
			SortedSet<Token> kept=new TreeSet<Token>();
			for(Token token:set){
				String value=fe.textLabels.getProperty(token,property);
				boolean matches=
						(targetValue==null)?(value==null):targetValue.equals(value);
				if(matches){
					kept.add(token);
				}
			}
			String targetValueTag;
			if(targetValue==null){
				targetValueTag="NULL";
			}else{
				targetValueTag=targetValue;
			}
			String stepName="hasProp_"+property+"_"+targetValueTag;
			return new TokenSetResult(extend(stepName),fe,kept);
		}

	}

	/**
	 * An intermediate result of a SpanFE process where the object being operated
	 * on is a bag (multiset) of strings.
	 */
	static public class StringBagResult extends SetResult<String>{

		/**
		 * Matches any run of non-word characters or digits; compiled once because
		 * it never changes.
		 */
		private static final Pattern NON_WORD_OR_DIGIT=Pattern.compile("[\\W\\d]+");

		/** The strings of this result together with their counts. */
		private Bag<String> bag;

		public StringBagResult(String[] name,SpanFE fe,Bag<String> bag){
			super(name,fe,bag.asSet());
			this.bag=bag;
		}

		@Override
		public void emit(){
			fe.emit(this);
		}

		/** Passes this result to the extractor's trace() and returns it. */
		public StringBagResult trace(){
			return (StringBagResult)doTrace();
		}

		@Override
		public String toString(){
			return "[StringBagResult: "+bag+"]";
		}

		/** Returns the underlying bag (the internal object, not a copy). */
		public Bag<String> asBag(){
			return bag;
		}

		/**
		 * Lower-cases every string, keeping its count. Locale.ROOT is used so the
		 * resulting feature names do not depend on the default locale of the
		 * machine running the extraction.
		 */
		public StringBagResult lc(){
			Bag<String> lcBag=new Bag<String>();
			for(Iterator<String> i=bag.iterator();i.hasNext();){
				String str=i.next();
				int n=bag.getCount(str);
				lcBag.add(str.toLowerCase(Locale.ROOT),n);
			}
			return new StringBagResult(extend("lc"),fe,lcBag);
		}

		/** Replaces every string by the given constant, keeping its count. */
		public StringBagResult toConst(String replacement){
			Bag<String> trBag=new Bag<String>();
			for(Iterator<String> i=bag.iterator();i.hasNext();){
				String str=i.next();
				int n=bag.getCount(str);
				trBag.add(replacement,n);
			}
			return new StringBagResult(extend("toConst"),fe,trBag);
		}

		/**
		 * Applies String.replaceAll(regex,replacement) to every string, keeping
		 * its count.
		 */
		public StringBagResult tr(String regex,String replacement){
			Bag<String> trBag=new Bag<String>();
			for(Iterator<String> i=bag.iterator();i.hasNext();){
				String str=i.next();
				int n=bag.getCount(str);
				trBag.add(str.replaceAll(regex,replacement),n);
			}
			return new StringBagResult(extend("tr/"+regex+"/"+replacement),fe,trBag);
		}

		/**
		 * Replaces each character in A-Z by 'A', in a-z by 'a' and in 0-9 by '0',
		 * keeping the count of the original string.
		 */
		public StringBagResult charTypes(){
			Bag<String> trBag=new Bag<String>();
			for(Iterator<String> i=bag.iterator();i.hasNext();){
				String str=i.next();
				String charTypes=
						str.replaceAll("[A-Z]","A").replaceAll("[a-z]","a").replaceAll(
								"[0-9]","0");
				int n=bag.getCount(str);
				trBag.add(charTypes,n);
			}
			return new StringBagResult(extend("charTypes"),fe,trBag);
		}

		/**
		 * Replaces each run of A-Z by "X+", and then, in the result, each run of
		 * a-z by "x+" and each run of 0-9 by "9+", keeping the count of the
		 * original string.
		 */
		public StringBagResult charTypePattern(){
			Bag<String> trBag=new Bag<String>();
			for(Iterator<String> i=bag.iterator();i.hasNext();){
				String str=i.next();
				String pattern=
						str.replaceAll("[A-Z]+","X+").replaceAll("[a-z]+","x+").replaceAll(
								"[0-9]+","9+");
				int n=bag.getCount(str);
				trBag.add(pattern,n);
			}
			return new StringBagResult(extend("charTypePattern"),fe,trBag);
		}

		/**
		 * Removes punctuation and numbers: drops every string that contains at
		 * least one non-word character or digit, and keeps the others with their
		 * counts.
		 */
		public StringBagResult punk(){
			Bag<String> punkBag=new Bag<String>();
			for(Iterator<String> i=bag.iterator();i.hasNext();){
				String str=i.next();
				int n=bag.getCount(str);
				Matcher m=NON_WORD_OR_DIGIT.matcher(str);
				if(!m.find()){
					punkBag.add(str,n);
				}
			}
			return new StringBagResult(extend("punk"),fe,punkBag);
		}

		/**
		 * Use or remove the words listed in StopWords.LONG.
		 * 
		 * @param action
		 *          "use" keeps only strings found in the stop word list, "remove"
		 *          keeps only strings not found in it (case-insensitive action
		 *          name)
		 * @throws IllegalArgumentException
		 *           if the bag is not empty and action is neither "use" nor
		 *           "remove"; the action is only checked while iterating
		 */
		public StringBagResult stopwords(String action){
			String[] wordArray=StopWords.LONG; // change with SHORT
			Bag<String> swBag=new Bag<String>();
			for(Iterator<String> i=bag.iterator();i.hasNext();){
				String str=i.next();
				int n=bag.getCount(str);
				if(action.equalsIgnoreCase("use")){
					// "use" words as sole features
					for(int j=0;j<wordArray.length;j++){
						if(wordArray[j].equals(str)){
							swBag.add(str,n);
						}
					}
				}else if(action.equalsIgnoreCase("remove")){
					// "remove" words from retrieved features
					boolean isAbsent=true;
					for(int j=0;j<wordArray.length;j++){
						if((wordArray[j].equals(str))){
							isAbsent=false;
						}
					}
					if(isAbsent){
						swBag.add(str,n);
					}
				}else{
					throw new IllegalArgumentException("Error: action is missing!");
				}
			}
			return new StringBagResult(extend("stopwords-"+action),fe,swBag);
		}

		/**
		 * Use ONLY words in Dictionary File: keeps, with its count, every string
		 * that equals some trimmed line of the file.
		 * <p>
		 * The file is read once per call and closed afterwards. It is not opened
		 * at all when the bag is empty.
		 * 
		 * @param filename
		 *          path of the dictionary file, one word per line
		 * @throws IOException
		 *           if the file cannot be opened or read
		 */
		public StringBagResult usewords(String filename) throws IOException{
			Bag<String> uwBag=new Bag<String>();
			Iterator<String> i=bag.iterator();
			if(i.hasNext()){
				Set<String> dictionary=readDictionary(filename);
				while(i.hasNext()){
					String str=i.next();
					int n=bag.getCount(str);
					// Check whether str is in Dictionary File
					if(dictionary.contains(str)){
						uwBag.add(str,n);
					}
				}
			}
			return new StringBagResult(extend("usewords"),fe,uwBag);
		}

		/** Reads the trimmed lines of a file into a set, closing the file afterwards. */
		private static Set<String> readDictionary(String filename)
				throws IOException{
			Set<String> words=new HashSet<String>();
			BufferedReader in=new BufferedReader(new FileReader(new File(filename)));
			try{
				String line;
				while((line=in.readLine())!=null){
					words.add(line.trim());
				}
			}finally{
				in.close();
			}
			return words;
		}
	}

	/**
	 * An abstract class that can be used to filter SpanSetResults: elements for
	 * which match() returns true are kept.
	 */
	public abstract static class Filter{

		/**
		 * A short name, used to help construct feature names associated with this
		 * filter.
		 */
		public abstract String getName();

		/**
		 * Should return true for all items that will be accepted by the filter.
		 */
		public abstract boolean match(Object o);
	}

	/**
	 * An abstract class that can be used to change SpanSets: every element is
	 * replaced by the value apply() returns for it.
	 */
	public abstract static class Function{

		/**
		 * A short name, used to help construct feature names associated with this
		 * function.
		 */
		public abstract String getName();

		/** Should return the modified object. */
		public abstract <T> T apply(T o);
	}
}