package edu.stanford.nlp.process;
import edu.stanford.nlp.ling.Document;
import edu.stanford.nlp.ling.Word;
import java.io.File;
/**
 * Filter which removes stop-listed words from a {@link Document} of {@link Word}s.
 *
 * <p>The stop list is fixed at construction time and is never reassigned afterwards.
 *
 * @param <L> first type parameter of the processed {@link Document}; passed through
 *            unchanged (presumably the label type; confirm against {@code Document})
 * @param <F> second type parameter of the processed {@link Document}; passed through
 *            unchanged (presumably the feature type; confirm against {@code Document})
 * @author Sepandar Kamvar (sdkamvar@stanford.edu)
 */
public class StoplistFilter<L, F> implements DocumentProcessor<Word, Word, L, F> {

  /** Stop list consulted for every word; set once by the constructor. */
  private final StopList stoplist;

  /**
   * Create a new StoplistFilter with a small default stoplist, i.e. the one
   * produced by the no-argument {@link StopList} constructor.
   */
  public StoplistFilter() {
    this(new StopList());
  }

  /**
   * Create a new StoplistFilter with the stoplist given in {@code stoplistfile}.
   *
   * @param stoplistfile path of the stop list file; it is wrapped in a
   *                     {@link File} and handed to the {@link StopList} constructor
   */
  public StoplistFilter(String stoplistfile) {
    this(new StopList(new File(stoplistfile)));
  }

  /**
   * Create a new StoplistFilter with the given StopList.
   *
   * @param stoplist the stop list to filter against; must not be {@code null}
   * @throws NullPointerException if {@code stoplist} is {@code null}
   */
  public StoplistFilter(StopList stoplist) {
    // Fail fast here instead of with an unexplained NPE on the first
    // processDocument call, where the cause is much harder to trace.
    if (stoplist == null) {
      throw new NullPointerException("stoplist must not be null");
    }
    this.stoplist = stoplist;
  }

  /**
   * Returns a new Document with the same meta-data as {@code in} and the same words
   * except those on the stop list this filter was constructed with.
   *
   * <p>The result starts from {@code in.blankDocument()} and receives, in iteration
   * order, every word of {@code in} for which the stop list's {@code contains}
   * check is false. This method never adds to or removes from {@code in} itself.
   *
   * @param in the document to filter
   * @return the document obtained from {@code in.blankDocument()}, holding only the
   *         words that are not stop-listed
   */
  public Document<L, F, Word> processDocument(Document<L, F, Word> in) {
    Document<L, F, Word> out = in.blankDocument();
    for (Word w : in) {
      if (!stoplist.contains(w)) {
        out.add(w);
      }
    }
    return out;
  }
}