package edu.cmu.minorthird.text;

import java.util.Iterator;
import java.util.Locale;

/**
 * A very simple stemming algorithm.
 *
 * <p>A word is lower-cased and, if it is at least {@code MIN_STEMMABLE_LENGTH}
 * characters long, the first matching suffix from a short fixed list is
 * removed. No dictionary or linguistic rules are consulted.
 *
 * @author William Cohen
 */
public class BoneheadStemmer {

  /** Name of the token property under which the computed stem is stored. */
  public static final String STEM_PROP = "stem";

  /** Words shorter than this are lower-cased but never have a suffix removed. */
  private static final int MIN_STEMMABLE_LENGTH = 4;

  /**
   * Suffixes to strip, in priority order. Only the first one that matches is
   * removed, so "es" must stay ahead of "s" for "boxes" to become "box".
   */
  private static final String[] SUFFIXES = {"tion", "ed", "es", "ly", "s"};

  /** Creates a stemmer; the stemmer holds no state. */
  public BoneheadStemmer() {
  }

  /**
   * Stems every token of every document span in {@code base} and records the
   * result on the token as the {@link #STEM_PROP} property of {@code labels}.
   *
   * @param base   source of the document spans whose tokens are stemmed
   * @param labels labels object that receives one {@link #STEM_PROP} property per token
   */
  public void stem(TextBase base, MonotonicTextLabels labels) {
    for (Iterator<Span> spans = base.documentSpanIterator(); spans.hasNext();) {
      Span span = spans.next();
      for (int j = 0; j < span.size(); j++) {
        Token token = span.getToken(j);
        labels.setProperty(token, STEM_PROP, stem(token.getValue()));
      }
    }
  }

  /**
   * Computes the stem of a single word.
   *
   * <p>The word is lower-cased using {@link Locale#ROOT}, so the result does not
   * depend on the JVM's default locale. Words shorter than four characters are
   * returned lower-cased and otherwise untouched. For longer words the first
   * matching suffix among "tion", "ed", "es", "ly" and "s" is removed. Note that
   * the four-letter input "tion" therefore stems to the empty string.
   *
   * @param s the word to stem; must not be {@code null}
   * @return the lower-cased word with at most one suffix removed
   * @throws NullPointerException if {@code s} is {@code null}
   */
  public String stem(String s) {
    // Locale.ROOT keeps the mapping stable (e.g. 'I' -> 'i') under Turkish/Azeri defaults.
    String lc = s.toLowerCase(Locale.ROOT);
    if (lc.length() < MIN_STEMMABLE_LENGTH) {
      return lc;
    }
    for (String suffix : SUFFIXES) {
      if (lc.endsWith(suffix)) {
        return lc.substring(0, lc.length() - suffix.length());
      }
    }
    return lc;
  }

  /**
   * Prints the stem of each command-line argument to standard output.
   *
   * @param args the words to stem
   */
  public static void main(String[] args) {
    BoneheadStemmer stemmer = new BoneheadStemmer();
    for (String word : args) {
      System.out.println("stem of '" + word + "' = '" + stemmer.stem(word) + "'");
    }
  }
}