package maui.stemmers; /** * Wrapper for the Snowball stemmer for Spanish // use stemSB * Or translation of the Stemmer implemented in C * i found here: * http://members.unine.ch/jacques.savoy/clef/index.html * * @author Olena Medelyan */ public class SpanishStemmer extends Stemmer { private static final long serialVersionUID = 1L; private SpanishStemmerSB stemmer = new SpanishStemmerSB(); public String stemSB(String str) { stemmer.setCurrent(str); stemmer.stem(); return stemmer.getCurrent(); } /* Spanish stemmer tring to remove inflectional suffixes */ public String stem(String word) { int len = word.length()-1; if (len > 3) { word = removeSpanishAccent(word); if (word.endsWith("eses")) { // corteses -> cortÈs word = word.substring(0,len-1); return word; } if (word.endsWith("ces")) { // dos veces -> una vez word = word.substring(0,len-2); word = word + 'z'; return word; } if (word.endsWith("os") || word.endsWith("as") || word.endsWith("es")) { // ending with -os, -as or -es word = word.substring(0,len-1); return word; } if (word.endsWith("o") || word.endsWith("a") || word.endsWith("e")) { // ending with -o, -a, or -e word = word.substring(0,len-1); return word; } } return word; } private String removeSpanishAccent (String word) { word = word.replaceAll("‡|·|‚|‰","a"); word = word.replaceAll("Ú|Û|Ù|ˆ","o"); word = word.replaceAll("Ë|È|Í|Î","e"); word = word.replaceAll("˘|˙|˚|¸","a"); word = word.replaceAll("Ï|Ì|Ó|Ô","a"); return word; } /** * The main method. // for testing */ public static void main(String[] ops) { SpanishStemmer s = new SpanishStemmer(); System.out.println(s.stem("veces")); } }