package kea.stemmers; /** * Wrapper for the Snowball stemmer for Spanish // use stemSB * Or translation of the Stemmer implemented in C * i found here: * http://members.unine.ch/jacques.savoy/clef/index.html * * @author Olena Medelyan */ public class SpanishStemmer extends Stemmer { private SpanishStemmerSB stemmer = new SpanishStemmerSB(); public String stemSB(String str) { stemmer.setCurrent(str); stemmer.stem(); return stemmer.getCurrent(); } /* Spanish stemmer tring to remove inflectional suffixes */ public String stem(String word) { int len = word.length()-1; if (len > 3) { word = removeSpanishAccent(word); if (word.endsWith("eses")) { // corteses -> cortés word = word.substring(0,len-1); return word; } if (word.endsWith("ces")) { // dos veces -> una vez word = word.substring(0,len-2); word = word + 'z'; return word; } if (word.endsWith("os") || word.endsWith("as") || word.endsWith("es")) { // ending with -os, -as or -es word = word.substring(0,len-1); return word; } if (word.endsWith("o") || word.endsWith("a") || word.endsWith("e")) { // ending with -o, -a, or -e word = word.substring(0,len-1); return word; } } return word; } private String removeSpanishAccent (String word) { word = word.replaceAll("à|á|â|ä","a"); word = word.replaceAll("ò|ó|ô|ö","o"); word = word.replaceAll("è|é|ê|ë","e"); word = word.replaceAll("ù|ú|û|ü","a"); word = word.replaceAll("ì|í|î|ï","a"); return word; } /** * The main method. // for testing */ public static void main(String[] ops) { SpanishStemmer s = new SpanishStemmer(); System.out.println(s.stem("veces")); } }