/** * OpenKM, Open Document Management System (http://www.openkm.com) * Copyright (c) 2006-2011 Paco Avila & Josep Llort * * No bytes were intentionally harmed during the development of this application. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License along * with this program; if not, write to the Free Software Foundation, Inc., * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ package com.openkm.kea.stemmers; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Wrapper for the Snowball stemmer for Spanish // use stemSB * Or translation of the Stemmer implemented in C * i found here: * http://members.unine.ch/jacques.savoy/clef/index.html * * @author Olena Medelyan */ public class SpanishStemmer extends Stemmer { private static Logger log = LoggerFactory.getLogger(SpanishStemmer.class); private static final long serialVersionUID = 1L; private SpanishStemmerSB stemmer = new SpanishStemmerSB(); public String stemSB(String str) { stemmer.setCurrent(str); stemmer.stem(); return stemmer.getCurrent(); } /* Spanish stemmer tring to remove inflectional suffixes */ public String stem(String word) { int len = word.length()-1; if (len > 3) { word = removeSpanishAccent(word); if (word.endsWith("eses")) { // corteses -> cort�s word = word.substring(0,len-1); return word; } if (word.endsWith("ces")) { // dos veces -> una vez word = word.substring(0,len-2); word = word + 'z'; return word; } if (word.endsWith("os") || word.endsWith("as") || word.endsWith("es")) { // ending with -os, -as or -es word = word.substring(0,len-1); return word; } if (word.endsWith("o") || word.endsWith("a") || word.endsWith("e")) { // ending with -o, -a, or -e word = word.substring(0,len-1); return word; } } return word; } private String removeSpanishAccent (String word) { word = word.replaceAll("à|á|â|ä","a"); word = word.replaceAll("ò|ó|ô|ö","o"); word = word.replaceAll("è|é|ê|ë","e"); word = word.replaceAll("ù|ú|û|ü","a"); word = word.replaceAll("ì|í|î|ï","a"); return word; } /** * The main method. // for testing */ public static void main(String[] ops) { SpanishStemmer s = new SpanishStemmer(); log.info(s.stem("veces")); } }