package kea.stemmers; /** * A basic stemmer that only performs the first step of the * PorterStemmer algorithm: removing of the plural endings. * @author olena * */ public class SremovalStemmer extends Stemmer { /** * */ private static final long serialVersionUID = 1L; public String stem(String str) { // check for zero length if (str.length() > 3) { // all characters must be letters char[] c = str.toCharArray(); for (int i = 0; i < c.length; i++) { if (!Character.isLetter(c[i])) { return str.toLowerCase(); } } } else { return str.toLowerCase(); } str = step1a(str); return str.toLowerCase(); } // end stem protected String step1a (String str) { // SSES -> SS if (str.endsWith("sses")) { return str.substring(0, str.length() - 2); // IES -> I } else if (str.endsWith("ies")) { return str.substring(0, str.length() - 2); // SS -> S } else if (str.endsWith("ss")) { return str; // S -> } else if (str.endsWith("s")) { return str.substring(0, str.length() - 1); } else { return str; } } // end step1a /* ------------------------------------------------------- The following are functions to help compute steps 1 - 5 ------------------------------------------------------- */ // does string end with 's'? protected boolean endsWithS(String str) { return str.endsWith("s"); } // end function // does string contain a vowel? protected boolean containsVowel(String str) { char[] strchars = str.toCharArray(); for (int i = 0; i < strchars.length; i++) { if (isVowel(strchars[i])) return true; } // no aeiou but there is y if (str.indexOf('y') > -1) return true; else return false; } // end function // is char a vowel? public boolean isVowel(char c) { if ((c == 'a') || (c == 'e') || (c == 'i') || (c == 'o') || (c == 'u')) return true; else return false; } // end function // does string end with a double consonent? protected boolean endsWithDoubleConsonent(String str) { char c = str.charAt(str.length() - 1); if (c == str.charAt(str.length() - 2)) if (!containsVowel(str.substring(str.length() - 2))) { return true; } return false; } // end function // returns a CVC measure for the string protected int stringMeasure(String str) { int count = 0; boolean vowelSeen = false; char[] strchars = str.toCharArray(); for (int i = 0; i < strchars.length; i++) { if (isVowel(strchars[i])) { vowelSeen = true; } else if (vowelSeen) { count++; vowelSeen = false; } } // end for return count; } // end function // does stem end with CVC? protected boolean endsWithCVC (String str) { char c, v, c2 = ' '; if (str.length() >= 3) { c = str.charAt(str.length() - 1); v = str.charAt(str.length() - 2); c2 = str.charAt(str.length() - 3); } else { return false; } if ((c == 'w') || (c == 'x') || (c == 'y')) { return false; } else if (isVowel(c)) { return false; } else if (!isVowel(v)) { return false; } else if (isVowel(c2)) { return false; } else { return true; } } // end function } // end class