package outputter.knowledge;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.LinkedHashSet;
import java.util.List;

import org.apache.log4j.Logger;

/**
 * Generates candidate adjective spellings of a word by stripping a known
 * suffix (and a trailing plural "s") to obtain stems, then appending every
 * known adjective suffix to every stem. The result is purely combinatorial:
 * no dictionary lookup is performed, so most candidates are not real words.
 */
public class Wordforms {
    private static final Logger LOGGER = Logger.getLogger(Wordforms.class);

    /** Pipe-separated list of suffixes used both for stemming and for generating forms. */
    static String adjective_suffixes = "e|ed|er|ent|-like|like|shaped|-shaped|y|ly|ic|ted|ate|led|oid|ion|ied|ous|ing|form|iform|ally|ation|ure|al|ical|ication|sion|ded";

    /** Cache of previously generated forms, keyed by the trimmed input word. */
    static Hashtable<String,LinkedHashSet<String>> adjectivecache = new Hashtable<String,LinkedHashSet<String>>();

    /**
     * Returns the candidate adjective forms of {@code word}.
     *
     * <p>The set contains, in insertion order, every stem of the word (the
     * word itself first) followed by each stem concatenated with each suffix
     * in {@link #adjective_suffixes}. Results are cached per trimmed word.
     *
     * @param word the word to expand; surrounding whitespace is ignored
     * @return a fresh, caller-owned set of candidate forms; empty when
     *         {@code word} is {@code null} or blank
     */
    public static LinkedHashSet<String> toAdjective(String word) {
        LinkedHashSet<String> forms = new LinkedHashSet<String>();
        // Hashtable rejects null keys, so guard before touching the cache.
        if (word == null) {
            return forms;
        }
        // Normalise once so the cache key, the stems and the generated forms agree.
        String key = word.trim();
        if (key.length() == 0) {
            return forms;
        }

        LinkedHashSet<String> cached = adjectivecache.get(key);
        if (cached != null) {
            // Hand out a copy so callers cannot mutate the cached entry.
            return new LinkedHashSet<String>(cached);
        }

        // Split on every miss so changes to adjective_suffixes are honoured.
        String[] suffixes = adjective_suffixes.split("\\|");
        List<String> stems = stemforms(key, suffixes);

        forms.addAll(stems);
        for (String stem : stems) {
            for (String suffix : suffixes) {
                forms.add(stem + suffix);
            }
        }

        // Store a private copy; the instance returned below belongs to the caller.
        adjectivecache.put(key, new LinkedHashSet<String>(forms));
        return forms;
    }

    /**
     * Collects the stems of {@code word}: the word itself, the word without a
     * trailing "s" (naive de-pluralisation; "es"/"ies" are not handled), and
     * each of those with one matching suffix removed. Empty stems are skipped.
     *
     * @param word     non-empty, already trimmed word
     * @param suffixes suffixes that may be stripped
     * @return distinct stems in discovery order, starting with {@code word}
     */
    private static List<String> stemforms(String word, String[] suffixes) {
        LinkedHashSet<String> stems = new LinkedHashSet<String>();
        stems.add(word); // the original word is always one of the forms

        String singular = null;
        if (word.length() > 1 && word.endsWith("s")) {
            singular = word.substring(0, word.length() - 1);
            stems.add(singular);
        }

        // Strip from the de-pluralised form first, then from the original word.
        // The second pass is what lets suffixes that end in "s" (such as "ous")
        // match: they can never match once the trailing "s" has been removed.
        if (singular != null) {
            addStrippedStems(singular, suffixes, stems);
        }
        addStrippedStems(word, suffixes, stems);

        return new ArrayList<String>(stems);
    }

    /** Adds {@code base} minus each suffix it ends with to {@code stems}, ignoring empty results. */
    private static void addStrippedStems(String base, String[] suffixes, LinkedHashSet<String> stems) {
        for (String suffix : suffixes) {
            boolean leavesNonEmptyStem = suffix.length() > 0 && base.length() > suffix.length();
            if (leavesNonEmptyStem && base.endsWith(suffix)) {
                stems.add(base.substring(0, base.length() - suffix.length()));
            }
        }
    }

    /** Smoke test: prints "rhombic" when it is among the forms generated for "rhomboid". */
    public static void main(String[] args) {
        HashSet<String> forms = toAdjective("rhomboid");
        for (String form : forms) {
            if (form.equals("rhombic")) {
                System.out.println(form);
            }
        }
    }
}