package semanticMarkup.know.lib;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;
import semanticMarkup.know.IPOSKnowledgeBase;
import semanticMarkup.ling.pos.POS;
import com.google.inject.Inject;
import com.google.inject.name.Named;
import edu.mit.jwi.Dictionary;
import edu.mit.jwi.IDictionary;
import edu.mit.jwi.RAMDictionary;
import edu.mit.jwi.item.IIndexWord;
import edu.mit.jwi.item.ISenseEntry;
import edu.mit.jwi.item.IWord;
import edu.mit.jwi.item.IWordID;
import edu.mit.jwi.morph.WordnetStemmer;
/**
* WordNetPOSKnowledgeBase implements an IPOSKnowledgeBase by relying on WordNet.
* Access to the dictionary has to be synchronized because the dictionary is cached; hence the underlying data structures are constantly subject to change.
* Because of this, parallel access to the dictionary may cause conflicts
* (e.g. a first thread causes the cached dictionary to change its content while a second thread iterates over the dictionary content).
* @author rodenhausen
*/
public class WordNetPOSKnowledgeBase implements IPOSKnowledgeBase {

    /** WordNet dictionary handle; every read goes through {@code synchronized(dictionary)}. */
    private final IDictionary dictionary;

    /**
     * Creates the knowledge base and opens the WordNet dictionary.
     *
     * @param path of the wordnet source files
     * @param loadInRAM specifies whether the dictionary should be loaded into RAM or read from disk when needed
     * @throws IOException if the dictionary reports that it could not be opened, or opening it fails
     */
    @Inject
    public WordNetPOSKnowledgeBase(@Named("WordNetAPI_Sourcefile") String path, @Named("WordNetAPI_LoadInRAM") boolean loadInRAM) throws IOException {
        File dictionaryDirectory = new File(path);
        if(loadInRAM)
            dictionary = new RAMDictionary(dictionaryDirectory, RAMDictionary.BACKGROUND_LOAD);
        else
            dictionary = new Dictionary(dictionaryDirectory);
        // open() signals failure through its return value; fail fast instead of
        // handing out an instance whose lookups cannot work.
        if(!dictionary.open())
            throw new IOException("Could not open WordNet dictionary at " + path);
    }

    /**
     * Checks whether any noun stem of the word has a noun entry in WordNet.
     *
     * @param word surface form to look up (may be inflected, e.g. a plural)
     * @return true if at least one stem of the word is known as a noun
     */
    @Override
    public boolean isNoun(String word) {
        // Synchronized like all other lookups of this class: stemmer and index
        // lookup both read the shared dictionary.
        synchronized(dictionary) {
            WordnetStemmer stemmer = new WordnetStemmer(dictionary);
            for(String stem : stemmer.findStems(word, edu.mit.jwi.item.POS.NOUN)) {
                if(dictionary.getIndexWord(stem, edu.mit.jwi.item.POS.NOUN) != null)
                    return true;
            }
            return false;
        }
    }

    /**
     * Checks whether the word, exactly as given (no stemming), has an adjective entry in WordNet.
     *
     * @param word to look up
     * @return true if an adjective index entry exists for the word
     */
    @Override
    public boolean isAdjective(String word) {
        synchronized(dictionary) {
            return dictionary.getIndexWord(word, edu.mit.jwi.item.POS.ADJECTIVE) != null;
        }
    }

    /**
     * Checks whether the word, exactly as given (no stemming), has an adverb entry in WordNet.
     *
     * @param word to look up
     * @return true if an adverb index entry exists for the word
     */
    @Override
    public boolean isAdverb(String word) {
        synchronized(dictionary) {
            return dictionary.getIndexWord(word, edu.mit.jwi.item.POS.ADVERB) != null;
        }
    }

    /**
     * Checks whether the word, exactly as given (no stemming), has a verb entry in WordNet.
     *
     * @param word to look up
     * @return true if a verb index entry exists for the word
     */
    @Override
    public boolean isVerb(String word) {
        synchronized(dictionary) {
            return dictionary.getIndexWord(word, edu.mit.jwi.item.POS.VERB) != null;
        }
    }

    /**
     * Determines the part of speech whose WordNet entry for (a stem of) the word has the
     * highest summed sense tag count. On ties the part of speech encountered first wins.
     * Needs to be synchronized, otherwise not thread safe. Underlying linkedHashMap throws ConcurrentModificationException
     *
     * @param word surface form; whitespace and underscores are stripped before the lookup
     * @return the corresponding Penn POS tag, or null if no stem of the word is found in WordNet
     */
    @Override
    public POS getMostLikleyPOS(String word) {
        //From JavaDoc: The surface form may or may not contain whitespace or underscores, and may be in mixed case.
        // Normalised once up front; the result does not depend on the POS being examined.
        String normalizedWord = word.replaceAll("\\s", "").replaceAll("_", "");

        int maxCount = -1;
        edu.mit.jwi.item.POS mostLikelyPOS = null;
        synchronized(dictionary) {
            WordnetStemmer stemmer = new WordnetStemmer(dictionary);
            for(edu.mit.jwi.item.POS pos : edu.mit.jwi.item.POS.values()) {
                for(String stem : stemmer.findStems(normalizedWord, pos)) {
                    IIndexWord indexWord = dictionary.getIndexWord(stem, pos);
                    if(indexWord == null)
                        continue;
                    int count = sumTagCounts(indexWord);
                    // strict '>' keeps the first POS on ties; maxCount starts at -1 so
                    // an entry with a total of 0 still counts as a match
                    if(count > maxCount) {
                        maxCount = count;
                        mostLikelyPOS = pos;
                    }
                }
            }
        }
        return translateWordNetPOSToPennPOS(mostLikelyPOS);
    }

    /**
     * Sums the tag counts of all senses listed for an index word. Senses for which the
     * dictionary yields no word or no sense entry contribute nothing to the sum.
     * The caller has to hold the lock on the dictionary.
     *
     * @param indexWord index entry whose senses are summed up
     * @return summed tag count over all resolvable senses
     */
    private int sumTagCounts(IIndexWord indexWord) {
        int count = 0;
        for(IWordID wordId : indexWord.getWordIDs()) {
            IWord aWord = dictionary.getWord(wordId);
            if(aWord == null)
                continue;
            ISenseEntry senseEntry = dictionary.getSenseEntry(aWord.getSenseKey());
            if(senseEntry != null)
                count += senseEntry.getTagCount();
        }
        return count;
    }

    /**
     * Maps a WordNet part of speech onto the corresponding Penn tag.
     *
     * @param pos WordNet part of speech, may be null
     * @return NN, VB, JJ or RB; null if pos is null or has no mapping
     */
    private POS translateWordNetPOSToPennPOS(edu.mit.jwi.item.POS pos) {
        if(pos==null)
            return null;
        switch(pos) {
        case NOUN:
            return POS.NN;
        case VERB:
            return POS.VB;
        case ADJECTIVE:
            return POS.JJ;
        case ADVERB:
            return POS.RB;
        default:
            return null;
        }
    }

    /**
     * Checks whether any stem of the word is known to WordNet under any part of speech.
     *
     * @param word surface form to look up
     * @return true if an index entry exists for at least one stem and part of speech
     */
    @Override
    public boolean contains(String word) {
        synchronized(dictionary) {
            WordnetStemmer stemmer = new WordnetStemmer(dictionary);
            for(edu.mit.jwi.item.POS pos : edu.mit.jwi.item.POS.values()) {
                for(String stem : stemmer.findStems(word, pos)) {
                    if(dictionary.getIndexWord(stem, pos) != null)
                        return true;
                }
            }
        }
        return false;
    }

    /**
     * Returns candidate singular forms of the word, i.e. its noun stems.
     * Stems that have a noun entry in WordNet are returned ordered by their number of
     * word IDs, highest first. If none of the stems has a noun entry, all stems are
     * returned instead; for words ending in "ies" the stems ending in "y" come first.
     *
     * @param word surface form, typically a plural noun
     * @return a list of candidate singulars, possibly empty; the list is not shared with the stemmer
     */
    @Override
    public List<String> getSingulars(String word) {
        List<String> singulars;
        TreeMap<Integer, List<String>> singularFrequencies = new TreeMap<Integer, List<String>>();
        synchronized(dictionary) {
            WordnetStemmer stemmer = new WordnetStemmer(dictionary);
            // Copy: the list is modified through an iterator below and may be handed to
            // the caller, so it must be a mutable list owned by this method.
            singulars = new ArrayList<String>(stemmer.findStems(word, edu.mit.jwi.item.POS.NOUN));

            for(String singular : singulars) {
                IIndexWord indexWord = dictionary.getIndexWord(singular, edu.mit.jwi.item.POS.NOUN);
                if(indexWord == null)
                    continue;
                int wordIdCount = indexWord.getWordIDs().size();
                List<String> sameCount = singularFrequencies.get(wordIdCount);
                if(sameCount == null) {
                    sameCount = new ArrayList<String>();
                    singularFrequencies.put(wordIdCount, sameCount);
                }
                sameCount.add(singular);
            }
        }

        List<String> result = new ArrayList<String>();
        Map<Integer, List<String>> reverseMap = singularFrequencies.descendingMap();
        for(Entry<Integer, List<String>> entry : reverseMap.entrySet())
            result.addAll(entry.getValue());

        if(result.isEmpty()) {
            if(word.endsWith("ies")) {
                // move the "...y" stems to the front, keep the remaining stems behind them
                Iterator<String> singularsIterator = singulars.iterator();
                while(singularsIterator.hasNext()) {
                    String singular = singularsIterator.next();
                    if(singular.endsWith("y")) {
                        result.add(singular);
                        singularsIterator.remove();
                    }
                }
                result.addAll(singulars);
            } else {
                result = singulars;
            }
        }
        return result;
    }

    /**
     * Manual smoke test: opens the dictionary from a relative resource path and prints
     * whether "apples" is recognised as a noun.
     *
     * @param args unused
     * @throws IOException if the dictionary cannot be opened
     */
    public static void main(String[] args) throws IOException{
        WordNetPOSKnowledgeBase wordNetAPI = new WordNetPOSKnowledgeBase("res//WordNet//WordNet-3.0//dict", false);
        System.out.println(wordNetAPI.isNoun("apples"));
    }

    /**
     * Checks whether the word is an adjective and nothing else.
     *
     * @param word to look up
     * @return true if the word is an adjective but neither a noun, a verb nor an adverb
     */
    public boolean isSoleAdjective(String word) {
        return (!this.isNoun(word))
                && (!this.isVerb(word))
                && (this.isAdjective(word))
                && (!this.isAdverb(word));
    }

    /** No-op: this implementation does not record added verbs. */
    @Override
    public void addVerb(String word) {}

    /** No-op: this implementation does not record added nouns. */
    @Override
    public void addNoun(String word) {}

    /** No-op: this implementation does not record added adjectives. */
    @Override
    public void addAdjective(String word) {}

    /** No-op: this implementation does not record added adverbs. */
    @Override
    public void addAdverb(String word) {}
}