// WordNet.java example

// Explorer
// lucida-master
// - lucida
package info.ephyra.nlp.semantics.ontologies;

import info.ephyra.questionanalysis.TermExpander;

import java.io.File;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;

import net.didion.jwnl.JWNL;
import net.didion.jwnl.JWNLException;
import net.didion.jwnl.data.IndexWord;
import net.didion.jwnl.data.IndexWordSet;
import net.didion.jwnl.data.POS;
import net.didion.jwnl.data.PointerUtils;
import net.didion.jwnl.data.Synset;
import net.didion.jwnl.data.Word;
import net.didion.jwnl.data.list.PointerTargetNode;
import net.didion.jwnl.data.list.PointerTargetNodeList;

/**
 * <p>An interface to <a href="http://wordnet.princeton.edu/">WordNet</a>, a
 * lexical database for the English language.</p>
 * 
 * <p>This class implements the interface <code>Ontology</code>.</p>
 * 
 * @author Nico Schlaefer
 * @version 2007-05-30
 */
public class WordNet implements Ontology {
	/** Indicates that a word is an adjective. */
	public static final POS ADJECTIVE = POS.ADJECTIVE;
	/** Indicates that a word is an adverb. */
	public static final POS ADVERB = POS.ADVERB;
	/** Indicates that a word is a noun. */
	public static final POS NOUN = POS.NOUN;
	/** Indicates that a word is a verb. */
	public static final POS VERB = POS.VERB;
	
	/**
	 * Maximum length of a path to an expansion. The <code>expand...</code>
	 * methods make at most this many passes over the set of synsets that are
	 * still waiting to be expanded.
	 */
	public static final int MAX_PATH_LENGTH = 1;
	
	// Relation weights: when a synset is expanded, the weight of a related
	// synset is the weight of the expanded synset multiplied by the weight of
	// the relation. A relation is only followed if that product is at least
	// TermExpander.MIN_EXPANSION_WEIGHT.
	
	// relations for multiple parts of speech
	/**
	 * Weight for the relation 'synonym'. Assigned to the lemmas of the initial
	 * synset; no expansion takes place if it is below
	 * <code>TermExpander.MIN_EXPANSION_WEIGHT</code>.
	 */
	private static final double SYNONYM_WEIGHT = 0.9;
	/** Weight for the relation 'hypernym'. */
	private static final double HYPERNYM_WEIGHT = 0.8;
	/** Weight for the relation 'hyponym'. */
	private static final double HYPONYM_WEIGHT = 0.7;
//	/** Weight for the relation 'see-also'. */
//	private static final double SEE_ALSO_WEIGHT = 0.5;
//	/** Weight for the relation 'gloss'. */
//	private static final double GLOSS_WEIGHT = 0.6;
//	/** Weight for the relation 'rgloss'. */
//	private static final double RGLOSS_WEIGHT = 0.2;
	
	// relations for verbs
	/** Weight for the relation 'entailing'. */
	private static final double ENTAILING_WEIGHT = 0.7;
	/** Weight for the relation 'causing'. */
	private static final double CAUSING_WEIGHT = 0.5;
	
	// relations for nouns
	/** Weight for the relation 'member-of'. */
	private static final double MEMBER_OF_WEIGHT = 0.5;
	/** Weight for the relation 'substance-of'. */
	private static final double SUBSTANCE_OF_WEIGHT = 0.5;
	/** Weight for the relation 'part-of'. */
	private static final double PART_OF_WEIGHT = 0.5;
	/** Weight for the relation 'has-member'. */
	private static final double HAS_MEMBER_WEIGHT = 0.5;
	/** Weight for the relation 'has-substance'. */
	private static final double HAS_SUBSTANCE_WEIGHT = 0.5;
	/** Weight for the relation 'has-part'. */
	private static final double HAS_PART_WEIGHT = 0.5;
	
	// relations for adjectives and adverbs (currently disabled)
//	/** Weight for the relation 'pertainym'. */
//	private static final double PERTAINYM_WEIGHT = 0.5;
	
	/**
	 * WordNet dictionary. Remains <code>null</code> until
	 * <code>initialize()</code> has completed successfully; the lookup methods
	 * that check for <code>null</code> report a failed lookup in that case.
	 */
	private static net.didion.jwnl.dictionary.Dictionary dict;
	
	/**
	 * Initializes the wrapper for the WordNet dictionary.
	 * 
	 * @param properties path of the JWNL property file
	 * @return <code>true</code> iff the dictionary was initialized
	 *         successfully
	 */
	public static boolean initialize(String properties) {
		FileInputStream in = null;
		try {
			in = new FileInputStream(new File(properties));
			JWNL.initialize(in);
			
			dict = net.didion.jwnl.dictionary.Dictionary.getInstance();
		} catch (Exception e) {
			// any failure (missing file, invalid properties, JWNL error) is
			// reported to the caller through the return value
			return false;
		} finally {
			// the stream is only needed while JWNL reads the properties
			if (in != null) {
				try {
					in.close();
				} catch (java.io.IOException e) {
					// nothing useful can be done if closing fails
				}
			}
		}
		
		return true;
	}
	
	/**
	 * Checks if the word exists in WordNet.
	 * 
	 * @param word a word
	 * @return <code>true</code> iff the word is in WordNet,
	 *         <code>false</code> also if the dictionary is not initialized or
	 *         the lookup failed
	 */
	public static boolean isWord(String word) {
		if (dict == null) return false;
		
		IndexWordSet indexWordSet = null;
		try {
			indexWordSet = dict.lookupAllIndexWords(word);
		} catch (JWNLException e) {
			// a failed lookup is treated like an unknown word
		}
		
		// the set is still null if the lookup threw an exception
		return indexWordSet != null && indexWordSet.size() > 0;
	}
	
	/**
	 * Checks if the word exists in WordNet. Supports multi-token terms.
	 * 
	 * @param word a word
	 * @return <code>true</code> iff the word is in WordNet,
	 *         <code>false</code> also if the dictionary is not initialized or
	 *         the lookup failed
	 */
	public static boolean isCompoundWord(String word) {
		if (dict == null) return false;
		
		// do not look up words with special characters other than '.'
		if (word.matches(".*?[^\\w\\s\\.].*+")) return false;
		
		IndexWordSet indexWordSet = null;
		try {
			indexWordSet = dict.lookupAllIndexWords(word);
		} catch (JWNLException e) {
			// a failed lookup is treated like an unknown word
		}
		// the set is still null if the lookup threw an exception
		if (indexWordSet == null) return false;
		
		// ensure that the word, and not just a substring, was found in WordNet:
		// the lemma must have as many whitespace-separated tokens and as many
		// dots as the word
		int wordTokens = word.split("\\s", -1).length;
		int wordDots = word.split("\\.", -1).length;
		for (IndexWord indexWord : indexWordSet.getIndexWordArray()) {
			String lemma = indexWord.getLemma();
			int lemmaTokens = lemma.split("\\s", -1).length;
			int lemmaDots = lemma.split("\\.", -1).length;
			if (wordTokens == lemmaTokens && wordDots == lemmaDots) return true;
		}
		return false;
	}
	
	/**
	 * Tests whether WordNet lists the word as an adjective.
	 * 
	 * @param word a word
	 * @return <code>true</code> iff an adjective entry was found,
	 *         <code>false</code> also if the dictionary is not initialized or
	 *         the lookup failed
	 */
	public static boolean isAdjective(String word) {
		if (dict == null) return false;
		
		try {
			return dict.lookupIndexWord(POS.ADJECTIVE, word) != null;
		} catch (JWNLException lookupFailed) {
			return false;
		}
	}
	
	/**
	 * Tests whether WordNet lists the word as an adverb.
	 * 
	 * @param word a word
	 * @return <code>true</code> iff an adverb entry was found,
	 *         <code>false</code> also if the dictionary is not initialized or
	 *         the lookup failed
	 */
	public static boolean isAdverb(String word) {
		if (dict == null) return false;
		
		try {
			return dict.lookupIndexWord(POS.ADVERB, word) != null;
		} catch (JWNLException lookupFailed) {
			return false;
		}
	}
	
	/**
	 * Tests whether WordNet lists the word as a noun.
	 * 
	 * @param word a word
	 * @return <code>true</code> iff a noun entry was found,
	 *         <code>false</code> also if the dictionary is not initialized or
	 *         the lookup failed
	 */
	public static boolean isNoun(String word) {
		if (dict == null) return false;
		
		try {
			return dict.lookupIndexWord(POS.NOUN, word) != null;
		} catch (JWNLException lookupFailed) {
			return false;
		}
	}
	
	/**
	 * Tests whether WordNet lists the word as a noun. Supports multi-token
	 * terms.
	 * 
	 * @param word a word
	 * @return <code>true</code> iff a noun entry was found whose lemma has as
	 *         many tokens and dots as the word
	 */
	public static boolean isCompoundNoun(String word) {
		if (dict == null) return false;
		
		// do not look up words with special characters other than '.'
		if (word.matches(".*?[^\\w\\s\\.].*+")) return false;
		
		IndexWord entry;
		try {
			entry = dict.lookupIndexWord(POS.NOUN, word);
		} catch (JWNLException lookupFailed) {
			return false;
		}
		if (entry == null) return false;
		
		// ensure that the word, and not just a substring, was found in WordNet
		final String lemma = entry.getLemma();
		final boolean sameTokenCount =
			word.split("\\s", -1).length == lemma.split("\\s", -1).length;
		final boolean sameDotCount =
			word.split("\\.", -1).length == lemma.split("\\.", -1).length;
		return sameTokenCount && sameDotCount;
	}
	
	/**
	 * Tests whether WordNet lists the word as a verb.
	 * 
	 * @param word a word
	 * @return <code>true</code> iff a verb entry was found,
	 *         <code>false</code> also if the dictionary is not initialized or
	 *         the lookup failed
	 */
	public static boolean isVerb(String word) {
		if (dict == null) return false;
		
		try {
			return dict.lookupIndexWord(POS.VERB, word) != null;
		} catch (JWNLException lookupFailed) {
			return false;
		}
	}
	
	/**
	 * Retrieves the lemma of a word, with underscores replaced by blanks.
	 * 
	 * @param word a word
	 * @param pos its part of speech
	 * @return lemma or <code>null</code> if lookup failed
	 */
	public static String getLemma(String word, POS pos) {
		if (dict == null) return null;
		
		IndexWord entry;
		try {
			entry = dict.lookupIndexWord(pos, word);
		} catch (JWNLException lookupFailed) {
			return null;
		}
		
		return (entry == null) ? null : entry.getLemma().replace("_", " ");
	}
	
	/**
	 * Retrieves the lemma of a compound word. The lemma is only accepted if it
	 * has as many tokens and dots as the word.
	 * 
	 * @param word a word
	 * @param pos its part of speech
	 * @return lemma or <code>null</code> if lookup failed
	 */
	public static String getCompoundLemma(String word, POS pos) {
		// do not look up words with special characters other than '.'
		if (word.matches(".*?[^\\w\\s\\.].*+")) return null;
		
		final String lemma = getLemma(word, pos);
		if (lemma == null) return null;
		
		// ensure that the word, and not just a substring, was found in WordNet
		final boolean sameTokenCount =
			word.split("\\s", -1).length == lemma.split("\\s", -1).length;
		final boolean sameDotCount =
			word.split("\\.", -1).length == lemma.split("\\.", -1).length;
		
		return (sameTokenCount && sameDotCount) ? lemma : null;
	}
	
	/**
	 * Retrieves the first sense of a word, which this class treats as its most
	 * common synset.
	 * 
	 * @param word a word
	 * @param pos its part of speech
	 * @return synset or <code>null</code> if lookup failed
	 */
	private static Synset getCommonSynset(String word, POS pos) {
		if (dict == null) return null;
		
		try {
			final IndexWord entry = dict.lookupIndexWord(pos, word);
			return (entry == null) ? null : entry.getSense(1);
		} catch (JWNLException lookupFailed) {
			return null;
		}
	}
	
	/**
	 * Extracts the synset of each node in a node list, preserving the order of
	 * the list.
	 * 
	 * @param nodes node list
	 * @return synsets
	 */
	private static Synset[] getSynsets(PointerTargetNodeList nodes) {
		final int count = nodes.size();
		final Synset[] result = new Synset[count];
		
		for (int idx = 0; idx < count; idx++) {
			result[idx] = ((PointerTargetNode) nodes.get(idx)).getSynset();
		}
		
		return result;
	}
	
	/**
	 * Collects the lemmas of the words in a synset, with underscores replaced
	 * by blanks.
	 * 
	 * @param synset a synset
	 * @return lemmas
	 */
	private static String[] getLemmas(Synset synset) {
		final Word[] members = synset.getWords();
		final String[] result = new String[members.length];
		
		int next = 0;
		for (Word member : members) {
			result[next++] = member.getLemma().replace("_", " ");
		}
		
		return result;
	}
	
	/**
	 * Collects the distinct lemmas of the words in all given synsets.
	 * 
	 * @param synsets the synsets
	 * @return lemmas without duplicates
	 */
	private static String[] getLemmas(Synset[] synsets) {
		final HashSet<String> unique = new HashSet<String>();
		
		for (int s = 0; s < synsets.length; s++) {
			final String[] fromSynset = getLemmas(synsets[s]);
			for (int l = 0; l < fromSynset.length; l++) {
				unique.add(fromSynset[l]);
			}
		}
		
		final String[] result = new String[unique.size()];
		return unique.toArray(result);
	}
	
	// relations for multiple parts of speech
	
	/**
	 * Retrieves the synonyms of the given word, i.e. the lemmas in the synset
	 * of its most common sense.
	 * 
	 * @param word a word
	 * @param pos its part of speech
	 * @return synonyms or <code>null</code> if lookup failed
	 */
	public static String[] getSynonyms(String word, POS pos) {
		final Synset sense = getCommonSynset(word, pos);
		
		return (sense == null) ? null : getLemmas(sense);
	}
	
	/**
	 * Retrieves the hypernyms of the given word, taking the word in its most
	 * common sense.
	 * 
	 * @param word a word
	 * @param pos its part of speech
	 * @return hypernyms or <code>null</code> if lookup failed
	 */
	public static String[] getHypernyms(String word, POS pos) {
		final Synset sense = getCommonSynset(word, pos);
		final Synset[] related =
			(sense == null) ? null : getHypernymSynsets(sense);
		
		return (related == null) ? null : getLemmas(related);
	}
	
	// get the synsets of the direct hypernyms, null if the lookup failed
	private static Synset[] getHypernymSynsets(Synset synset) {
		PointerTargetNodeList nodes;
		try {
			nodes = PointerUtils.getInstance().getDirectHypernyms(synset);
		} catch (JWNLException lookupFailed) {
			return null;
		}
		
		return (nodes != null) ? getSynsets(nodes) : null;
	}
	
	/**
	 * Retrieves the hyponyms of the given word, taking the word in its most
	 * common sense.
	 * 
	 * @param word a word
	 * @param pos its part of speech
	 * @return hyponyms or <code>null</code> if lookup failed
	 */
	public static String[] getHyponyms(String word, POS pos) {
		final Synset sense = getCommonSynset(word, pos);
		final Synset[] related =
			(sense == null) ? null : getHyponymSynsets(sense);
		
		return (related == null) ? null : getLemmas(related);
	}
	
	/**
	 * Looks up hyponyms of the synset with the given POS and offset.
	 * 
	 * @param pos POS of the synset
	 * @param offset offset of the synset
	 * @return hyponyms or <code>null</code> if lookup failed, which includes
	 *         the case that the dictionary has not been initialized
	 */
	public static String[] getHyponyms(POS pos, long offset) {
		// like the other lookups, fail gracefully before initialization
		if (dict == null) return null;
		
		Synset synset = null;
		try {
			synset = dict.getSynsetAt(pos, offset);
		} catch (JWNLException e) {
			// a failed lookup is reported as null below
		}
		if (synset == null) return null;
		
		Synset[] hyponyms = getHyponymSynsets(synset);
		if (hyponyms == null) return null;
		
		return getLemmas(hyponyms);
	}
	
	/**
	 * Retrieves the hyponyms of the noun synset at the given offset.
	 * 
	 * @param offset offset of the synset
	 * @return hyponyms or <code>null</code> if lookup failed
	 */
	public static String[] getNounHyponyms(long offset) {
		final POS nounPos = POS.NOUN;
		
		return getHyponyms(nounPos, offset);
	}
	
	// get the synsets of the direct hyponyms, null if the lookup failed
	private static Synset[] getHyponymSynsets(Synset synset) {
		PointerTargetNodeList nodes;
		try {
			nodes = PointerUtils.getInstance().getDirectHyponyms(synset);
		} catch (JWNLException lookupFailed) {
			return null;
		}
		
		return (nodes != null) ? getSynsets(nodes) : null;
	}
	
	// relations for verbs
	
	/**
	 * Retrieves the verbs that WordNet links to the given verb through its
	 * entailment relation, taking the verb in its most common sense.
	 * 
	 * @param verb a verb
	 * @return entailing verbs or <code>null</code> if lookup failed
	 */
	public static String[] getEntailing(String verb) {
		final Synset sense = getCommonSynset(verb, VERB);
		final Synset[] related =
			(sense == null) ? null : getEntailingSynsets(sense);
		
		return (related == null) ? null : getLemmas(related);
	}
	
	// get the synsets of the entailments, null if the lookup failed
	private static Synset[] getEntailingSynsets(Synset synset) {
		PointerTargetNodeList nodes;
		try {
			nodes = PointerUtils.getInstance().getEntailments(synset);
		} catch (JWNLException lookupFailed) {
			return null;
		}
		
		return (nodes != null) ? getSynsets(nodes) : null;
	}
	
	/**
	 * Retrieves the verbs that WordNet links to the given verb through its
	 * cause relation, taking the verb in its most common sense.
	 * 
	 * @param verb a verb
	 * @return causing verbs or <code>null</code> if lookup failed
	 */
	public static String[] getCausing(String verb) {
		final Synset sense = getCommonSynset(verb, VERB);
		final Synset[] related =
			(sense == null) ? null : getCausingSynsets(sense);
		
		return (related == null) ? null : getLemmas(related);
	}
	
	// get the synsets of the causes, null if the lookup failed
	private static Synset[] getCausingSynsets(Synset synset) {
		PointerTargetNodeList nodes;
		try {
			nodes = PointerUtils.getInstance().getCauses(synset);
		} catch (JWNLException lookupFailed) {
			return null;
		}
		
		return (nodes != null) ? getSynsets(nodes) : null;
	}
	
	// relations for nouns
	
	/**
	 * Retrieves the member holonyms of the given noun, taking the noun in its
	 * most common sense.
	 * 
	 * @param noun a noun
	 * @return member holonyms or <code>null</code> if lookup failed
	 */
	public static String[] getMembersOf(String noun) {
		final Synset sense = getCommonSynset(noun, NOUN);
		final Synset[] related =
			(sense == null) ? null : getMemberOfSynsets(sense);
		
		return (related == null) ? null : getLemmas(related);
	}
	
	// get the synsets of the member holonyms, null if the lookup failed
	private static Synset[] getMemberOfSynsets(Synset synset) {
		PointerTargetNodeList nodes;
		try {
			nodes = PointerUtils.getInstance().getMemberHolonyms(synset);
		} catch (JWNLException lookupFailed) {
			return null;
		}
		
		return (nodes != null) ? getSynsets(nodes) : null;
	}
	
	/**
	 * Retrieves the substance holonyms of the given noun, taking the noun in
	 * its most common sense.
	 * 
	 * @param noun a noun
	 * @return substance holonyms or <code>null</code> if lookup failed
	 */
	public static String[] getSubstancesOf(String noun) {
		final Synset sense = getCommonSynset(noun, NOUN);
		final Synset[] related =
			(sense == null) ? null : getSubstanceOfSynsets(sense);
		
		return (related == null) ? null : getLemmas(related);
	}
	
	// get the synsets of the substance holonyms, null if the lookup failed
	private static Synset[] getSubstanceOfSynsets(Synset synset) {
		PointerTargetNodeList nodes;
		try {
			nodes = PointerUtils.getInstance().getSubstanceHolonyms(synset);
		} catch (JWNLException lookupFailed) {
			return null;
		}
		
		return (nodes != null) ? getSynsets(nodes) : null;
	}
	
	/**
	 * Retrieves the part holonyms of the given noun, taking the noun in its
	 * most common sense.
	 * 
	 * @param noun a noun
	 * @return part holonyms or <code>null</code> if lookup failed
	 */
	public static String[] getPartsOf(String noun) {
		final Synset sense = getCommonSynset(noun, NOUN);
		final Synset[] related =
			(sense == null) ? null : getPartOfSynsets(sense);
		
		return (related == null) ? null : getLemmas(related);
	}
	
	// get the synsets of the part holonyms, null if the lookup failed
	private static Synset[] getPartOfSynsets(Synset synset) {
		PointerTargetNodeList nodes;
		try {
			nodes = PointerUtils.getInstance().getPartHolonyms(synset);
		} catch (JWNLException lookupFailed) {
			return null;
		}
		
		return (nodes != null) ? getSynsets(nodes) : null;
	}
	
	/**
	 * Retrieves the member meronyms of the given noun, taking the noun in its
	 * most common sense.
	 * 
	 * @param noun a noun
	 * @return member meronyms or <code>null</code> if lookup failed
	 */
	public static String[] getHaveMember(String noun) {
		final Synset sense = getCommonSynset(noun, NOUN);
		final Synset[] related =
			(sense == null) ? null : getHasMemberSynsets(sense);
		
		return (related == null) ? null : getLemmas(related);
	}
	
	// get the synsets of the member meronyms, null if the lookup failed
	private static Synset[] getHasMemberSynsets(Synset synset) {
		PointerTargetNodeList nodes;
		try {
			nodes = PointerUtils.getInstance().getMemberMeronyms(synset);
		} catch (JWNLException lookupFailed) {
			return null;
		}
		
		return (nodes != null) ? getSynsets(nodes) : null;
	}
	
	/**
	 * Retrieves the substance meronyms of the given noun, taking the noun in
	 * its most common sense.
	 * 
	 * @param noun a noun
	 * @return substance meronyms or <code>null</code> if lookup failed
	 */
	public static String[] getHaveSubstance(String noun) {
		final Synset sense = getCommonSynset(noun, NOUN);
		final Synset[] related =
			(sense == null) ? null : getHasSubstanceSynsets(sense);
		
		return (related == null) ? null : getLemmas(related);
	}
	
	// get the synsets of the substance meronyms, null if the lookup failed
	private static Synset[] getHasSubstanceSynsets(Synset synset) {
		PointerTargetNodeList nodes;
		try {
			nodes = PointerUtils.getInstance().getSubstanceMeronyms(synset);
		} catch (JWNLException lookupFailed) {
			return null;
		}
		
		return (nodes != null) ? getSynsets(nodes) : null;
	}
	
	/**
	 * Retrieves the part meronyms of the given noun, taking the noun in its
	 * most common sense.
	 * 
	 * @param noun a noun
	 * @return part meronyms or <code>null</code> if lookup failed
	 */
	public static String[] getHavePart(String noun) {
		final Synset sense = getCommonSynset(noun, NOUN);
		final Synset[] related =
			(sense == null) ? null : getHasPartSynsets(sense);
		
		return (related == null) ? null : getLemmas(related);
	}
	
	// get the synsets of the part meronyms, null if the lookup failed
	private static Synset[] getHasPartSynsets(Synset synset) {
		PointerTargetNodeList nodes;
		try {
			nodes = PointerUtils.getInstance().getPartMeronyms(synset);
		} catch (JWNLException lookupFailed) {
			return null;
		}
		
		return (nodes != null) ? getSynsets(nodes) : null;
	}
	
	// implement the interface 'Ontology'
	
	/**
	 * Checks whether the ontology knows a word. Only compound nouns are
	 * looked up.
	 * 
	 * @param word the word to look up
	 * @return <code>true</code> iff the word was found
	 */
	public boolean contains(String word) {
//		// look for compound nouns and verbs
//		return isCompoundWord(word);
		// only look for compound nouns
		final boolean knownNoun = isCompoundNoun(word);
		
		return knownNoun;
	}
	
	/**
	 * Expands an event by looking up related events. Starting from the most
	 * common verb sense of the event, the relations 'hypernym', 'hyponym',
	 * 'entailing' and 'causing' are followed.
	 * 
	 * @param event an event
	 * @return related events and their weights, empty if the event is not
	 *         found as a verb
	 */
	public Map<String, Double> expandEvent(String event) {
		if (!isVerb(event)) return new Hashtable<String, Double>();
		
		return expand(event, VERB, RelationSet.VERB);
	}
	
	/**
	 * Expands an entity by looking up related entities. Starting from the most
	 * common noun sense of the entity, the relations 'hypernym', 'hyponym',
	 * 'member-of', 'substance-of', 'part-of', 'has-member', 'has-substance'
	 * and 'has-part' are followed.
	 * 
	 * @param entity an entity
	 * @return related entities and their weights, empty if the entity is not
	 *         found as a (compound) noun
	 */
	public Map<String, Double> expandEntity(String entity) {
		if (!isCompoundNoun(entity)) return new Hashtable<String, Double>();
		
		return expand(entity, NOUN, RelationSet.NOUN);
	}
	
	/**
	 * Expands a modifier by looking up related modifiers. Currently no
	 * relations other than synonyms are used.
	 * 
	 * @param modifier a modifier
	 * @param pos its part of speech: <code>POS.ADJECTIVE</code> or
	 *            <code>POS.ADVERB</code>
	 * @return related modifiers and their weights
	 */
	public Map<String, Double> expandModifier(String modifier, POS pos) {
		if ((pos.equals(ADJECTIVE) && !isAdjective(modifier)) ||
				(pos.equals(ADVERB) && !isAdverb(modifier)))
			return new Hashtable<String, Double>();
		
		return expand(modifier, pos, RelationSet.NONE);
	}
	
	/** Selects the relations that are followed when a synset is expanded. */
	private enum RelationSet {
		/** No relations, only the synonyms in the initial synset. */
		NONE,
		/** 'hypernym', 'hyponym', 'entailing' and 'causing'. */
		VERB,
		/** 'hypernym', 'hyponym' and the holonym and meronym relations. */
		NOUN
	}
	
	/**
	 * Expands a term: starts at its most common synset, follows the selected
	 * relations for at most <code>MAX_PATH_LENGTH</code> passes and collects
	 * the lemmas of all synsets that were reached. Each lemma gets the largest
	 * weight of any synset it occurs in; the term itself is dropped.
	 * 
	 * @param term the term to expand
	 * @param pos its part of speech
	 * @param relations the relations to follow
	 * @return expansions and their weights
	 */
	private static Map<String, Double> expand(String term, POS pos,
			RelationSet relations) {
		// synsets of related concepts that are waiting to be expanded
		Map<Synset, Double> synsets = new Hashtable<Synset, Double>();
		// synsets that have already been expanded
		Map<Synset, Double> expanded = new Hashtable<Synset, Double>();
		
		// get most common synset
		double hurdle = TermExpander.MIN_EXPANSION_WEIGHT;
		if (SYNONYM_WEIGHT >= hurdle) {
			Synset synset = getCommonSynset(term, pos);
			if (synset != null) synsets.put(synset, 1d);
		}
		
		// expand synsets
		int pathLength = 0;
		while (pathLength++ < MAX_PATH_LENGTH && synsets.size() > 0) {
			// snapshot the waiting synsets and their weights, the map is
			// modified while the snapshot is processed
			Synset[] currSynsets =
				synsets.keySet().toArray(new Synset[synsets.size()]);
			double[] currWeights = new double[currSynsets.length];
			for (int i = 0; i < currSynsets.length; i++)
				currWeights[i] = synsets.get(currSynsets[i]);
			
			for (int i = 0; i < currSynsets.length; i++) {
				Synset synset = currSynsets[i];
				double weight = currWeights[i];
				
				// move to expanded synsets, but keep the synset waiting if it
				// has been re-added with a different weight in the meantime
				Double waiting = synsets.get(synset);
				if (waiting != null && waiting == weight)
					synsets.remove(synset);
				if (!putIfHeavier(expanded, synset, weight)) continue;
				
				addRelated(synsets, synset, weight, hurdle, relations);
			}
		}
		
		// synsets that are still waiting were reached but not expanded
		for (Map.Entry<Synset, Double> waiting : synsets.entrySet())
			putIfHeavier(expanded, waiting.getKey(), waiting.getValue());
		
		// get concepts in synsets
		Map<String, Double> expansions = new Hashtable<String, Double>();
		for (Map.Entry<Synset, Double> entry : expanded.entrySet()) {
			double weight = entry.getValue();
			if (weight == 1) weight = SYNONYM_WEIGHT;  // direct synonyms
			for (String expansion : getLemmas(entry.getKey()))
				putIfHeavier(expansions, expansion, weight);
		}
		
		// the term is not an expansion of itself; collect first, the key set
		// must not be modified while it is iterated
		List<String> dropped = new ArrayList<String>();
		for (String expansion : expansions.keySet())
			if (expansion.equalsIgnoreCase(term)) dropped.add(expansion);
		for (String expansion : dropped) expansions.remove(expansion);
		
		return expansions;
	}
	
	/**
	 * Adds the synsets related to an expanded synset to the waiting synsets. A
	 * relation is only looked up if the resulting weight reaches the hurdle.
	 * 
	 * @param synsets waiting synsets and their weights
	 * @param synset the synset that is expanded
	 * @param weight weight of the expanded synset
	 * @param hurdle minimum weight of an expansion
	 * @param relations the relations to follow
	 */
	private static void addRelated(Map<Synset, Double> synsets, Synset synset,
			double weight, double hurdle, RelationSet relations) {
		if (relations == RelationSet.NONE) return;
		
		// relations for multiple parts of speech
		double hypernymWeight = weight * HYPERNYM_WEIGHT;
		if (hypernymWeight >= hurdle)
			enqueue(synsets, getHypernymSynsets(synset), hypernymWeight);
		double hyponymWeight = weight * HYPONYM_WEIGHT;
		if (hyponymWeight >= hurdle)
			enqueue(synsets, getHyponymSynsets(synset), hyponymWeight);
		
		if (relations == RelationSet.VERB) {
			// relations for verbs
			double entailingWeight = weight * ENTAILING_WEIGHT;
			if (entailingWeight >= hurdle)
				enqueue(synsets, getEntailingSynsets(synset), entailingWeight);
			double causingWeight = weight * CAUSING_WEIGHT;
			if (causingWeight >= hurdle)
				enqueue(synsets, getCausingSynsets(synset), causingWeight);
		} else {
			// relations for nouns
			double memberOfWeight = weight * MEMBER_OF_WEIGHT;
			if (memberOfWeight >= hurdle)
				enqueue(synsets, getMemberOfSynsets(synset), memberOfWeight);
			double substanceOfWeight = weight * SUBSTANCE_OF_WEIGHT;
			if (substanceOfWeight >= hurdle)
				enqueue(synsets, getSubstanceOfSynsets(synset),
						substanceOfWeight);
			double partOfWeight = weight * PART_OF_WEIGHT;
			if (partOfWeight >= hurdle)
				enqueue(synsets, getPartOfSynsets(synset), partOfWeight);
			double hasMemberWeight = weight * HAS_MEMBER_WEIGHT;
			if (hasMemberWeight >= hurdle)
				enqueue(synsets, getHasMemberSynsets(synset), hasMemberWeight);
			double hasSubstanceWeight = weight * HAS_SUBSTANCE_WEIGHT;
			if (hasSubstanceWeight >= hurdle)
				enqueue(synsets, getHasSubstanceSynsets(synset),
						hasSubstanceWeight);
			double hasPartWeight = weight * HAS_PART_WEIGHT;
			if (hasPartWeight >= hurdle)
				enqueue(synsets, getHasPartSynsets(synset), hasPartWeight);
		}
	}
	
	/**
	 * Adds related synsets with the given weight to the waiting synsets.
	 * 
	 * @param synsets waiting synsets and their weights
	 * @param related related synsets, <code>null</code> if the lookup failed
	 * @param weight weight of the related synsets
	 */
	private static void enqueue(Map<Synset, Double> synsets, Synset[] related,
			double weight) {
		// a failed lookup yields null instead of an array
		if (related == null) return;
		
		for (Synset synset : related) putIfHeavier(synsets, synset, weight);
	}
	
	/**
	 * Stores a weight for a key unless the key already has a weight that is at
	 * least as large.
	 * 
	 * @param weights keys and their weights
	 * @param key the key
	 * @param weight the new weight
	 * @return <code>true</code> iff the weight was stored
	 */
	private static <K> boolean putIfHeavier(Map<K, Double> weights, K key,
			double weight) {
		Double known = weights.get(key);
		if (known != null && known >= weight) return false;
		
		weights.put(key, weight);
		return true;
	}
}