package info.ephyra.nlp.semantics.ontologies;

import info.ephyra.questionanalysis.TermExpander;

import java.io.File;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;

import net.didion.jwnl.JWNL;
import net.didion.jwnl.JWNLException;
import net.didion.jwnl.data.IndexWord;
import net.didion.jwnl.data.IndexWordSet;
import net.didion.jwnl.data.POS;
import net.didion.jwnl.data.PointerUtils;
import net.didion.jwnl.data.Synset;
import net.didion.jwnl.data.Word;
import net.didion.jwnl.data.list.PointerTargetNode;
import net.didion.jwnl.data.list.PointerTargetNodeList;

/**
 * <p>An interface to <a href="http://wordnet.princeton.edu/">WordNet</a>, a
 * lexical database for the English language.</p>
 *
 * <p>This class implements the interface <code>Ontology</code>.</p>
 *
 * <p>All static lookup methods report a failed lookup (dictionary not
 * initialized, word unknown, or a <code>JWNLException</code>) through their
 * return value (<code>false</code> or <code>null</code>) instead of throwing.
 * </p>
 *
 * @author Nico Schlaefer
 * @version 2007-05-30
 */
public class WordNet implements Ontology {
	/** Indicates that a word is an adjective. */
	public static final POS ADJECTIVE = POS.ADJECTIVE;
	/** Indicates that a word is an adverb. */
	public static final POS ADVERB = POS.ADVERB;
	/** Indicates that a word is a noun. */
	public static final POS NOUN = POS.NOUN;
	/** Indicates that a word is a verb. */
	public static final POS VERB = POS.VERB;

	/** Maximum length of a path to an expansion. */
	public static final int MAX_PATH_LENGTH = 1;

	// relations for multiple parts of speech
	/** Weight for the relation 'synonym'. */
	private static final double SYNONYM_WEIGHT = 0.9;
	/** Weight for the relation 'hypernym'. */
	private static final double HYPERNYM_WEIGHT = 0.8;
	/** Weight for the relation 'hyponym'. */
	private static final double HYPONYM_WEIGHT = 0.7;
//	/** Weight for the relation 'see-also'. */
//	private static final double SEE_ALSO_WEIGHT = 0.5;
//	/** Weight for the relation 'gloss'. */
//	private static final double GLOSS_WEIGHT = 0.6;
//	/** Weight for the relation 'rgloss'. */
//	private static final double RGLOSS_WEIGHT = 0.2;

	// relations for verbs
	/** Weight for the relation 'entailing'. */
	private static final double ENTAILING_WEIGHT = 0.7;
	/** Weight for the relation 'causing'. */
	private static final double CAUSING_WEIGHT = 0.5;

	// relations for nouns
	/** Weight for the relation 'member-of'. */
	private static final double MEMBER_OF_WEIGHT = 0.5;
	/** Weight for the relation 'substance-of'. */
	private static final double SUBSTANCE_OF_WEIGHT = 0.5;
	/** Weight for the relation 'part-of'. */
	private static final double PART_OF_WEIGHT = 0.5;
	/** Weight for the relation 'has-member'. */
	private static final double HAS_MEMBER_WEIGHT = 0.5;
	/** Weight for the relation 'has-substance'. */
	private static final double HAS_SUBSTANCE_WEIGHT = 0.5;
	/** Weight for the relation 'has-part'. */
	private static final double HAS_PART_WEIGHT = 0.5;

	// relations for adjectives and adverbs
//	/** Weight for the relation 'pertainym'. */
//	private static final double PERTAINYM_WEIGHT = 0.5;

	/**
	 * Pattern matching words that contain a special character, i.e. a
	 * character other than a word character, whitespace or '.'.
	 */
	private static final String SPECIAL_CHARS_REGEX = ".*?[^\\w\\s\\.].*+";

	/** A WordNet relation together with the weight applied when following it. */
	private enum Relation {
		HYPERNYM(HYPERNYM_WEIGHT),
		HYPONYM(HYPONYM_WEIGHT),
		ENTAILING(ENTAILING_WEIGHT),
		CAUSING(CAUSING_WEIGHT),
		MEMBER_OF(MEMBER_OF_WEIGHT),
		SUBSTANCE_OF(SUBSTANCE_OF_WEIGHT),
		PART_OF(PART_OF_WEIGHT),
		HAS_MEMBER(HAS_MEMBER_WEIGHT),
		HAS_SUBSTANCE(HAS_SUBSTANCE_WEIGHT),
		HAS_PART(HAS_PART_WEIGHT);

		/** Factor by which a synset's weight is multiplied along this relation. */
		private final double weight;

		Relation(double weight) {
			this.weight = weight;
		}
	}

	/** Relations followed when expanding an event (verb). */
	private static final Relation[] VERB_RELATIONS = {
		Relation.HYPERNYM, Relation.HYPONYM,
		Relation.ENTAILING, Relation.CAUSING
	};
	/** Relations followed when expanding an entity (noun). */
	private static final Relation[] NOUN_RELATIONS = {
		Relation.HYPERNYM, Relation.HYPONYM,
		Relation.MEMBER_OF, Relation.SUBSTANCE_OF, Relation.PART_OF,
		Relation.HAS_MEMBER, Relation.HAS_SUBSTANCE, Relation.HAS_PART
	};
	/** Modifiers are currently only expanded with their synonyms. */
	private static final Relation[] MODIFIER_RELATIONS = new Relation[0];

	/** WordNet dictionary. */
	private static net.didion.jwnl.dictionary.Dictionary dict;

	/**
	 * Initializes the wrapper for the WordNet dictionary.
	 *
	 * @param properties property file
	 * @return <code>true</code> iff the dictionary was initialized
	 */
	public static boolean initialize(String properties) {
		FileInputStream in = null;
		try {
			in = new FileInputStream(new File(properties));
			JWNL.initialize(in);
			dict = net.didion.jwnl.dictionary.Dictionary.getInstance();
			return true;
		} catch (Exception e) {
			// any failure (missing file, malformed properties, ...) is
			// reported to the caller through the return value
			return false;
		} finally {
			// the property stream is only needed during initialization
			if (in != null) {
				try {
					in.close();
				} catch (java.io.IOException ignored) {
					// nothing sensible can be done if closing fails
				}
			}
		}
	}

	// lookup helpers

	/**
	 * Looks up the index word for the given word and part of speech.
	 *
	 * @param pos part of speech
	 * @param word a word
	 * @return index word or <code>null</code> if the dictionary is not
	 *         initialized, the word was not found or the lookup failed
	 */
	private static IndexWord lookupIndexWord(POS pos, String word) {
		if (dict == null) return null;

		try {
			return dict.lookupIndexWord(pos, word);
		} catch (JWNLException e) {
			return null;  // a failed lookup is treated like an unknown word
		}
	}

	/**
	 * Looks up the index words for the given word for all parts of speech.
	 *
	 * @param word a word
	 * @return index words or <code>null</code> if the dictionary is not
	 *         initialized or the lookup failed
	 */
	private static IndexWordSet lookupAllIndexWords(String word) {
		if (dict == null) return null;

		try {
			return dict.lookupAllIndexWords(word);
		} catch (JWNLException e) {
			return null;  // a failed lookup is treated like an unknown word
		}
	}

	/**
	 * Checks if the word contains special characters other than '.'.
	 *
	 * @param word a word
	 * @return <code>true</code> iff the word contains a special character
	 */
	private static boolean hasSpecialChars(String word) {
		return word.matches(SPECIAL_CHARS_REGEX);
	}

	/**
	 * Checks that a lemma has as many whitespace-separated tokens and as many
	 * dots as the word that was looked up. This ensures that the word, and not
	 * just a substring, was found in WordNet.
	 *
	 * @param word the word that was looked up
	 * @param lemma the lemma that was found
	 * @return <code>true</code> iff token and dot counts are equal
	 */
	private static boolean sameShape(String word, String lemma) {
		int wordTokens = word.split("\\s", -1).length;
		int wordDots = word.split("\\.", -1).length;
		int lemmaTokens = lemma.split("\\s", -1).length;
		int lemmaDots = lemma.split("\\.", -1).length;

		return wordTokens == lemmaTokens && wordDots == lemmaDots;
	}

	// existence checks

	/**
	 * Checks if the word exists in WordNet.
	 *
	 * @param word a word
	 * @return <code>true</code> iff the word is in WordNet
	 */
	public static boolean isWord(String word) {
		IndexWordSet indexWordSet = lookupAllIndexWords(word);

		// null if not initialized or the lookup failed
		return indexWordSet != null && indexWordSet.size() > 0;
	}

	/**
	 * Checks if the word exists in WordNet. Supports multi-token terms.
	 *
	 * @param word a word
	 * @return <code>true</code> iff the word is in WordNet
	 */
	public static boolean isCompoundWord(String word) {
		if (dict == null) return false;

		// do not look up words with special characters other than '.'
		if (hasSpecialChars(word)) return false;

		IndexWordSet indexWordSet = lookupAllIndexWords(word);
		if (indexWordSet == null) return false;  // lookup failed

		// ensure that the word, and not just a substring, was found in WordNet
		for (IndexWord indexWord : indexWordSet.getIndexWordArray())
			if (sameShape(word, indexWord.getLemma())) return true;
		return false;
	}

	/**
	 * Checks if the word exists as an adjective.
	 *
	 * @param word a word
	 * @return <code>true</code> iff the word is an adjective
	 */
	public static boolean isAdjective(String word) {
		return lookupIndexWord(POS.ADJECTIVE, word) != null;
	}

	/**
	 * Checks if the word exists as an adverb.
	 *
	 * @param word a word
	 * @return <code>true</code> iff the word is an adverb
	 */
	public static boolean isAdverb(String word) {
		return lookupIndexWord(POS.ADVERB, word) != null;
	}

	/**
	 * Checks if the word exists as a noun.
	 *
	 * @param word a word
	 * @return <code>true</code> iff the word is a noun
	 */
	public static boolean isNoun(String word) {
		return lookupIndexWord(POS.NOUN, word) != null;
	}

	/**
	 * Checks if the word exists as a noun. Supports multi-token terms.
	 *
	 * @param word a word
	 * @return <code>true</code> iff the word is a noun
	 */
	public static boolean isCompoundNoun(String word) {
		if (dict == null) return false;

		// do not look up words with special characters other than '.'
		if (hasSpecialChars(word)) return false;

		IndexWord indexWord = lookupIndexWord(POS.NOUN, word);
		if (indexWord == null) return false;

		// ensure that the word, and not just a substring, was found in WordNet
		return sameShape(word, indexWord.getLemma());
	}

	/**
	 * Checks if the word exists as a verb.
	 *
	 * @param word a word
	 * @return <code>true</code> iff the word is a verb
	 */
	public static boolean isVerb(String word) {
		return lookupIndexWord(POS.VERB, word) != null;
	}

	// lemmas and synsets

	/**
	 * Looks up the lemma of a word.
	 *
	 * @param word a word
	 * @param pos its part of speech
	 * @return lemma or <code>null</code> if lookup failed
	 */
	public static String getLemma(String word, POS pos) {
		IndexWord indexWord = lookupIndexWord(pos, word);
		if (indexWord == null) return null;

		return indexWord.getLemma().replace("_", " ");
	}

	/**
	 * Looks up the lemma of a compound word.
	 *
	 * @param word a word
	 * @param pos its part of speech
	 * @return lemma or <code>null</code> if lookup failed
	 */
	public static String getCompoundLemma(String word, POS pos) {
		// do not look up words with special characters other than '.'
		if (hasSpecialChars(word)) return null;

		String lemma = getLemma(word, pos);
		if (lemma == null) return null;

		// ensure that the word, and not just a substring, was found in WordNet
		return sameShape(word, lemma) ? lemma : null;
	}

	/**
	 * Looks up the most common synset of a word.
	 *
	 * @param word a word
	 * @param pos its part of speech
	 * @return synset or <code>null</code> if lookup failed
	 */
	private static Synset getCommonSynset(String word, POS pos) {
		IndexWord indexWord = lookupIndexWord(pos, word);
		if (indexWord == null) return null;

		try {
			return indexWord.getSense(1);
		} catch (JWNLException e) {
			return null;  // sense could not be loaded
		}
	}

	/**
	 * Looks up the synsets that correspond to the nodes in a node list.
	 *
	 * @param nodes node list
	 * @return synsets
	 */
	private static Synset[] getSynsets(PointerTargetNodeList nodes) {
		Synset[] synsets = new Synset[nodes.size()];

		for (int i = 0; i < nodes.size(); i++) {
			PointerTargetNode node = (PointerTargetNode) nodes.get(i);
			synsets[i] = node.getSynset();
		}

		return synsets;
	}

	/**
	 * Looks up the lemmas of the words in a synset.
	 *
	 * @param synset a synset
	 * @return lemmas
	 */
	private static String[] getLemmas(Synset synset) {
		Word[] words = synset.getWords();
		String[] lemmas = new String[words.length];

		for (int i = 0; i < words.length; i++)
			lemmas[i] = words[i].getLemma().replace("_", " ");

		return lemmas;
	}

	/**
	 * Looks up the lemmas of the words in all synsets. Duplicate lemmas are
	 * returned only once.
	 *
	 * @param synsets the synsets
	 * @return lemmas
	 */
	private static String[] getLemmas(Synset[] synsets) {
		HashSet<String> lemmaSet = new HashSet<String>();

		for (Synset synset : synsets)
			for (String lemma : getLemmas(synset))
				lemmaSet.add(lemma);

		return lemmaSet.toArray(new String[lemmaSet.size()]);
	}

	/**
	 * Looks up the synsets that are directly related to the given synset
	 * through the given relation.
	 *
	 * @param synset a synset
	 * @param relation the relation to follow
	 * @return related synsets or <code>null</code> if lookup failed
	 */
	private static Synset[] getRelatedSynsets(Synset synset,
			Relation relation) {
		PointerTargetNodeList nodes = null;

		try {
			PointerUtils utils = PointerUtils.getInstance();
			switch (relation) {
				case HYPERNYM:
					nodes = utils.getDirectHypernyms(synset);
					break;
				case HYPONYM:
					nodes = utils.getDirectHyponyms(synset);
					break;
				case ENTAILING:
					nodes = utils.getEntailments(synset);
					break;
				case CAUSING:
					nodes = utils.getCauses(synset);
					break;
				case MEMBER_OF:
					nodes = utils.getMemberHolonyms(synset);
					break;
				case SUBSTANCE_OF:
					nodes = utils.getSubstanceHolonyms(synset);
					break;
				case PART_OF:
					nodes = utils.getPartHolonyms(synset);
					break;
				case HAS_MEMBER:
					nodes = utils.getMemberMeronyms(synset);
					break;
				case HAS_SUBSTANCE:
					nodes = utils.getSubstanceMeronyms(synset);
					break;
				case HAS_PART:
					nodes = utils.getPartMeronyms(synset);
					break;
				default:
					break;
			}
		} catch (JWNLException e) {
			// lookup failed, reported as null below
		}

		if (nodes == null) return null;
		return getSynsets(nodes);
	}

	/**
	 * Looks up the lemmas related to the most common sense of a word through
	 * the given relation.
	 *
	 * @param word a word
	 * @param pos its part of speech
	 * @param relation the relation to follow
	 * @return related lemmas or <code>null</code> if lookup failed
	 */
	private static String[] getRelatedLemmas(String word, POS pos,
			Relation relation) {
		Synset synset = getCommonSynset(word, pos);
		if (synset == null) return null;

		Synset[] related = getRelatedSynsets(synset, relation);
		if (related == null) return null;

		return getLemmas(related);
	}

	// relations for multiple parts of speech

	/**
	 * Looks up synonyms of the given word, assuming that it is used in its most
	 * common sense.
	 *
	 * @param word a word
	 * @param pos its part of speech
	 * @return synonyms or <code>null</code> if lookup failed
	 */
	public static String[] getSynonyms(String word, POS pos) {
		Synset synset = getCommonSynset(word, pos);
		if (synset == null) return null;

		return getLemmas(synset);
	}

	/**
	 * Looks up hypernyms of the given word, assuming that it is used in its
	 * most common sense.
	 *
	 * @param word a word
	 * @param pos its part of speech
	 * @return hypernyms or <code>null</code> if lookup failed
	 */
	public static String[] getHypernyms(String word, POS pos) {
		return getRelatedLemmas(word, pos, Relation.HYPERNYM);
	}

	/**
	 * Looks up hyponyms of the given word, assuming that it is used in its most
	 * common sense.
	 *
	 * @param word a word
	 * @param pos its part of speech
	 * @return hyponyms or <code>null</code> if lookup failed
	 */
	public static String[] getHyponyms(String word, POS pos) {
		return getRelatedLemmas(word, pos, Relation.HYPONYM);
	}

	/**
	 * Looks up hyponyms of the synset with the given POS and offset.
	 *
	 * @param pos POS of the synset
	 * @param offset offset of the synset
	 * @return hyponyms or <code>null</code> if lookup failed
	 */
	public static String[] getHyponyms(POS pos, long offset) {
		if (dict == null) return null;  // not initialized

		Synset synset = null;
		try {
			synset = dict.getSynsetAt(pos, offset);
		} catch (JWNLException e) {
			// lookup failed, reported as null below
		}
		if (synset == null) return null;

		Synset[] hyponyms = getRelatedSynsets(synset, Relation.HYPONYM);
		if (hyponyms == null) return null;

		return getLemmas(hyponyms);
	}

	/**
	 * Looks up hyponyms of the synset with POS "noun" and the given offset.
	 *
	 * @param offset offset of the synset
	 * @return hyponyms or <code>null</code> if lookup failed
	 */
	public static String[] getNounHyponyms(long offset) {
		return getHyponyms(POS.NOUN, offset);
	}

	// relations for verbs

	/**
	 * Looks up verbs that entail the given verb, assuming that it is used in
	 * its most common sense.
	 *
	 * @param verb a verb
	 * @return entailing verbs or <code>null</code> if lookup failed
	 */
	public static String[] getEntailing(String verb) {
		return getRelatedLemmas(verb, VERB, Relation.ENTAILING);
	}

	/**
	 * Looks up verbs that cause the given verb, assuming that it is used in its
	 * most common sense.
	 *
	 * @param verb a verb
	 * @return causing verbs or <code>null</code> if lookup failed
	 */
	public static String[] getCausing(String verb) {
		return getRelatedLemmas(verb, VERB, Relation.CAUSING);
	}

	// relations for nouns

	/**
	 * Looks up member holonyms of the given noun, assuming that it is used in
	 * its most common sense.
	 *
	 * @param noun a noun
	 * @return member holonyms or <code>null</code> if lookup failed
	 */
	public static String[] getMembersOf(String noun) {
		return getRelatedLemmas(noun, NOUN, Relation.MEMBER_OF);
	}

	/**
	 * Looks up substance holonyms of the given noun, assuming that it is used
	 * in its most common sense.
	 *
	 * @param noun a noun
	 * @return substance holonyms or <code>null</code> if lookup failed
	 */
	public static String[] getSubstancesOf(String noun) {
		return getRelatedLemmas(noun, NOUN, Relation.SUBSTANCE_OF);
	}

	/**
	 * Looks up part holonyms of the given noun, assuming that it is used in its
	 * most common sense.
	 *
	 * @param noun a noun
	 * @return part holonyms or <code>null</code> if lookup failed
	 */
	public static String[] getPartsOf(String noun) {
		return getRelatedLemmas(noun, NOUN, Relation.PART_OF);
	}

	/**
	 * Looks up member meronyms of the given noun, assuming that it is used in
	 * its most common sense.
	 *
	 * @param noun a noun
	 * @return member meronyms or <code>null</code> if lookup failed
	 */
	public static String[] getHaveMember(String noun) {
		return getRelatedLemmas(noun, NOUN, Relation.HAS_MEMBER);
	}

	/**
	 * Looks up substance meronyms of the given noun, assuming that it is used
	 * in its most common sense.
	 *
	 * @param noun a noun
	 * @return substance meronyms or <code>null</code> if lookup failed
	 */
	public static String[] getHaveSubstance(String noun) {
		return getRelatedLemmas(noun, NOUN, Relation.HAS_SUBSTANCE);
	}

	/**
	 * Looks up part meronyms of the given noun, assuming that it is used in its
	 * most common sense.
	 *
	 * @param noun a noun
	 * @return part meronyms or <code>null</code> if lookup failed
	 */
	public static String[] getHavePart(String noun) {
		return getRelatedLemmas(noun, NOUN, Relation.HAS_PART);
	}

	// term expansion

	/**
	 * Stores a weight for a key unless the map already holds a weight for that
	 * key that is at least as large.
	 *
	 * @param weights map from keys to weights
	 * @param key the key
	 * @param weight the candidate weight
	 * @return <code>true</code> iff the map was updated
	 */
	private static <K> boolean putIfHeavier(Map<K, Double> weights, K key,
			double weight) {
		Double current = weights.get(key);
		if (current == null || current < weight) {
			weights.put(key, weight);
			return true;
		}
		return false;
	}

	/**
	 * Expands a term by following the given relations from its most common
	 * synset for at most <code>MAX_PATH_LENGTH</code> steps.
	 *
	 * <p>The most common synset starts with weight 1. Following a relation
	 * multiplies the weight by the weight of that relation; related synsets
	 * whose weight would fall below
	 * <code>TermExpander.MIN_EXPANSION_WEIGHT</code> are not added. A relation
	 * whose lookup fails is skipped. Lemmas of synsets that still have weight 1
	 * (direct synonyms) get <code>SYNONYM_WEIGHT</code>. Lemmas that equal the
	 * term (ignoring case) are dropped.</p>
	 *
	 * @param term the term to expand
	 * @param pos its part of speech
	 * @param relations the relations to follow, in the order they are applied
	 * @return related terms and their weights
	 */
	private static Map<String, Double> expand(String term, POS pos,
			Relation[] relations) {
		// synsets of related concepts that are still to be expanded
		Map<Synset, Double> synsets = new Hashtable<Synset, Double>();
		// synsets that have already been expanded
		Map<Synset, Double> expanded = new Hashtable<Synset, Double>();

		// get most common synset
		double hurdle = TermExpander.MIN_EXPANSION_WEIGHT;
		if (SYNONYM_WEIGHT >= hurdle) {
			Synset synset = getCommonSynset(term, pos);
			if (synset != null) synsets.put(synset, 1d);
		}

		// expand synsets
		int pathLength = 0;
		while (pathLength++ < MAX_PATH_LENGTH && synsets.size() > 0) {
			// snapshot synsets and their weights, the map is modified below
			Synset[] currSynsets =
				synsets.keySet().toArray(new Synset[synsets.size()]);
			double[] currWeights = new double[currSynsets.length];
			for (int i = 0; i < currSynsets.length; i++)
				currWeights[i] = synsets.get(currSynsets[i]);

			for (int i = 0; i < currSynsets.length; i++) {
				Synset synset = currSynsets[i];
				double weight = currWeights[i];

				// move to expanded synsets (unless the synset was re-added
				// with a different weight during this pass)
				if (synsets.get(synset) == weight) synsets.remove(synset);
				if (!putIfHeavier(expanded, synset, weight)) continue;

				for (Relation relation : relations) {
					double relatedWeight = weight * relation.weight;
					if (relatedWeight < hurdle) continue;

					Synset[] related = getRelatedSynsets(synset, relation);
					if (related == null) continue;  // lookup failed

					for (Synset relatedSynset : related)
						putIfHeavier(synsets, relatedSynset, relatedWeight);
				}
			}
		}
		// synsets that were reached but not expanded any further
		for (Map.Entry<Synset, Double> entry : synsets.entrySet())
			putIfHeavier(expanded, entry.getKey(), entry.getValue());

		// get concepts in synsets
		Map<String, Double> expansions = new Hashtable<String, Double>();
		for (Map.Entry<Synset, Double> entry : expanded.entrySet()) {
			double weight = entry.getValue();
			if (weight == 1) weight = SYNONYM_WEIGHT;  // direct synonyms

			for (String expansion : getLemmas(entry.getKey()))
				putIfHeavier(expansions, expansion, weight);
		}

		// drop the term itself (collected first, the map cannot be modified
		// while iterating over its key set)
		List<String> dropped = new ArrayList<String>();
		for (String expansion : expansions.keySet())
			if (expansion.equalsIgnoreCase(term)) dropped.add(expansion);
		for (String expansion : dropped) expansions.remove(expansion);

		return expansions;
	}

	// implement the interface 'Ontology'

	/**
	 * Looks up a word.
	 *
	 * @param word the word to look up
	 * @return <code>true</code> iff the word was found
	 */
	public boolean contains(String word) {
//		// look for compound nouns and verbs
//		return isCompoundWord(word);

		// only look for compound nouns
		return isCompoundNoun(word);
	}

	/**
	 * Expands an event by looking up related events.
	 *
	 * @param event an event
	 * @return related events and their weights
	 */
	public Map<String, Double> expandEvent(String event) {
		if (!isVerb(event)) return new Hashtable<String, Double>();

		return expand(event, VERB, VERB_RELATIONS);
	}

	/**
	 * Expands an entity by looking up related entities.
	 *
	 * @param entity an entity
	 * @return related entities and their weights
	 */
	public Map<String, Double> expandEntity(String entity) {
		if (!isCompoundNoun(entity)) return new Hashtable<String, Double>();

		return expand(entity, NOUN, NOUN_RELATIONS);
	}

	/**
	 * Expands a modifier by looking up related modifiers.
	 *
	 * @param modifier a modifier
	 * @param pos its part of speech: <code>POS.ADJECTIVE</code> or
	 *            <code>POS.ADVERB</code>
	 * @return related modifiers and their weights
	 */
	public Map<String, Double> expandModifier(String modifier, POS pos) {
		if ((pos.equals(ADJECTIVE) && !isAdjective(modifier)) ||
			(pos.equals(ADVERB) && !isAdverb(modifier)))
			return new Hashtable<String, Double>();

		// currently no relations other than synonyms
		return expand(modifier, pos, MODIFIER_RELATIONS);
	}
}