/** * Copyright 2014, Emory University * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package edu.emory.clir.clearnlp.lexicon.wordnet; import java.io.FileInputStream; import java.io.InputStream; import java.util.HashSet; import java.util.List; import java.util.Set; import edu.emory.clir.clearnlp.collection.map.ObjectIntHashMap; import edu.emory.clir.clearnlp.collection.pair.ObjectIntPair; import edu.emory.clir.clearnlp.collection.triple.ObjectIntIntTriple; /** * @since 3.0.0 * @author Jinho D. Choi ({@code jinho.choi@emory.edu}) */ public class WNMap { private WNDataMap n_data; private WNDataMap v_data; private WNDataMap a_data; private WNDataMap r_data; private WNIndexMap n_index; private WNIndexMap v_index; private WNIndexMap a_index; private WNIndexMap r_index; public WNMap() {} public WNMap(String wordnetDirectoryPath) { String nPath, vPath, aPath, rPath; try { nPath = wordnetDirectoryPath+"/data.noun"; vPath = wordnetDirectoryPath+"/data.verb"; aPath = wordnetDirectoryPath+"/data.adj"; rPath = wordnetDirectoryPath+"/data.adv"; initDataMaps(new FileInputStream(nPath), new FileInputStream(vPath), new FileInputStream(aPath), new FileInputStream(rPath)); nPath = wordnetDirectoryPath+"/index.noun"; vPath = wordnetDirectoryPath+"/index.verb"; aPath = wordnetDirectoryPath+"/index.adj"; rPath = wordnetDirectoryPath+"/index.adv"; initIndexMaps(new FileInputStream(nPath), new FileInputStream(vPath), new FileInputStream(aPath), new FileInputStream(rPath)); } catch (Exception e) {e.printStackTrace();} } // ------------------------------------ Initializers ------------------------------------ public void initDataMaps(InputStream nIn, InputStream vIn, InputStream aIn, InputStream rIn) throws Exception { n_data = new WNDataMap(nIn); v_data = new WNDataMap(vIn); a_data = new WNDataMap(aIn); r_data = new WNDataMap(rIn); n_data.initRelations(this); v_data.initRelations(this); a_data.initRelations(this); r_data.initRelations(this); } public void initIndexMaps(InputStream nIn, InputStream vIn, InputStream aIn, InputStream rIn) throws Exception { n_index = new WNIndexMap(nIn, n_data); v_index = new WNIndexMap(vIn, v_data); a_index = new WNIndexMap(aIn, a_data); r_index = new WNIndexMap(rIn, r_data); } private WNDataMap getDataMap(char posTag) { switch (posTag) { case WNPOSTag.NOUN : return n_data; case WNPOSTag.VERB : return v_data; case WNPOSTag.ADJECTIVE: return a_data; case WNPOSTag.ADVERB : return r_data; } throw new IllegalArgumentException(posTag+" is not a valid POS tag."); } private WNIndexMap getIndexMap(char posTag) { switch (posTag) { case WNPOSTag.NOUN : return n_index; case WNPOSTag.VERB : return v_index; case WNPOSTag.ADJECTIVE: return a_index; case WNPOSTag.ADVERB : return r_index; } throw new IllegalArgumentException(posTag+" is not a valid POS tag."); } WNSynset getSynset(char posTag, int offset) { return getDataMap(posTag).getSynset(offset); } WNIndex getIndex(char posTag, String lemma) { return getIndexMap(posTag).getIndex(lemma); } // ------------------------------------ Relations ------------------------------------ public boolean isSynonym(char posTag, String lemma1, String lemma2) { WNIndex index1 = getIndex(posTag, lemma1); WNIndex index2 = getIndex(posTag, lemma2); return (index1 == null || index2 == null) ? false : isSynonym(index1, index2); } public boolean isSynonym(WNIndex index1, WNIndex index2) { List<WNSynset> ls = index1.getSynsetList(); List<WNSynset> lb = index2.getSynsetList(); if (lb.size() < ls.size()) { List<WNSynset> t = ls; ls = lb; lb = t; } Set<WNSynset> set = new HashSet<>(lb); for (WNSynset synset : ls) { if (set.contains(synset)) return true; } return false; } public ObjectIntHashMap<WNSynset> getHypernymMap(char posTag, String lemma) { WNIndex index = getIndex(posTag, lemma); return (index == null) ? new ObjectIntHashMap<WNSynset>() : getHypernymMap(index); } public ObjectIntHashMap<WNSynset> getHypernymMap(WNIndex index) { ObjectIntHashMap<WNSynset> map = new ObjectIntHashMap<WNSynset>(); for (WNSynset synset : index.getSynsetList()) { map.put(synset, 0); getHypernymMapAux(map, synset, 1); } return map; } private void getHypernymMapAux(ObjectIntHashMap<WNSynset> map, WNSynset synset, int height) { List<WNRelation> list = synset.getHypernymList(); WNSynset s; int h; for (WNRelation rel : list) { s = rel.getWNSynset(); h = map.get(s); if (h == 0 || height < h) { map.put(s, height); getHypernymMapAux(map, s, height+1); } } } public ObjectIntIntTriple<WNSynset> getLowestCommonSubsumer(char posTag, String lemma1, String lemma2) { ObjectIntHashMap<WNSynset> map1 = getHypernymMap(posTag, lemma1); if (map1.isEmpty()) return null; ObjectIntHashMap<WNSynset> map2 = getHypernymMap(posTag, lemma2); if (map2.isEmpty()) return null; return getLowestCommonSubsumer(map1, map2); } /** {@code map1.size()} > {@code map2.size()}. */ private ObjectIntIntTriple<WNSynset> getLowestCommonSubsumer(ObjectIntHashMap<WNSynset> map1, ObjectIntHashMap<WNSynset> map2) { ObjectIntIntTriple<WNSynset> lcs = new ObjectIntIntTriple<WNSynset>(null, 0, Integer.MAX_VALUE); boolean b = map1.size() < map2.size(); int h1, h2, hs, ms; if (b) { ObjectIntHashMap<WNSynset> t = map1; map1 = map2; map2 = t; } for (ObjectIntPair<WNSynset> p : map2) { if (!map1.containsKey(p.o)) continue; h1 = map1.get(p.o); h2 = p.i; hs = h1 + h2; ms = lcs.i1 + lcs.i2; if (hs < ms || (hs == ms && Math.abs(h1-h2) < Math.abs(lcs.i1-lcs.i2))) lcs.set(p.o, h1, h2); } if (lcs.o == null) return null; if (b) { int t = lcs.i1; lcs.i1 = lcs.i2; lcs.i2 = t; } return lcs; } public Set<String> getSynonymSet(char posTag, String lemma, int... senseIDs) { WNIndex index = getIndex(posTag, lemma); Set<String> set = new HashSet<>(); if (senseIDs.length == 0) { for (WNSynset synset : index.getSynsetList()) set.addAll(synset.getWords()); } else { for (int senseID : senseIDs) set.addAll(index.getSynset(senseID).getWords()); } return set; } }