/**
* Copyright 2014, Emory University
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.emory.clir.clearnlp.lexicon.wordnet;
import java.util.ArrayList;
import java.util.List;
import edu.emory.clir.clearnlp.util.Joiner;
import edu.emory.clir.clearnlp.util.Splitter;
import edu.emory.clir.clearnlp.util.constant.StringConst;
/**
* @since 3.0.0
* @author Jinho D. Choi ({@code jinho.choi@emory.edu})
*/
public class WNSynset
{
	private String s_lexicographerFileNumber;
	private int n_synsetOffset;
	private char c_posTag;
	/** Raw pointers parsed from the data line; released (set to {@code null}) once {@link #initRelations(WNMap)} has resolved them. */
	private WNPointer[] a_pointers;
	private List<String> s_words;
	private String s_gloss;
	private List<WNRelation> l_antonym;
	private List<WNRelation> l_hypernym;
	private List<WNRelation> l_hyponym;
	// Relation types that are not supported yet:
	// private List<WNRelation> l_attribute;
	// private List<WNRelation> l_derivationallyRelatedForm;
	// private List<WNRelation> l_entailment;
	// private List<WNRelation> l_cause;
	// private List<WNRelation> l_alsoSee;
	// private List<WNRelation> l_verbGroup;
	// private List<WNRelation> l_similarTo;
	// private List<WNRelation> l_participleOfVerb;
	// private List<WNRelation> l_pertainym;
	// private List<WNRelation> l_instanceHypernym;
	// private List<WNRelation> l_instanceHyponym;
	// private List<WNRelation> l_memberHolonym;
	// private List<WNRelation> l_substanceHolonym;
	// private List<WNRelation> l_partHolonym;
	// private List<WNRelation> l_memberMeronym;
	// private List<WNRelation> l_substanceMeronym;
	// private List<WNRelation> l_partMeronym;
	// private List<WNRelation> l_domainTopic;
	// private List<WNRelation> l_domainRegion;
	// private List<WNRelation> l_domainUsage;
	// private List<WNRelation> l_memberTopic;
	// private List<WNRelation> l_memberRegion;
	// private List<WNRelation> l_memberUsage;

	/** Creates an empty synset with no words, no gloss, and no pointers. */
	public WNSynset()
	{
		s_words = new ArrayList<>();
	}

	/**
	 * Creates a synset by parsing one line of a WordNet data file.
	 * The fields are read in this order: synset offset, lexicographer file number, POS tag,
	 * word count (hexadecimal), {@code word lex_id} pairs, pointer count (decimal),
	 * and pointers ({@code symbol offset pos source/target}).
	 * Everything after the first pipe is taken as the gloss; any fields between the
	 * pointers and the pipe (e.g., verb frames) are ignored.
	 * @param line a line from the WordNet data file (e.g., data.verb).
	 * e.g., "02166460 39 v 02 study 0 consider 0 009 @ 00630380 v 0000 + 07138915 n 0201 + 05822746 n 0203 + 05784831 n 0201 + 05784242 n 0102 + 00644503 n 0102 ~ 00640650 v 0000 ~ 00653620 v 0000 $ 00813044 v 0000 03 + 08 00 + 09 00 + 29 00 | give careful consideration to; "consider the possibility of moving""
	 */
	public WNSynset(String line)
	{
		int i, count, idx = 0;
		WNPointer pointer;
		String sourceTarget;
		String[] t;

		// split off the gloss, which follows the first pipe
		i = line.indexOf(StringConst.PIPE);

		if (i > 0)
		{
			setGloss(line.substring(i+1).trim());
			line = line.substring(0, i).trim();
		}
		else
		{
			setGloss(StringConst.EMPTY);
			line = line.trim();
		}

		t = Splitter.splitSpace(line);
		setSynsetOffset(Integer.parseInt(t[idx++]));
		setLexicographerFileNumber(t[idx++]);
		setPOSTag(t[idx++].charAt(0));

		// the word count is hexadecimal
		count = Integer.parseInt(t[idx++], 16);
		s_words = new ArrayList<>(count);

		for (i=0; i<count; i++)
		{
			addWord(t[idx++]);
			idx++;	// skip lexical ID
		}

		// the pointer count is decimal
		count = Integer.parseInt(t[idx++]);
		a_pointers = new WNPointer[count];

		for (i=0; i<count; i++)
		{
			pointer = new WNPointer();
			pointer.setPointerSymbol(t[idx++]);
			pointer.setSynsetOffset(Integer.parseInt(t[idx++]));
			pointer.setPOSTag(t[idx++].charAt(0));

			// 4 hex digits: the first two are the 1-based source word number, the rest the 1-based
			// target word number; both are stored 0-based, so "0000" becomes -1/-1
			sourceTarget = t[idx++];
			pointer.setSource((short)(Short.parseShort(sourceTarget.substring(0,2), 16) - 1));
			pointer.setTarget((short)(Short.parseShort(sourceTarget.substring(2)  , 16) - 1));
			a_pointers[i] = pointer;
		}

		// ignores [+ f_num w_num]*
	}

	// ------------------------------------ Getters/Setters ------------------------------------

	public String getLexicographerFileNumber()
	{
		return s_lexicographerFileNumber;
	}

	public int getSynsetOffset()
	{
		return n_synsetOffset;
	}

	public char getPOSTag()
	{
		return c_posTag;
	}

	/** @return the internal (mutable) list of words in this synset. */
	public List<String> getWords()
	{
		return s_words;
	}

	public String getGloss()
	{
		return s_gloss;
	}

	public void setLexicographerFileNumber(String number)
	{
		s_lexicographerFileNumber = number;
	}

	public void setSynsetOffset(int offset)
	{
		n_synsetOffset = offset;
	}

	public void setPOSTag(char tag)
	{
		c_posTag = tag;
	}

	public void addWord(String word)
	{
		s_words.add(word);
	}

	public void setGloss(String gloss)
	{
		s_gloss = gloss;
	}

	// ------------------------------------ Initializers ------------------------------------

	/**
	 * Resolves the pointers parsed from the data line into antonym, hypernym, and hyponym relations,
	 * then releases the pointers.
	 * If there are no pointers to resolve (this synset was created by the default constructor,
	 * or this method has already been called), existing relation lists are left untouched and
	 * missing ones are initialized to empty lists.
	 * @param map the map used to look up the synsets that the pointers refer to.
	 */
	void initRelations(WNMap map)
	{
		if (a_pointers == null)
		{
			if (l_antonym  == null) l_antonym  = new ArrayList<>();
			if (l_hypernym == null) l_hypernym = new ArrayList<>();
			if (l_hyponym  == null) l_hyponym  = new ArrayList<>();
			return;
		}

		l_antonym  = new ArrayList<>();
		l_hypernym = new ArrayList<>();
		l_hyponym  = new ArrayList<>();

		for (WNPointer pointer : a_pointers)
			initRelation(map, pointer);

		a_pointers = null;
	}

	/**
	 * Called by {@link #initRelations(WNMap)}.
	 * Adds the relation for the specific pointer to the list matching its symbol;
	 * pointers with any other symbol are skipped without consulting the map.
	 */
	private void initRelation(WNMap map, WNPointer pointer)
	{
		List<WNRelation> list;

		switch (pointer.getPointerSymbol())
		{
		case "!": list = l_antonym;  break;
		case "@": list = l_hypernym; break;
		case "~": list = l_hyponym;  break;
		default : return;
		}

		list.add(getRelation(map, pointer));
	}

	/**
	 * Called by {@link #initRelation(WNMap, WNPointer)}.
	 * @return a relation holding whatever the map returns for the pointer's POS tag and offset,
	 * together with the pointer's source and target word indices.
	 */
	private WNRelation getRelation(WNMap map, WNPointer pointer)
	{
		WNSynset synset = map.getSynset(pointer.getPOSTag(), pointer.getSynsetOffset());
		WNRelation relation = new WNRelation();

		relation.setWNSynset(synset);
		relation.setSource(pointer.getSource());
		relation.setTarget(pointer.getTarget());

		return relation;
	}

	// ------------------------------------ Relations ------------------------------------

	/** @return the antonym relations; {@code null} until {@link #initRelations(WNMap)} is called. */
	public List<WNRelation> getAntonymList()
	{
		return l_antonym;
	}

	/** @return the hypernym relations; {@code null} until {@link #initRelations(WNMap)} is called. */
	public List<WNRelation> getHypernymList()
	{
		return l_hypernym;
	}

	/** @return the hyponym relations; {@code null} until {@link #initRelations(WNMap)} is called. */
	public List<WNRelation> getHyponymList()
	{
		return l_hyponym;
	}

	/** @return the POS tag followed by the space-joined words, e.g., "v: study consider". */
	@Override
	public String toString()
	{
		return c_posTag + ": " + Joiner.join(s_words, StringConst.SPACE);
	}
}