/**
* Copyright 2016
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package de.tudarmstadt.ukp.lmf.transform.wordnet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import net.sf.extjwnl.JWNLException;
import net.sf.extjwnl.data.POS;
import net.sf.extjwnl.data.Pointer;
import net.sf.extjwnl.data.PointerTarget;
import net.sf.extjwnl.data.Word;
import de.tudarmstadt.ukp.lmf.model.core.LexicalEntry;
import de.tudarmstadt.ukp.lmf.model.core.Sense;
import de.tudarmstadt.ukp.lmf.model.enums.ELabelTypeSemantics;
import de.tudarmstadt.ukp.lmf.model.enums.ERelNameSemantics;
import de.tudarmstadt.ukp.lmf.model.enums.ERelTypeSemantics;
import de.tudarmstadt.ukp.lmf.model.meta.SemanticLabel;
import de.tudarmstadt.ukp.lmf.model.semantics.Synset;
import de.tudarmstadt.ukp.lmf.model.semantics.SynsetRelation;
/**
 * Extracts synset relations, as defined in Uby-LMF, from WordNet's data.
 * <p>
 * For every WordNet synset known to the supplied {@link SynsetGenerator}, the pointers of that
 * synset are translated into {@link SynsetRelation}-instances and attached to the associated
 * Uby-LMF {@link Synset}. For the pointer symbols registered as "domain-of" pointers
 * ({@code ;c}, {@code ;r}, {@code ;u}), a {@link SemanticLabel} is additionally added to the
 * {@link Sense} of every lexeme of the pointer's source synset.
 *
 * @author Zijad Maksuti
 * @author Judith Eckle-Kohler
 * @see Synset
 * @see SynsetRelation
 */
public class SynsetRelationGenerator {

    // String representations of relation names that are not part of ERelNameSemantics;
    // topic/region/usage are also mapped to SemanticLabels (see domainOfRegisterMappings)
    private static final String TOPIC = "topic";
    private static final String REGION = "region";
    private static final String USAGE = "usage";
    private static final String IS_TOPIC_OF = "isTopicOf";
    private static final String IS_REGION_OF = "isRegionOf";
    private static final String IS_USAGE_OF = "isUsageOf";
    private static final String ATTRIBUTE = "attribute";
    private static final String VERB_GROUP = "verbGroup";

    private final SynsetGenerator synsetGenerator;
    private final LexicalEntryGenerator lexicalEntryGenerator;

    /*
     * Mappings between WordNet's pointer-types and corresponding relation names, defined in Uby-LMF
     * The mapping is used for SynsetRelations only
     * Mappings for different POS are as follows {NOUN, VERB, ADJECTIVE, ADVERB}
     */
    private final Map<String, String[]> pointerTypeRelNameMappings = new TreeMap<String, String[]>();

    /*
     * Mappings between WordNet's pointer-types and corresponding relation types, defined in Uby-LMF
     * The mapping is used for SynsetRelations only
     * Mappings for different POS are as follows {NOUN, VERB, ADJECTIVE, ADVERB}
     */
    private final Map<String, ERelTypeSemantics[]> pointerTypeRelTypeMappings = new TreeMap<String, ERelTypeSemantics[]>();

    // Pointer keys for which no SynsetRelation is generated
    private final Set<String> ignoredPointerKeys = new TreeSet<String>();

    // <domainOfRelationKey, register>
    private final Map<String, ELabelTypeSemantics> domainOfRegisterMappings = new TreeMap<String, ELabelTypeSemantics>();

    /**
     * Constructs an instance of {@link SynsetRelationGenerator} based on the consumed parameters
     * and populates the pointer-symbol mappings.
     * @param synsetGenerator an instance of {@link SynsetGenerator} used for accessing generated Uby-LMF synsets.
     * @param lexicalEntryGenerator an instance of {@link LexicalEntryGenerator} used for
     * accessing generated {@link LexicalEntry}-instances.<br>
     * Both synsetGenerator and lexicalEntryGenerator must be initialized.
     * @see Synset
     */
    public SynsetRelationGenerator(SynsetGenerator synsetGenerator, LexicalEntryGenerator lexicalEntryGenerator){
        this.synsetGenerator = synsetGenerator;
        this.lexicalEntryGenerator = lexicalEntryGenerator;
        initializePointerMappings();
    }

    /**
     * Iterates over all synset bindings provided by the synset-generator
     * and sets the {@link SynsetRelation}s of every Uby-LMF synset.
     * @see Synset
     */
    public void updateSynsetRelations() {
        for(Entry<net.sf.extjwnl.data.Synset, Synset> binding : synsetGenerator.getWNSynsetLMFSynsetMappings().entrySet()) {
            updateSynsetRelations(binding);
        }
    }

    /**
     * Consumes a binding of a WordNet synset and its associated Uby-LMF synset and
     * replaces the {@link SynsetRelation}s of the Uby-LMF synset with the relations
     * generated from the WordNet synset's pointers. Pointers whose key is contained
     * in the set of ignored pointer keys are skipped.
     * @param binding the binding of two synsets, with WordNet's synset as key
     * @see Synset
     * @see net.sf.extjwnl.data.Synset
     */
    private void updateSynsetRelations(Entry<net.sf.extjwnl.data.Synset, Synset> binding){
        net.sf.extjwnl.data.Synset wnSynset = binding.getKey();
        // index into the per-POS mapping arrays
        int posOrdinal = POS.getAllPOS().indexOf(wnSynset.getPOS());

        List<SynsetRelation> synsetRelations = new LinkedList<SynsetRelation>();
        for(Pointer pointer : wnSynset.getPointers()) {
            if(!ignoredPointerKeys.contains(pointer.getType().getKey())) {
                synsetRelations.add(generateSynsetRelation(pointer, posOrdinal));
            }
        }
        binding.getValue().setSynsetRelations(synsetRelations);
    }

    /**
     * Consumes a pointer of a WordNet synset and generates the corresponding {@link SynsetRelation}-instance.
     * If the pointer's symbol is a "domain-of" symbol and its target is a synset, the senses of the
     * source synset's lexemes are additionally extended by a {@link SemanticLabel}.
     * @param pointer a {@link Pointer}-instance
     * @param posOrdinal the ordinal of the pointer's source part of speech
     * @return synset-relation that corresponds to the consumed pointer
     * @throws IllegalArgumentException if the pointer's target can not be resolved, or if no
     * mapping is defined for the pointer's symbol or the consumed posOrdinal
     * @see Synset
     * @see net.sf.extjwnl.data.Synset
     * @see POS
     */
    private SynsetRelation generateSynsetRelation(Pointer pointer, int posOrdinal){
        String pointerSymbol = pointer.getType().getKey();

        SynsetRelation synsetRelation = new SynsetRelation();
        synsetRelation.setRelType(getRelType(pointerSymbol, posOrdinal));
        synsetRelation.setRelName(getRelationName(pointerSymbol, posOrdinal));

        PointerTarget pointerTarget;
        try {
            pointerTarget = pointer.getTarget();
        }
        catch (JWNLException e) {
            throw new IllegalArgumentException(e);
        }

        // NOTE(review): when the target is not a synset, the relation is returned
        // without a target -- confirm that this is intended.
        if(pointerTarget instanceof net.sf.extjwnl.data.Synset){
            // the already resolved target is reused instead of resolving it a second time
            net.sf.extjwnl.data.Synset targetSynset = (net.sf.extjwnl.data.Synset) pointerTarget;
            synsetRelation.setTarget(synsetGenerator.getLMFSynset(targetSynset));

            // only executed for DOMAIN-OF pointers
            if(domainOfRegisterMappings.containsKey(pointerSymbol)){
                addSemanticLabels((net.sf.extjwnl.data.Synset) pointer.getSource(), targetSynset, pointerSymbol);
            }
        }
        return synsetRelation;
    }

    /**
     * Adds a newly created {@link SemanticLabel} to the {@link Sense} of every lexeme of the source synset.
     * @param sourceSynset source synset of a "Domain-Of" pointer
     * @param targetSynset synset targeted by the "Domain-Of" pointer
     * @param pointerSymbol WordNet's symbol describing the relation
     */
    private void addSemanticLabels(net.sf.extjwnl.data.Synset sourceSynset,
            net.sf.extjwnl.data.Synset targetSynset, String pointerSymbol) {
        // SenseGenerator is needed in order to obtain the lexeme's corresponding Sense
        SenseGenerator senseGenerator = lexicalEntryGenerator.getSenseGenerator();
        for(Word lexeme : sourceSynset.getWords()){
            Sense sense = senseGenerator.getSense(lexeme);
            List<SemanticLabel> semanticLabels = sense.getSemanticLabels();
            if(semanticLabels == null) {
                semanticLabels = new LinkedList<SemanticLabel>();
            }
            // every sense gets its own SemanticLabel instance
            semanticLabels.add(createSemanticLabel(targetSynset, pointerSymbol));
            sense.setSemanticLabels(semanticLabels);
        }
    }

    /**
     * Consumes the targeted synset of a WordNet "Domain-Of" relation and generates the
     * corresponding instance of the {@link SemanticLabel}-class.
     * @param targetSynset synset targeted by a "Domain-Of" relation
     * @param key WordNet's symbol describing the relation
     * @return instance of SemanticLabel class associated with the consumed parameters
     */
    private SemanticLabel createSemanticLabel(net.sf.extjwnl.data.Synset targetSynset, String key) {
        SemanticLabel semanticLabel = new SemanticLabel();
        semanticLabel.setLabel(getSemanticLabel(targetSynset));
        semanticLabel.setType(domainOfRegisterMappings.get(key));
        return semanticLabel;
    }

    /**
     * Consumes a targeted WordNet-synset, and returns the semantic label.<br>
     * The semantic label is the lemma of the synset's first lexeme.
     * @param targetSynset WordNet's synset from which the semantic label should be extracted
     * @return the lemma of the targetSynset's first lexeme
     */
    private String getSemanticLabel(net.sf.extjwnl.data.Synset targetSynset) {
        return targetSynset.getWords().get(0).getLemma();
    }

    /**
     * Consumes a WN-PointerSymbol and returns the corresponding SynsetRelation-relType.
     * The relType also depends on the POS of the pointer's synset.
     * @param pointerSymbol the Pointer's symbol
     * @param posOrdinal the ordinal of synset's POS
     * @return corresponding relation type, or null if the mapping defines none for the POS
     * @throws IllegalArgumentException if no mapping is defined for the symbol or the ordinal
     * @see SynsetRelation
     * @see Pointer
     * @see ERelTypeSemantics
     */
    private ERelTypeSemantics getRelType(String pointerSymbol, int posOrdinal) {
        return lookup(pointerTypeRelTypeMappings, pointerSymbol, posOrdinal);
    }

    /**
     * Consumes a WN-PointerSymbol and returns the corresponding SynsetRelation-relName.
     * @param pointerSymbol the Pointer's symbol
     * @param posOrdinal the ordinal of synset's POS
     * @return corresponding relation name, or null if the mapping defines none for the POS
     * @throws IllegalArgumentException if no mapping is defined for the symbol or the ordinal
     * @see SynsetRelation
     * @see Pointer
     * @see ERelTypeSemantics
     */
    private String getRelationName(String pointerSymbol, int posOrdinal) {
        return lookup(pointerTypeRelNameMappings, pointerSymbol, posOrdinal);
    }

    /**
     * Returns the entry of a per-POS mapping, failing with a descriptive exception
     * (instead of a bare NullPointerException / ArrayIndexOutOfBoundsException)
     * when the symbol or the ordinal is not covered by the mapping.
     * @param mappings mapping from pointer symbols to per-POS arrays
     * @param pointerSymbol the Pointer's symbol
     * @param posOrdinal the ordinal of synset's POS
     * @return the array element registered for the symbol and the ordinal, possibly null
     * @throws IllegalArgumentException if no mapping is defined for the symbol or the ordinal
     */
    private static <T> T lookup(Map<String, T[]> mappings, String pointerSymbol, int posOrdinal) {
        T[] perPos = mappings.get(pointerSymbol);
        if(perPos == null) {
            throw new IllegalArgumentException(
                    "No mapping defined for WordNet pointer symbol '" + pointerSymbol + "'");
        }
        if(posOrdinal < 0 || posOrdinal >= perPos.length) {
            throw new IllegalArgumentException(
                    "No mapping defined for POS ordinal " + posOrdinal
                    + " of WordNet pointer symbol '" + pointerSymbol + "'");
        }
        return perPos[posOrdinal];
    }

    /**
     * Registers the relation names and relation types of one pointer symbol.
     * @param pointerSymbol WordNet's pointer symbol
     * @param relNames relation names for {NOUN, VERB, ADJECTIVE, ADVERB}
     * @param relTypes relation types for {NOUN, VERB, ADJECTIVE, ADVERB}
     */
    private void addMapping(String pointerSymbol, String[] relNames, ERelTypeSemantics[] relTypes) {
        pointerTypeRelNameMappings.put(pointerSymbol, relNames);
        pointerTypeRelTypeMappings.put(pointerSymbol, relTypes);
    }

    /**
     * Populates the pointer mappings, the set of ignored pointer keys
     * and the mappings related to domainOf relations.
     */
    private void initializePointerMappings() {
        // The Mappings for different POS are as follows {NOUN, VERB, ADJECTIVE, ADVERB}

        // hypernym
        addMapping("@",
                new String[] {ERelNameSemantics.HYPERNYM, ERelNameSemantics.HYPERNYM, null, null},
                new ERelTypeSemantics[] {ERelTypeSemantics.taxonomic, ERelTypeSemantics.taxonomic, null, null});
        // hypernymInstance
        addMapping("@i",
                new String[] {ERelNameSemantics.HYPERNYMINSTANCE, null, null, null},
                new ERelTypeSemantics[] {ERelTypeSemantics.taxonomic, null, null, null});
        // hyponym
        addMapping("~",
                new String[] {ERelNameSemantics.HYPONYM, ERelNameSemantics.HYPONYM, null, null},
                new ERelTypeSemantics[] {ERelTypeSemantics.taxonomic, ERelTypeSemantics.taxonomic, null, null});
        // hyponymInstance
        addMapping("~i",
                new String[] {ERelNameSemantics.HYPONYMINSTANCE, null, null, null},
                new ERelTypeSemantics[] {ERelTypeSemantics.taxonomic, null, null, null});
        // holonymMember
        addMapping("#m",
                new String[] {ERelNameSemantics.HOLONYMMEMBER, null, null, null},
                new ERelTypeSemantics[] {ERelTypeSemantics.partWhole, null, null, null});
        // holonymSubstance
        addMapping("#s",
                new String[] {ERelNameSemantics.HOLONYMSUBSTANCE, null, null, null},
                new ERelTypeSemantics[] {ERelTypeSemantics.partWhole, null, null, null});
        // holonymPart
        addMapping("#p",
                new String[] {ERelNameSemantics.HOLONYMPART, null, null, null},
                new ERelTypeSemantics[] {ERelTypeSemantics.partWhole, null, null, null});
        // meronymMember
        addMapping("%m",
                new String[] {ERelNameSemantics.MERONYMMEMBER, null, null, null},
                new ERelTypeSemantics[] {ERelTypeSemantics.partWhole, null, null, null});
        // meronymSubstance
        addMapping("%s",
                new String[] {ERelNameSemantics.MERONYMSUBSTANCE, null, null, null},
                new ERelTypeSemantics[] {ERelTypeSemantics.partWhole, null, null, null});
        // meronymPart
        addMapping("%p",
                new String[] {ERelNameSemantics.MERONYMPART, null, null, null},
                new ERelTypeSemantics[] {ERelTypeSemantics.partWhole, null, null, null});
        // nounAdjPair nounAdjGroup
        addMapping("=",
                new String[] {ATTRIBUTE, null, ATTRIBUTE, null},
                new ERelTypeSemantics[] {ERelTypeSemantics.association, null, ERelTypeSemantics.association, null});
        // topic
        addMapping(";c",
                new String[] {TOPIC, TOPIC, TOPIC, TOPIC},
                new ERelTypeSemantics[] {ERelTypeSemantics.label, ERelTypeSemantics.label, ERelTypeSemantics.label, ERelTypeSemantics.label});
        // isTopicOf
        addMapping("-c",
                new String[] {IS_TOPIC_OF, null, null, null},
                new ERelTypeSemantics[] {ERelTypeSemantics.predicative, null, null, null});
        // region
        addMapping(";r",
                new String[] {REGION, REGION, REGION, REGION},
                new ERelTypeSemantics[] {ERelTypeSemantics.label, ERelTypeSemantics.label, ERelTypeSemantics.label, ERelTypeSemantics.label});
        // isRegionOf
        addMapping("-r",
                new String[] {IS_REGION_OF, null, null, null},
                new ERelTypeSemantics[] {ERelTypeSemantics.predicative, null, null, null});
        // usage
        addMapping(";u",
                new String[] {USAGE, USAGE, USAGE, USAGE},
                new ERelTypeSemantics[] {ERelTypeSemantics.label, ERelTypeSemantics.label, ERelTypeSemantics.label, ERelTypeSemantics.label});
        // isUsageOf
        addMapping("-u",
                new String[] {IS_USAGE_OF, null, null, null},
                new ERelTypeSemantics[] {ERelTypeSemantics.predicative, null, null, null});
        // entails
        addMapping("*",
                new String[] {null, ERelNameSemantics.ENTAILS, null, null},
                new ERelTypeSemantics[] {null, ERelTypeSemantics.taxonomic, null, null});
        // causation
        addMapping(">",
                new String[] {null, ERelNameSemantics.CAUSEDBY, null, null},
                new ERelTypeSemantics[] {null, ERelTypeSemantics.taxonomic, null, null});
        // seeAlso
        addMapping("^",
                new String[] {null, ERelNameSemantics.SEEALSO, ERelNameSemantics.SEEALSO, null},
                new ERelTypeSemantics[] {null, ERelTypeSemantics.association, ERelTypeSemantics.association, null});
        // verbGroup
        addMapping("$",
                new String[] {null, VERB_GROUP, null, null},
                new ERelTypeSemantics[] {null, ERelTypeSemantics.association, null, null});
        // nearSynonym
        addMapping("&",
                new String[] {null, null, ERelNameSemantics.SYNONYMNEAR, null},
                new ERelTypeSemantics[] {null, null, ERelTypeSemantics.association, null});

        // Set the ignored keys
        // NOTE(review): presumably antonym, derivation, participle and pertainym pointers,
        // per WordNet's pointer-symbol table -- confirm
        ignoredPointerKeys.add("!");
        ignoredPointerKeys.add("+");
        ignoredPointerKeys.add("<");
        ignoredPointerKeys.add("\\");

        // Setting mappings related to domainOf Relations
        domainOfRegisterMappings.put(";c", ELabelTypeSemantics.domain);
        domainOfRegisterMappings.put(";r", ELabelTypeSemantics.regionOfUsage);
        domainOfRegisterMappings.put(";u", ELabelTypeSemantics.usage);
    }
}