/** * Copyright 2016 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universität Darmstadt * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package de.tudarmstadt.ukp.lmf.transform.germanet; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import de.tudarmstadt.ukp.lmf.model.core.LexicalEntry; import de.tudarmstadt.ukp.lmf.model.core.Lexicon; import de.tudarmstadt.ukp.lmf.model.core.Sense; import de.tudarmstadt.ukp.lmf.model.enums.ELanguageIdentifier; import de.tudarmstadt.ukp.lmf.model.enums.EPartOfSpeech; import de.tudarmstadt.ukp.lmf.model.enums.ERelTypeMorphology; import de.tudarmstadt.ukp.lmf.model.morphology.FormRepresentation; import de.tudarmstadt.ukp.lmf.model.morphology.Lemma; import de.tudarmstadt.ukp.lmf.model.morphology.RelatedForm; import de.tudarmstadt.ukp.lmf.model.semantics.PredicativeRepresentation; import de.tudarmstadt.ukp.lmf.model.semantics.SemanticPredicate; import de.tudarmstadt.ukp.lmf.model.syntax.SyntacticBehaviour; import de.tuebingen.uni.sfs.germanet.api.Frame; import de.tuebingen.uni.sfs.germanet.api.LexRel; import de.tuebingen.uni.sfs.germanet.api.LexUnit; /** * Instance of this class offers methods for creating {@link LexicalEntry} * out of GermaNet's data. */ public class LexicalEntryGenerator { // converter associated with this LexicalEntryGenerator private final GNConverter converter; private SenseGenerator senseGenerator; // running number used for creating IDs of LexicalEntries private int lexicalEntryNumber = 0; // running number used for creating IDs of SyntacitBehaviours private int syntacticBehaviourNumber = 0; // Mappings between LexicalEntries and their' corresponding LexUnit-groups private final HashMap<LexicalEntry, Set<LexUnit>> leLUGroupMappings = new HashMap<LexicalEntry, Set<LexUnit>>(); // Mappings between LexUnit-groups and their' corresponding LexicalEntries private final Map<Set<LexUnit>, LexicalEntry> luGroupLEMappings = new HashMap<Set<LexUnit>, LexicalEntry>(); private final Log logger = LogFactory.getLog(getClass()); /** * Constructs an instance of {@link LexicalEntryGenerator}, which provides methods for creating <br> * LexicalEntries out of GermaNet's files * @param converter an instance of {@link GNConverter} associated with this generator * @param resourceVersion Version of the resource */ public LexicalEntryGenerator(GNConverter converter, String resourceVersion){ this.converter = converter; if (senseGenerator == null) { senseGenerator = new SenseGenerator(converter.getGnet(), resourceVersion); } } /** * This method creates a {@link LexicalEntry} based on the * consumed {@link Set} of LexicalUnits * @param luGroup a group of LexUnits from which a LexicalEntry should be created * @return LexicalEntry based on consumed group of LexUnits */ public LexicalEntry createLexicalEntry(Set<LexUnit> luGroup) { LexicalEntry lexicalEntry = new LexicalEntry(); // Create ID lexicalEntry.setId(getLEID(luGroup)); // Implied // Create partOfSpeech lexicalEntry.setPartOfSpeech(getLEPOS(luGroup)); //*** Creating Lemma ***// Lemma lemma = new Lemma(); lexicalEntry.setLemma(lemma); // appending List<FormRepresentation> formRepresentations = getFormRepresentations(luGroup); // get all FormRepresentation for this LU lemma.setFormRepresentations(formRepresentations); //*** Creating Senses***// List<Sense> senses = senseGenerator.generateSenses(luGroup); //** Creating SyntacticBehavior (one for each LexUnit in the group)**// SubcategorizationFrameExtractor subcatFrameExtr = converter.getSubcategorizationFrameExtractor(); if (lexicalEntry.getPartOfSpeech().equals(EPartOfSpeech.verb)) { List<SyntacticBehaviour> syntacticBehaviours = new LinkedList <SyntacticBehaviour>(); // A SyntacticBehaviour can only be created for verbs LinkedList<Sense> newSenses = new LinkedList<Sense>(); for (LexUnit lu : luGroup) { List <Frame> gnFrames = lu.getFrames(); Sense sense = senseGenerator.getSynsetGenerator().getSense(lu); newSenses.add(sense); for (Frame gnFrame : gnFrames) { SyntacticBehaviour syntacticBehaviour = new SyntacticBehaviour(); // Generating an ID StringBuffer sb = new StringBuffer(32); sb.append("GN_SyntacticBehaviour_").append(syntacticBehaviourNumber); syntacticBehaviourNumber++; syntacticBehaviour.setId(sb.toString()); syntacticBehaviour.setSense(sense); syntacticBehaviour.setSubcategorizationFrame(subcatFrameExtr.getSubcategorizationFrame(gnFrame.toString())); syntacticBehaviours.add(syntacticBehaviour); SemanticPredicate semanticPredicate = subcatFrameExtr.getSemanticPredicate(gnFrame.toString()); if(semanticPredicate != null){ List<PredicativeRepresentation> predicativeRepresentations = sense.getPredicativeRepresentations(); if(predicativeRepresentations == null) { predicativeRepresentations = new LinkedList <PredicativeRepresentation>(); } PredicativeRepresentation predicativeRepresentation = new PredicativeRepresentation(); predicativeRepresentation.setPredicate(semanticPredicate); predicativeRepresentations.add(predicativeRepresentation); sense.setPredicativeRepresentations(predicativeRepresentations); } } } lexicalEntry.setSyntacticBehaviours(syntacticBehaviours); lexicalEntry.setSenses(newSenses); } else { lexicalEntry.setSenses(senses); } // record this mappings for later usage leLUGroupMappings.put(lexicalEntry, luGroup); luGroupLEMappings.put(luGroup, lexicalEntry); return lexicalEntry; } /** * Consumes an instance of a {@link LexicalEntry} and appends the associated * RelatedForms to it. <br> * This method should only be invoked after all LexicalEntries of a {@link Lexicon} * have been created (without RelatedForms) * @param lexicalEntry an instance of LexicalEntry to which RelatedForms should be appended * @see RelatedForm */ public void setRelatedForms(LexicalEntry lexicalEntry) { Set<LexUnit> luGroup = leLUGroupMappings.get(lexicalEntry); if (luGroup.isEmpty()) { throw new RuntimeException("Found empty LUgroup for " + lexicalEntry.getId()); } List<RelatedForm> relatedForms = new LinkedList<RelatedForm>(); for (LexUnit lu : luGroup) { // Extracting derivationBaseVerb List<LexUnit> participles = lu.getRelatedLexUnits(LexRel.has_participle); if (!participles.isEmpty()) { for (LexUnit participle : participles) { RelatedForm relatedForm = getRelatedForm(participle); relatedForm.setRelType(ERelTypeMorphology.derivationBaseVerb); relatedForms.add(relatedForm); } } // Extracting derivationBaseVerbAdj List<LexUnit> pertonyms = lu.getRelatedLexUnits(LexRel.has_pertainym); if (!pertonyms.isEmpty()) { for (LexUnit pertonym : pertonyms) { RelatedForm relatedForm = getRelatedForm(pertonym); relatedForm.setRelType(ERelTypeMorphology.derivationBaseVerbAdj); relatedForms.add(relatedForm); } } } removeDuplicateRelatedForms(relatedForms); lexicalEntry.setRelatedForms(relatedForms); } /** * This method consumes an instance of {@link LexUnit} and returns * the corresponding instance of {@link RelatedForm} * @param lexUnit lexical unit for which an instance of RelatedForm class should be returned * @return RelatedForm which is associated with consumed lexUnit */ private RelatedForm getRelatedForm(LexUnit lexUnit){ Set<LexUnit> targetGroup = converter.getLUGroup(lexUnit); if (targetGroup == null) { throw new RuntimeException("No LU group found for lexUnit " + lexUnit.getId()); } RelatedForm relatedForm = new RelatedForm(); relatedForm.setTargetLexicalEntry(luGroupLEMappings.get(targetGroup)); relatedForm.setTargetSense(senseGenerator.getSense(lexUnit)); return relatedForm; } /** * This method consumes a group of LexUnits and returns a {@link List} of instances of {@link FormRepresentation}, <br> * generated from consumed group of LexUnits * @param luGroup a group of LexUnits, for which the list of FormRepresentations should be generated * @return A list of instances of FormRepresentations from the consumed luGroup * @since UBY 0.1.0 */ private List<FormRepresentation> getFormRepresentations(Set<LexUnit> luGroup){ HashMap<String, String> mappings = new HashMap<String, String>(); // <orthographyName,orthographyForm> String orthForm = null; String orthVar = null; String oldOrthForm = null; String oldOrthVar = null; for (LexUnit lu : luGroup) { // Extracting orthForm String orthForm2 = lu.getOrthForm(); if (orthForm == null) { orthForm = orthForm2; } else if (orthForm2 != null && !orthForm.equals(orthForm2)) { logger.warn("conflict, diffrent orthForm in same luGroup!"); } // Extracting orthVar String orthVar2 = lu.getOrthVar(); if (orthVar == null) { orthVar = orthVar2; } else if (orthVar2 != null && !orthVar.equals(orthVar2)) { logger.warn("conflict, diffrent orthVar in same luGroup!"); } // Extracting oldOrthForm String oldOrthForm2 = lu.getOldOrthForm(); if (oldOrthForm == null) { oldOrthForm = oldOrthForm2; } else if (oldOrthForm2 != null && !oldOrthForm.equals(oldOrthForm2)) { logger.warn("LexicalEntryGenerator: conflict, diffrent oldOrthForm in same luGroup!"); } // Extracting oldOrthVar String oldOrthVar2 = lu.getOldOrthVar(); if (oldOrthVar == null) { oldOrthVar = oldOrthVar2; } else if (oldOrthVar2 != null && !oldOrthVar.equals(oldOrthVar2)) { logger.warn("LexicalEntryGenerator: conflict, diffrent oldOrthVar in same luGroup!"); } } // Add the mappings if (orthForm != null) { mappings.put("orthForm", orthForm); } if (orthVar != null) { mappings.put("orthVar", orthVar); } if (oldOrthForm != null) { mappings.put("oldOrthForm", oldOrthForm); } if (oldOrthVar != null) { mappings.put("oldOrthVar", oldOrthVar); } List<FormRepresentation> formRepresentations = new LinkedList<FormRepresentation>(); for(String orthName : mappings.keySet()){ FormRepresentation formRepresentation = new FormRepresentation(); formRepresentation.setLanguageIdentifier(ELanguageIdentifier.GERMAN); formRepresentation.setWrittenForm(mappings.get(orthName)); String orthographyName = null; //TODO: standardize names in the model (e.g., using constants!) //TODO: check definition of the "variants" - IMHO, there should be only two values (new and old orthography). if (orthName.equals("orthForm")) { orthographyName = "new German orthography"; } else if (orthName.equals("orthVar")) { orthographyName = "new German orthographical variant"; } else if (orthName.equals("oldOrthForm")) { orthographyName = "old German orthography"; } else if (orthName.equals("oldOrthVar")) { orthographyName = "old German orthographical variant"; } else { throw new RuntimeException("Unknown orthography: " + orthForm); } formRepresentation.setOrthographyName(orthographyName); formRepresentations.add(formRepresentation); } return formRepresentations; } /** * Consumes a group LexicalUnits and returns {@link EPartOfSpeech} of * corresponding {@link LexicalEntry} * @param luGroup s group LexicalUnits * @return part of speech of the LexicalEntry * @see LexUnit */ private EPartOfSpeech getLEPOS(Set<LexUnit> luGroup) { for (LexUnit lu : luGroup) { String wordForm = lu.getWordCategory().name(); if (wordForm.equals("adj")) { return EPartOfSpeech.adjective; } else if (wordForm.equals("nomen")) { if (lu.isNamedEntity()) { return EPartOfSpeech.nounProper; } else { return EPartOfSpeech.nounCommon; } } else if (wordForm.equals("verben")) { return EPartOfSpeech.verb; } } throw new RuntimeException("Undefined part of speech for " + luGroup.iterator().next().getId()); } /** * This method consumes a group of LexicalUnits and generates a unique * ID for corresponding {@link LexicalEntry} * @param luGroup a group of LexicalUnits for which LexicalEntry an id will be generated * @return generated id * @see LexUnit */ private String getLEID(Set<LexUnit> luGroup) { LexicalEntry generatedLexicalEntry = luGroupLEMappings.get(luGroup); if (generatedLexicalEntry != null) { return generatedLexicalEntry.getId(); } else{ return "GN_LexicalEntry_" + (lexicalEntryNumber++); } } /** * This method consumes a list of RelatedForms and removes * all duplicates from the consumed list * @param relatedForms {@link List} of RelatedForm objects from which duplicates should be removed * @see {@link RelatedForm} * @since UBY 0.1.0 */ private void removeDuplicateRelatedForms(List<RelatedForm> relatedForms) { if (relatedForms.isEmpty()) return; HashSet<RelatedForm> temp = new HashSet<RelatedForm>(); temp.addAll(relatedForms); relatedForms.clear(); relatedForms.addAll(temp); } }