/******************************************************************************* * Copyright 2016 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universität Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ package de.tudarmstadt.ukp.lmf.transform.verbnet; import java.io.BufferedReader; import java.io.File; import java.io.FileReader; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeMap; import de.tudarmstadt.ukp.lmf.model.core.Definition; import de.tudarmstadt.ukp.lmf.model.core.LexicalEntry; import de.tudarmstadt.ukp.lmf.model.core.Lexicon; import de.tudarmstadt.ukp.lmf.model.core.Sense; import de.tudarmstadt.ukp.lmf.model.core.TextRepresentation; import de.tudarmstadt.ukp.lmf.model.enums.ECase; import de.tudarmstadt.ukp.lmf.model.enums.EComplementizer; import de.tudarmstadt.ukp.lmf.model.enums.EDeterminer; import de.tudarmstadt.ukp.lmf.model.enums.EExampleType; import de.tudarmstadt.ukp.lmf.model.enums.EGrammaticalFunction; import de.tudarmstadt.ukp.lmf.model.enums.EGrammaticalNumber; import de.tudarmstadt.ukp.lmf.model.enums.ELabelTypeSemantics; import de.tudarmstadt.ukp.lmf.model.enums.ELanguageIdentifier; import de.tudarmstadt.ukp.lmf.model.enums.EPartOfSpeech; import de.tudarmstadt.ukp.lmf.model.enums.ESyntacticCategory; import de.tudarmstadt.ukp.lmf.model.enums.ESyntacticProperty; import de.tudarmstadt.ukp.lmf.model.enums.ETense; import de.tudarmstadt.ukp.lmf.model.enums.EVerbForm; import de.tudarmstadt.ukp.lmf.model.meta.SemanticLabel; import de.tudarmstadt.ukp.lmf.model.morphology.FormRepresentation; import de.tudarmstadt.ukp.lmf.model.morphology.Lemma; import de.tudarmstadt.ukp.lmf.model.semantics.MonolingualExternalRef; import de.tudarmstadt.ukp.lmf.model.semantics.PredicativeRepresentation; import de.tudarmstadt.ukp.lmf.model.semantics.SemanticArgument; import de.tudarmstadt.ukp.lmf.model.semantics.SemanticPredicate; import de.tudarmstadt.ukp.lmf.model.semantics.SenseExample; import de.tudarmstadt.ukp.lmf.model.semantics.SynSemArgMap; import de.tudarmstadt.ukp.lmf.model.semantics.SynSemCorrespondence; import de.tudarmstadt.ukp.lmf.model.syntax.LexemeProperty; import de.tudarmstadt.ukp.lmf.model.syntax.SubcatFrameSetElement; import de.tudarmstadt.ukp.lmf.model.syntax.SubcategorizationFrame; import de.tudarmstadt.ukp.lmf.model.syntax.SubcategorizationFrameSet; import de.tudarmstadt.ukp.lmf.model.syntax.SyntacticArgument; import de.tudarmstadt.ukp.lmf.model.syntax.SyntacticBehaviour; /** * This class extracts information from a preprocessed version of VerbNet and fills in the corresponding LMF classes * @author Eckle-Kohler * */ public class VerbNetExtractor { public static final String SENSE = "sense"; public Lexicon lexicon = new Lexicon(); private final File verbNetInputFile; // The File containing the VerbNet Input private final String resourceID; // name of the LMF lexicon, i.e. "VerbNet" // running numbers for IDs private static int lexicalEntryNumber = 0; private static int senseNumber = 0; private static int senseExampleNumber = 0; private static int syntacticBehaviourNumber = 0; private static int subcatFrameSetNumber = 0; private static int subcatFrameNumber = 0; private static int semanticPredNumber = 0; private static int synSemCorrNumber = 0; private static int syntacticArgumentNumber = 0; private static int semanticArgumentNumber = 0; private final String resourceVersion; // Mapping between verb lemmas and their' corresponding sense definitions in VerbNet private static Map<String, Set<VerbNetSense>> LemmaVerbNetSenseMappings = new TreeMap<String, Set<VerbNetSense>>(); // Mapping between LMF-Code of purely syntactic SC-Frame and SubcategorizationFrame private static Map<String, SubcategorizationFrame> synargsSubcatFrameMap = new TreeMap<String, SubcategorizationFrame>(); // Mapping between LMF-Code of syntactic/semantic (including thematic roles) SC-Frame and SynSemCorrespondence private static Map<List<String>, SynSemCorrespondence> predsynsemargsSynSemCorrMap = new LinkedHashMap<List<String>, SynSemCorrespondence>(); // Mapping between className and SubcategorizationFrameSet private static Map<String, SubcategorizationFrameSet> classSubcatFrameSetMap = new TreeMap<String, SubcategorizationFrameSet>(); // Mapping between className and set of SubcategorizationFrames private static Map<String, Set<SubcategorizationFrame>> classSCframeElementsMap = new TreeMap<String, Set<SubcategorizationFrame>>(); // Mapping between LMF-Code of semantic predicate and SemanticPredicate private static Map<String, SemanticPredicate> predSemPredicateMap = new TreeMap<String, SemanticPredicate>(); // Mapping between VerbNetSense and SubcategorizationFrameSet private static Map<VerbNetSense, SubcategorizationFrameSet> senseSubcatFrameSetMap = new LinkedHashMap<VerbNetSense, SubcategorizationFrameSet>(); private static List<VerbNetSense> listOfVerbNetSenses = new LinkedList <VerbNetSense>(); /** * Constructs a VerbNetExtractor * @param verbNetInput path of the File containing the preprocessed version of VerbNet * @param resourceID name of the LMF Lexicon instance * @param resourceVersion Version of the resource * @return VerbNetExtractor * @throws IOException */ public VerbNetExtractor(File verbNetInput, String resourceID, String resourceVersion) throws IOException { this.verbNetInputFile = verbNetInput; this.resourceID = resourceID; this.resourceVersion = resourceVersion; parseVerbNetInput(); convertVerbNetInput(); } /** * This method parses the document containing the verbNetInput * verbNetInput has the form: <verb>#<wnSense>#<example>#((<Arg>),..,(<Arg>))#<predicateString># ... * * @throws IOException */ private void parseVerbNetInput() throws IOException { System.out.print("Parsing VerbNet Input..."); BufferedReader input = new BufferedReader(new FileReader(verbNetInputFile)); try { String line; String[] parts; List<String> synSemArgs = new LinkedList <String>(); Set<VerbNetSense> vnSenses = new LinkedHashSet<VerbNetSense>(); // Processed VerbNet senses while ((line = input.readLine()) != null) { parts = line.split("#"); VerbNetSense verbNetSense = new VerbNetSense(parts[0],parts[1],parts[2],parts[3],parts[4],parts[5],parts[6],parts[7]); synSemArgs = getSelRestr(verbNetSense.arguments,verbNetSense.roleSet); verbNetSense.synSemArgs = synSemArgs; if (LemmaVerbNetSenseMappings.containsKey(verbNetSense.lemma)) { vnSenses = LemmaVerbNetSenseMappings.get(verbNetSense.lemma); vnSenses.add(verbNetSense); LemmaVerbNetSenseMappings.put(verbNetSense.lemma,vnSenses); } else { Set<VerbNetSense> newSense = new LinkedHashSet<VerbNetSense>(); newSense.add(verbNetSense); LemmaVerbNetSenseMappings.put(verbNetSense.lemma,newSense); } listOfVerbNetSenses.add(verbNetSense); } System.out.println("done"); } finally { input.close(); } } /** * This method extracts selectional restrictions from the syntactic-semantic arguments * and from the set of thematic roles * @param arguments syntactic-semantic arguments * @param roleSet the set of thematic roles * @return selectional restrictions */ private List<String> getSelRestr(String arguments, String roleSet) { List<String> newArgs = new LinkedList <String>(); Map<String, String> RoleRestrMap = new LinkedHashMap<String, String>(); String [] roles = roleSet.split("%"); for(String role : roles){ String[] pair = role.split("\\["); String themRole = pair[0]; RoleRestrMap.put(themRole,role); } String [] args = arguments.split(":"); for(String arg : args){ String newAtt = null; String [] attributes = arg.split(","); for(String att : attributes){ String [] pair = att.split("="); if (pair[0].equals("role") && RoleRestrMap.containsKey(pair[1])) { String newRole = pair[0].concat("=").concat(RoleRestrMap.get(pair[1])); if (newAtt.equals(null)) { newAtt = newRole; } else { newAtt = newAtt.concat(",").concat(newRole); } } else { if (newAtt ==null) { newAtt = att; } else { newAtt = newAtt.concat(",").concat(att); } } } newArgs.add(newAtt); } return newArgs; } /** * This method created LMF classes and * stores the extracted VerbNet information in these LMF classes * */ private void convertVerbNetInput() { lexicon.setLanguageIdentifier(ELanguageIdentifier.ENGLISH); lexicon.setId("VN_Lexicon_0"); lexicon.setName(resourceID); // Create subclasses of Lexicon that are independent of particular lexemes List<SubcategorizationFrame> subcategorizationFrames = new LinkedList <SubcategorizationFrame>(); List<SemanticPredicate> semanticPredicates = new LinkedList <SemanticPredicate>(); List<SubcategorizationFrameSet> subcategorizationFramesSets = new LinkedList <SubcategorizationFrameSet>(); List<SynSemCorrespondence> synSemCorrespondences = new LinkedList<SynSemCorrespondence>(); for (VerbNetSense vnSense : listOfVerbNetSenses) { // Create SemanticPredicates if (!predSemPredicateMap.containsKey(vnSense.predicate)) { SemanticPredicate semanticPredicate = new SemanticPredicate(); semanticPredicate.setId("VN_SemanticPredicate_".concat(Integer.toString(semanticPredNumber))); semanticPredNumber++; List<Definition> definitions = new ArrayList<Definition>(); // Create Definitions Definition definition = new Definition(); List<TextRepresentation> textReps = new ArrayList<TextRepresentation>(); // Create TextRepresentations TextRepresentation textRep = new TextRepresentation(); textRep.setWrittenText(vnSense.predicate); textReps.add(textRep); definition.setTextRepresentations(textReps); // Save TextRepresentations definitions.add(definition); semanticPredicate.setDefinitions(definitions); // Save Definitions predSemPredicateMap.put(vnSense.predicate,semanticPredicate); } } for (VerbNetSense vnSense : listOfVerbNetSenses) { // Create SubcatFrames if (!synargsSubcatFrameMap.containsKey(vnSense.synArgs)) { SubcategorizationFrame subcategorizationFrame = new SubcategorizationFrame(); subcategorizationFrame.setId("VN_SubcategorizationFrame_".concat(Integer.toString(subcatFrameNumber))); subcatFrameNumber++; subcategorizationFrame = parseArguments(vnSense,subcategorizationFrame); synargsSubcatFrameMap.put(vnSense.synArgs,subcategorizationFrame); } // Create Semantic Arguments (if not already done), establish SynSemCorrespondence if (!predsynsemargsSynSemCorrMap.containsKey(vnSense.synSemArgs.add(vnSense.predicate))) { SubcategorizationFrame subcategorizationFrame = synargsSubcatFrameMap.get(vnSense.synArgs); parseSemanticArguments(vnSense,subcategorizationFrame); } } for (VerbNetSense vnSense : listOfVerbNetSenses) { // Create SubcatFrameSets String [] classInfo = vnSense.classInformation.split("\\("); String superClass = classInfo[1]; superClass = superClass.replaceAll("\\)", ""); if (!classSubcatFrameSetMap.containsKey(classInfo[0])) { SubcategorizationFrameSet subcategorizationFrameSet = new SubcategorizationFrameSet(); subcategorizationFrameSet.setId("VN_SubcategorizationFrameSet_".concat(Integer.toString(subcatFrameSetNumber))); subcategorizationFrameSet.setName(classInfo[0]); subcatFrameSetNumber++; if (!superClass.equals("NULL")) { // inherits attribute needs to be set // super class might be empty, therefore create a new SubcatFrameSet NOW! // empty classes (without verbs) are the reason why the mapping is defined between className and // SubcatFrameSet, rather than between classInformation = class(superClass) and SubcatFrameSet if (!classSubcatFrameSetMap.containsKey(superClass)) { SubcategorizationFrameSet superFrameSet = new SubcategorizationFrameSet(); superFrameSet.setId("VN_SubcategorizationFrameSet_".concat(Integer.toString(subcatFrameSetNumber))); superFrameSet.setName(superClass); subcatFrameSetNumber++; classSubcatFrameSetMap.put(superClass,superFrameSet); subcategorizationFrameSet.setParentSubcatFrameSet(superFrameSet); } else { subcategorizationFrameSet.setParentSubcatFrameSet(classSubcatFrameSetMap.get(superClass)); } } classSubcatFrameSetMap.put(classInfo[0],subcategorizationFrameSet); senseSubcatFrameSetMap.put(vnSense, subcategorizationFrameSet); if (classSCframeElementsMap.get(classInfo[0]) == null) { Set<SubcategorizationFrame> scFrames = new LinkedHashSet<SubcategorizationFrame>(); scFrames.add(synargsSubcatFrameMap.get(vnSense.synArgs)); classSCframeElementsMap.put(classInfo[0], scFrames); } else { Set<SubcategorizationFrame> scFrames = classSCframeElementsMap.get(classInfo[0]); scFrames.add(synargsSubcatFrameMap.get(vnSense.synArgs)); classSCframeElementsMap.put(classInfo[0], scFrames); } } else { senseSubcatFrameSetMap.put(vnSense, classSubcatFrameSetMap.get(classInfo[0])); if (classSCframeElementsMap.get(classInfo[0]) == null) { Set<SubcategorizationFrame> scFrames = new LinkedHashSet<SubcategorizationFrame>(); scFrames.add(synargsSubcatFrameMap.get(vnSense.synArgs)); classSCframeElementsMap.put(classInfo[0], scFrames); } else { Set<SubcategorizationFrame> scFrames = classSCframeElementsMap.get(classInfo[0]); scFrames.add(synargsSubcatFrameMap.get(vnSense.synArgs)); classSCframeElementsMap.put(classInfo[0], scFrames); } } } // Add SubcatFrameElements to SubcategorizationFrameSet Iterator<String> classIterator = classSubcatFrameSetMap.keySet().iterator(); while (classIterator.hasNext()) { String classKey = classIterator.next(); SubcategorizationFrameSet subcatFrameSet = classSubcatFrameSetMap.get(classKey); if (classSCframeElementsMap.get(classKey) != null) { List<SubcatFrameSetElement> subcatFrameSetElements = new LinkedList<SubcatFrameSetElement>(); Iterator<SubcategorizationFrame> frameIterator = classSCframeElementsMap.get(classKey).iterator(); while (frameIterator.hasNext()) { SubcategorizationFrame scFrame = frameIterator.next(); SubcatFrameSetElement subcatFrameSetElement = new SubcatFrameSetElement(); subcatFrameSetElement.setElement(scFrame); subcatFrameSetElements.add(subcatFrameSetElement); } subcatFrameSet.setSubcatFrameSetElements(subcatFrameSetElements); classSubcatFrameSetMap.put(classKey, subcatFrameSet); } } synSemCorrespondences.addAll(predsynsemargsSynSemCorrMap.values()); lexicon.setSynSemCorrespondences(synSemCorrespondences); semanticPredicates.addAll(predSemPredicateMap.values()); Collections.sort(semanticPredicates); lexicon.setSemanticPredicates(semanticPredicates); subcategorizationFrames.addAll(synargsSubcatFrameMap.values()); Collections.sort(subcategorizationFrames); lexicon.setSubcategorizationFrames(subcategorizationFrames); subcategorizationFramesSets.addAll(classSubcatFrameSetMap.values()); lexicon.setSubcategorizationFrameSets(subcategorizationFramesSets); List<LexicalEntry> lexicalEntries = new LinkedList<LexicalEntry>(); Iterator<String> keyIterator = LemmaVerbNetSenseMappings.keySet().iterator(); while (keyIterator.hasNext()) { String verbLemma = keyIterator.next(); LexicalEntry lexicalEntry = new LexicalEntry(); // Create ID lexicalEntry.setId("VN_LexicalEntry_".concat(Integer.toString(lexicalEntryNumber))); lexicalEntryNumber++; // Create partOfSpeech lexicalEntry.setPartOfSpeech(EPartOfSpeech.verb); // Creating Lemma Lemma lemma = new Lemma(); // Create FormRepresentation List<FormRepresentation> formReps = new ArrayList<FormRepresentation>(); FormRepresentation formRep = new FormRepresentation(); formRep.setWrittenForm(verbLemma.replaceAll("_", " ")); // Extract FormRepresentation formRep.setLanguageIdentifier(ELanguageIdentifier.ENGLISH); formReps.add(formRep); // Save FormRepresentation lemma.setFormRepresentations(formReps); // Save FormRepresentations lexicalEntry.setLemma(lemma); // Save Lemma // Creating Senses List <Sense> senses = new ArrayList<Sense>(); // Creating SyntacticBehaviors List<SyntacticBehaviour> syntacticBehaviours = new LinkedList <SyntacticBehaviour>(); Iterator<VerbNetSense> senseIterator = LemmaVerbNetSenseMappings.get(verbLemma).iterator(); int senseIndex = 1; //starts with 1 while (senseIterator.hasNext()) { VerbNetSense vnSense = senseIterator.next(); Sense sense = new Sense(); sense.setId("VN_Sense_".concat(Integer.toString(senseNumber))); sense.setIndex(senseIndex); senseIndex++; senseNumber++; String [] classInfo = vnSense.classInformation.split("\\("); MonolingualExternalRef monolingualExternalRef = new MonolingualExternalRef(); monolingualExternalRef.setExternalSystem(resourceVersion + "_" + SENSE); monolingualExternalRef.setExternalReference(vnSense.lemma + "_" + classInfo[0]); List<MonolingualExternalRef> monolingualExternalRefs = new LinkedList<MonolingualExternalRef>(); monolingualExternalRefs.add(monolingualExternalRef); sense.setMonolingualExternalRefs(monolingualExternalRefs); List<SemanticLabel> semanticLabels = new ArrayList<SemanticLabel>(); SemanticLabel semanticLabel = new SemanticLabel(); semanticLabel.setLabel(classInfo[0]); semanticLabel.setType(ELabelTypeSemantics.verbnetClass); semanticLabels.add(semanticLabel); sense.setSemanticLabels(semanticLabels); List<SenseExample> examples = new ArrayList<SenseExample>(); // Create SenseExamples SenseExample example = new SenseExample(); example.setId("VN_SenseExample_".concat(Integer.toString(senseExampleNumber))); senseExampleNumber++; example.setExampleType(EExampleType.subcatFrame); List<TextRepresentation> exTextReps = new ArrayList<TextRepresentation>(); // Create TextRepresentations TextRepresentation exTextRep = new TextRepresentation(); exTextRep.setWrittenText(vnSense.example); exTextReps.add(exTextRep); example.setTextRepresentations(exTextReps); // Save TextRepresentations examples.add(example); sense.setSenseExamples(examples); // Save SenseExamples // Creating SyntacticBehaviour (one for each VerbNet sense) SyntacticBehaviour syntacticBehaviour = new SyntacticBehaviour(); // Generating an ID syntacticBehaviour.setId("VN_SyntacticBehaviour_".concat(Integer.toString(syntacticBehaviourNumber))); syntacticBehaviourNumber++; syntacticBehaviour.setSense(sense); syntacticBehaviour.setSubcategorizationFrame(synargsSubcatFrameMap.get(vnSense.synArgs)); syntacticBehaviour.setSubcategorizationFrameSet(senseSubcatFrameSetMap.get(vnSense)); syntacticBehaviours.add(syntacticBehaviour); // Creating Predicative Representation (one for each VerbNet sense) List<PredicativeRepresentation> predicativeRepresentations = new LinkedList <PredicativeRepresentation>(); PredicativeRepresentation predicativeRepresentation = new PredicativeRepresentation(); predicativeRepresentation.setPredicate(predSemPredicateMap.get(vnSense.predicate)); predicativeRepresentations.add(predicativeRepresentation); sense.setPredicativeRepresentations(predicativeRepresentations); // Save PredicativeRepresentations senses.add(sense); // Save Sense } //Save subclasses of lexicalEntry that require the complete VerbNet-Input to be processed lexicalEntry.setSenses(senses); lexicalEntry.setSyntacticBehaviours(syntacticBehaviours); lexicalEntries.add(lexicalEntry); // Save LexicalEntry } lexicon.setLexicalEntries(lexicalEntries); System.out.println("Statistics"); System.out.println(lexicalEntryNumber+" LexicalEntries"); System.out.println(senseNumber+" Senses"); System.out.println(subcatFrameSetNumber+" SubcatFrameSets"); System.out.println(subcatFrameNumber+" SubcategorizationFrames"); System.out.println(semanticPredNumber+" SemanticPredicates"); System.out.println(synSemCorrNumber+" SynSemCorrespondences"); System.out.println(syntacticArgumentNumber+" SyntacticArguments"); System.out.println(semanticArgumentNumber+" SemanticArguments"); } /** * This method creates semantic predicates and * establishes a mapping between semantic arguments * and syntactic arguments * @param vnSense a VerbNet sense * @param subcategorizationFrame */ private void parseSemanticArguments(VerbNetSense vnSense,SubcategorizationFrame subcategorizationFrame) { SemanticPredicate semanticPredicate = predSemPredicateMap.get(vnSense.predicate); // list of mappings between Syntactic and Semantic Arguments are to be created List<SynSemArgMap> synSemArgMaps = new LinkedList<SynSemArgMap>(); SynSemArgMap synSemArgMap = null; if (semanticPredicate.getSemanticArguments() == null) { List<SemanticArgument> semanticArguments = new LinkedList<SemanticArgument>(); int index = 0; // iterate over syntactic Arguments for (SyntacticArgument synArg: subcategorizationFrame.getSyntacticArguments()) { String synsemArg = vnSense.synSemArgs.get(index); if (synsemArg.contains("syntacticProperty")) { index++; synsemArg = vnSense.synSemArgs.get(index); } // look at synsemArg: is thematic role defined? if yes: create corresponding semanticArg String[] atts = synsemArg.split(","); for(String att : atts){ String [] splits = att.split("="); String attName = splits[0]; if(attName.equals("role")){ SemanticArgument semanticArgument = new SemanticArgument(); semanticArgument.setId("VN_SemanticArgument_".concat(Integer.toString(semanticArgumentNumber))); semanticArgumentNumber++; String semArg = splits[1]; if (semArg.matches(".*\\[.*\\]")) { String [] parts = semArg.split("\\["); String semRole = parts[0]; String selRes = "[" +parts[1]; semanticArgument.setSemanticRole(semRole); List<SemanticLabel> semanticLabels = new LinkedList<SemanticLabel>(); SemanticLabel semanticLabel = new SemanticLabel(); semanticLabel.setLabel(selRes); semanticLabel.setType(ELabelTypeSemantics.selectionalPreference); semanticLabels.add(semanticLabel); semanticArgument.setSemanticLabels(semanticLabels); } else { semanticArgument.setSemanticRole(splits[1]); } semanticArguments.add(semanticArgument); // Generate SynSemArgMapping synSemArgMap = new SynSemArgMap(); synSemArgMap.setSyntacticArgument(synArg); synSemArgMap.setSemanticArgument(semanticArgument); synSemArgMaps.add(synSemArgMap); } } index++; } semanticPredicate.setSemanticArguments(semanticArguments); predSemPredicateMap.put(vnSense.predicate,semanticPredicate); // save extended predicate in Mapping } else { // Semantic Arguments have already been created for this predicate; SynSemCorr needs to be established List<SemanticArgument> semArgs = semanticPredicate.getSemanticArguments(); int semIndex = 0; int max = semArgs.size(); int synsemIndex = 0; // iterate over syntactic Arguments for (SyntacticArgument synArg: subcategorizationFrame.getSyntacticArguments()) { String synsemArg = vnSense.synSemArgs.get(synsemIndex); if (synsemArg.contains("syntacticProperty")) { synsemIndex++; synsemArg = vnSense.synSemArgs.get(synsemIndex); } // look at synsemArg: is thematic role defined? if yes: create SynSemArgMap String[] atts = synsemArg.split(","); for(String att : atts){ String [] splits = att.split("="); String attName = splits[0]; if (attName.equals("role")){ if (semIndex >= max) { // this subcatFrame contains roles that were not present in previous subcatFrames with the same predicate SemanticArgument semanticArgument = new SemanticArgument(); semanticArgument.setId("VN_SemanticArgument_".concat(Integer.toString(semanticArgumentNumber))); semanticArgumentNumber++; String semArg = splits[1]; if (semArg.matches("\\[")) { String [] parts = att.split("\\["); String semRole = parts[0]; String selRes = "[" +parts[1]; semanticArgument.setSemanticRole(semRole); List<SemanticLabel> semanticLabels = new LinkedList<SemanticLabel>(); SemanticLabel semanticLabel = new SemanticLabel(); semanticLabel.setLabel(selRes); semanticLabel.setType(ELabelTypeSemantics.selectionalPreference); semanticLabels.add(semanticLabel); semanticArgument.setSemanticLabels(semanticLabels); } else { semanticArgument.setSemanticRole(splits[1]); } semArgs.add(semanticArgument); // Generate SynSemArgMapping synSemArgMap = new SynSemArgMap(); synSemArgMap.setSyntacticArgument(synArg); synSemArgMap.setSemanticArgument(semanticArgument); synSemArgMaps.add(synSemArgMap); semanticPredicate.setSemanticArguments(semArgs); predSemPredicateMap.put(vnSense.predicate,semanticPredicate); // save extended predicate in Mapping } else { SemanticArgument semArg = semArgs.get(semIndex); // Create SynSemArgMap synSemArgMap = new SynSemArgMap(); synSemArgMap.setSyntacticArgument(synArg); synSemArgMap.setSemanticArgument(semArg); synSemArgMaps.add(synSemArgMap); semIndex++; } } } synsemIndex++; } } SynSemCorrespondence synSemCorrespondence = new SynSemCorrespondence(); synSemCorrespondence.setId("VN_SynSemCorrespondence_".concat(Integer.toString(synSemCorrNumber))); synSemCorrNumber++; synSemCorrespondence.setSynSemArgMaps(synSemArgMaps); List<String> predsynsemargs = vnSense.synSemArgs; vnSense.synSemArgs.add(vnSense.predicate); predsynsemargsSynSemCorrMap.put(predsynsemargs,synSemCorrespondence); } /** * This method creates (purely syntactic) subcategorization frames * @param vnSense a VerbNet sense * @param subcatFrame a subcategorization frame * @return the subcategorization frame */ private SubcategorizationFrame parseArguments(VerbNetSense vnSense, SubcategorizationFrame subcatFrame) { SubcategorizationFrame scFrame = subcatFrame; List<SyntacticArgument> synArgs = new LinkedList<SyntacticArgument>(); for(String arg : vnSense.synSemArgs) { if (!arg.contains("syntacticProperty")) { SyntacticArgument syntacticArgument = new SyntacticArgument(); syntacticArgument.setId("VN_SyntacticArgument_".concat(Integer.toString(syntacticArgumentNumber))); syntacticArgumentNumber++; String[] atts = arg.split(","); for(String att : atts){ String [] splits = att.split("="); String attName = splits[0]; if (attName.equals("grammaticalFunction")){ String gf = splits[1]; if (gf.equals("object")) { gf = gf.replaceAll("object", "directObject"); } syntacticArgument.setGrammaticalFunction(EGrammaticalFunction.valueOf(gf)); } if(attName.equals("syntacticCategory")) { syntacticArgument.setSyntacticCategory(ESyntacticCategory.valueOf(splits[1])); } if(attName.equals("case")) { syntacticArgument.setCase(ECase.valueOf(splits[1])); } if(attName.equals("determiner")) { syntacticArgument.setDeterminer(EDeterminer.valueOf(splits[1])); } if(attName.equals("preposition")) { syntacticArgument.setPreposition(splits[1]); } if(attName.equals("prepositionType")) { syntacticArgument.setPrepositionType(splits[1]); } if(attName.equals("number")) { syntacticArgument.setNumber(EGrammaticalNumber.valueOf(splits[1])); } if(attName.equals("lexeme")) { syntacticArgument.setLexeme(splits[1]); } if(attName.equals("verbForm")) { syntacticArgument.setVerbForm(EVerbForm.valueOf(splits[1])); } if(attName.equals("tense")) { syntacticArgument.setTense(ETense.valueOf(splits[1])); } if(attName.equals("complementizer")) { syntacticArgument.setComplementizer(EComplementizer.valueOf(splits[1])); } } synArgs.add(syntacticArgument); } else { String [] splits = arg.split("="); String sp = splits[1]; if (sp.equals("raising")) { sp = sp.replaceAll("raising", "subjectRaising"); } LexemeProperty lexemeProperty = new LexemeProperty(); lexemeProperty.setSyntacticProperty(ESyntacticProperty.valueOf(sp)); scFrame.setLexemeProperty(lexemeProperty); } } scFrame.setSyntacticArguments(synArgs); return scFrame; } }