/******************************************************************************* * Copyright 2016 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universität Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ package de.tudarmstadt.ukp.lmf.transform.imslex; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.io.Reader; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.Iterator; import java.util.LinkedHashSet; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeMap; import de.tudarmstadt.ukp.lmf.model.core.LexicalEntry; import de.tudarmstadt.ukp.lmf.model.core.Lexicon; import de.tudarmstadt.ukp.lmf.model.core.Sense; import de.tudarmstadt.ukp.lmf.model.enums.ECase; import de.tudarmstadt.ukp.lmf.model.enums.EComplementizer; import de.tudarmstadt.ukp.lmf.model.enums.EDeterminer; import de.tudarmstadt.ukp.lmf.model.enums.EGrammaticalFunction; import de.tudarmstadt.ukp.lmf.model.enums.EGrammaticalNumber; import de.tudarmstadt.ukp.lmf.model.enums.ELabelTypeSemantics; import de.tudarmstadt.ukp.lmf.model.enums.ELanguageIdentifier; import de.tudarmstadt.ukp.lmf.model.enums.EPartOfSpeech; import de.tudarmstadt.ukp.lmf.model.enums.ESyntacticCategory; import de.tudarmstadt.ukp.lmf.model.enums.ESyntacticProperty; import de.tudarmstadt.ukp.lmf.model.enums.ETense; import de.tudarmstadt.ukp.lmf.model.enums.EVerbForm; import de.tudarmstadt.ukp.lmf.model.meta.SemanticLabel; import de.tudarmstadt.ukp.lmf.model.morphology.FormRepresentation; import de.tudarmstadt.ukp.lmf.model.morphology.Lemma; import de.tudarmstadt.ukp.lmf.model.semantics.MonolingualExternalRef; import de.tudarmstadt.ukp.lmf.model.semantics.PredicativeRepresentation; import de.tudarmstadt.ukp.lmf.model.semantics.SemanticArgument; import de.tudarmstadt.ukp.lmf.model.semantics.SemanticPredicate; import de.tudarmstadt.ukp.lmf.model.semantics.SynSemArgMap; import de.tudarmstadt.ukp.lmf.model.semantics.SynSemCorrespondence; import de.tudarmstadt.ukp.lmf.model.syntax.LexemeProperty; import de.tudarmstadt.ukp.lmf.model.syntax.SubcatFrameSetElement; import de.tudarmstadt.ukp.lmf.model.syntax.SubcategorizationFrame; import de.tudarmstadt.ukp.lmf.model.syntax.SubcategorizationFrameSet; import de.tudarmstadt.ukp.lmf.model.syntax.SyntacticArgument; import de.tudarmstadt.ukp.lmf.model.syntax.SyntacticBehaviour; /** * This class extracts information from a preprocessed version of IMSLex - Subcategorization Frames and fills in the corresponding LMF classes * @author Eckle-Kohler * @deprecated Use {@link org.dkpro.uby.imslex.IMSLexSubcatConverter} instead. */ @Deprecated public class IMSlexExtractor { public static final String SENSE = "sense"; public Lexicon lexicon = new Lexicon(); private final List<SynSemCorrespondence> synSemCorrespondences = new LinkedList<SynSemCorrespondence>(); private final List<SemanticPredicate> semanticPredicates = new LinkedList <SemanticPredicate>(); private final List<LexicalEntry> lexicalEntries = new LinkedList<LexicalEntry>(); private File lexiconInputFile; // The File containing the preprocessed Input lexicon private String resourceName; // name of the LMF lexicon, i.e. "IMSLexSubcat" // running numbers for IDs private static int lexicalEntryNumber = 0; private static int senseNumber = 0; private static int syntacticBehaviourNumber = 0; private static int subcatFrameSetNumber = 0; private static int subcatFrameNumber = 0; private static int syntacticArgumentNumber = 0; private static int semanticPredicateNumber = 0; private static int semanticArgumentNumber = 0; private static int synSemCorrespondenceNumber = 0; private final String resourceVersion; // Mapping between lemmas and their corresponding IMSlex senses private static Map<String, Set<IMSlexSense>> verbLemmaIMSlexSenseMappings = new TreeMap<String, Set<IMSlexSense>>(); private static Map<String, Set<IMSlexSense>> adjLemmaIMSlexSenseMappings = new TreeMap<String, Set<IMSlexSense>>(); private static Map<String, Set<IMSlexSense>> nounLemmaIMSlexSenseMappings = new TreeMap<String, Set<IMSlexSense>>(); // Mapping between syntactic/semantic arguments (containing sem. role information) and purely syntactic arguments private static Map<String, String> synSemArgSynArgMapping = new TreeMap<String, String>(); // Mapping between LMF-Code of purely syntactic SC-Frame and SubcategorizationFrame private static Map<String, SubcategorizationFrame> synargsSubcatFrameMap = new TreeMap<String, SubcategorizationFrame>(); // Mapping between className and SubcategorizationFrameSet private static Map<String, SubcategorizationFrameSet> classSubcatFrameSetMap = new TreeMap<String, SubcategorizationFrameSet>(); // Mapping between className and set of SubcategorizationFrames private static Map<String, Set<SubcategorizationFrame>> classSCframeElementsMap = new TreeMap<String, Set<SubcategorizationFrame>>(); // Mapping between IMSlexSense and SubcategorizationFrameSet private static Map<IMSlexSense, SubcategorizationFrameSet> senseSubcatFrameSetMap; // Mapping between IMSlexSense and SemanticPredicate private static Map<IMSlexSense, SemanticPredicate> senseSemPredicateMap; // Mapping between IMSlex syntactic information and semantic class information private static Map<String, String> syntaxSemClassMap = new TreeMap<String, String>(); private static List<IMSlexSense> listOfIMSlexSenses = new LinkedList <IMSlexSense>(); /** * Constructs a IMSlexExtractor * @param preprocessedLexicon path of the File containing the preprocessed version of IMSlex * @param resourceName name of the LMF Lexicon instance: "IMSLexSubcat" * @return IMSlexExtractor * @throws IOException */ public IMSlexExtractor(File preprocessedLexicon, String resourceName, String resourceVersion) throws IOException { Comparator<IMSlexSense> comparator = new Comparator<IMSlexSense>() { @Override public int compare(IMSlexSense o1, IMSlexSense o2) { String key1 = o1.lemma + o1.pos + o1.hashCode(); String key2 = o2.lemma + o2.pos + o2.hashCode(); return key1.compareTo(key2); } }; senseSubcatFrameSetMap = new TreeMap<IMSlexSense, SubcategorizationFrameSet>(comparator); senseSemPredicateMap = new TreeMap<IMSlexSense, SemanticPredicate>(comparator); this.lexiconInputFile = preprocessedLexicon; this.resourceName = resourceName; this.resourceVersion = resourceVersion; parsePreprocessedIMSlex(); convertIMSlex(); } /** * This method parses the document containing the lexicon Input * Input has the form: <lemma>%<pos>%<Arg>:...:<Arg>%classInformation * * @throws IOException */ private void parsePreprocessedIMSlex() throws IOException { System.out.print("Parsing preprocessed IMSlex ..."); Reader r = new InputStreamReader(new FileInputStream(lexiconInputFile), "UTF8"); BufferedReader input = new BufferedReader(r); try { String line; String[] parts; Set<IMSlexSense> imsLexSenses = new LinkedHashSet<IMSlexSense>(); // senses that have already been processed for one lemma while ((line = input.readLine()) != null) { parts = line.split("%"); IMSlexSense imsLexSense = new IMSlexSense(parts[0],parts[1],null,parts[2],parts[3]); if (imsLexSense.synArgs.contains("semanticLabel")) { imsLexSense.synArgs = "null"; imsLexSense.classInformation = parts[2]; } if (!imsLexSense.synArgs.equals("null") || !imsLexSense.classInformation.equals("null")) { // skip adjectives without SCF and semantic class information if (syntaxSemClassMap.containsKey(imsLexSense.lemma+"%"+imsLexSense.pos+"%"+imsLexSense.synArgs)) { // there is already an IMSlexSense with the same syntax if (!imsLexSense.classInformation.equals("null") && syntaxSemClassMap.get(imsLexSense.lemma+"%"+imsLexSense.pos+"%"+imsLexSense.synArgs).equals("null")) { //if the class of the existing entry is null, but the class of the new IMSlexSense is not null: //replace the null with the class information syntaxSemClassMap.put(imsLexSense.lemma+"%"+imsLexSense.pos+"%"+imsLexSense.synArgs, imsLexSense.classInformation); } } else { syntaxSemClassMap.put(imsLexSense.lemma+"%"+imsLexSense.pos+"%"+imsLexSense.synArgs, imsLexSense.classInformation); } if (imsLexSense.synArgs.contains("role")) { String pureSynArgs = imsLexSense.synArgs.replaceFirst(",role=[a-z]+", ""); synSemArgSynArgMapping.put(imsLexSense.synArgs, pureSynArgs); } else { synSemArgSynArgMapping.put(imsLexSense.synArgs, imsLexSense.synArgs); } } } Iterator<String> syntaxIterator = syntaxSemClassMap.keySet().iterator(); while (syntaxIterator.hasNext()) { String syntax = syntaxIterator.next(); parts = syntax.split("%"); IMSlexSense sense = new IMSlexSense(parts[0],parts[1],null,parts[2],syntaxSemClassMap.get(syntax)); listOfIMSlexSenses.add(sense); } for (IMSlexSense imsLexSense : listOfIMSlexSenses) { if (imsLexSense.pos.equals("verb")) { if (verbLemmaIMSlexSenseMappings.containsKey(imsLexSense.lemma)) { imsLexSenses = verbLemmaIMSlexSenseMappings.get(imsLexSense.lemma); imsLexSenses.add(imsLexSense); verbLemmaIMSlexSenseMappings.put(imsLexSense.lemma,imsLexSenses); } else { Set<IMSlexSense> newSense = new LinkedHashSet<IMSlexSense>(); newSense.add(imsLexSense); verbLemmaIMSlexSenseMappings.put(imsLexSense.lemma,newSense); } } else if (imsLexSense.pos.equals("adj")) { if (adjLemmaIMSlexSenseMappings.containsKey(imsLexSense.lemma)) { imsLexSenses = adjLemmaIMSlexSenseMappings.get(imsLexSense.lemma); imsLexSenses.add(imsLexSense); adjLemmaIMSlexSenseMappings.put(imsLexSense.lemma,imsLexSenses); } else { Set<IMSlexSense> newSense = new LinkedHashSet<IMSlexSense>(); newSense.add(imsLexSense); adjLemmaIMSlexSenseMappings.put(imsLexSense.lemma,newSense); } } else { //noun if (nounLemmaIMSlexSenseMappings.containsKey(imsLexSense.lemma)) { imsLexSenses = nounLemmaIMSlexSenseMappings.get(imsLexSense.lemma); imsLexSenses.add(imsLexSense); nounLemmaIMSlexSenseMappings.put(imsLexSense.lemma,imsLexSenses); } else { Set<IMSlexSense> newSense = new LinkedHashSet<IMSlexSense>(); newSense.add(imsLexSense); nounLemmaIMSlexSenseMappings.put(imsLexSense.lemma,newSense); } } } System.out.println("done"); } finally { input.close(); r.close(); } } /** * This method creates LMF classes and * fills in the extracted lexical information * */ private void convertIMSlex() { lexicon.setLanguageIdentifier(ELanguageIdentifier.GERMAN); lexicon.setId("IMSLexSubcat_Lexicon_0"); lexicon.setName(resourceName); // Create subclasses of Lexicon that are independent of particular lexemes List<SubcategorizationFrame> subcategorizationFrames = new LinkedList <SubcategorizationFrame>(); List<SubcategorizationFrameSet> subcategorizationFramesSets = new LinkedList <SubcategorizationFrameSet>(); for (IMSlexSense imsLexSense : listOfIMSlexSenses) { if (!imsLexSense.synArgs.equals("null")) { // Create SubcatFrames and SemanticPredicates String synArgs = synSemArgSynArgMapping.get(imsLexSense.synArgs); if (!synargsSubcatFrameMap.containsKey(synArgs)) { SubcategorizationFrame subcategorizationFrame = new SubcategorizationFrame(); subcategorizationFrame.setId("IMSLexSubcat_SubcategorizationFrame_".concat(Integer.toString(subcatFrameNumber))); subcatFrameNumber++; subcategorizationFrame = parseArguments(imsLexSense,subcategorizationFrame); synargsSubcatFrameMap.put(synArgs,subcategorizationFrame); if (imsLexSense.synArgs.contains("role")) { //only few IMSLexSubcat-frames specify a semantic role SemanticPredicate semanticPredicate = new SemanticPredicate(); semanticPredicate = parseSemanticArguments(imsLexSense,subcategorizationFrame); semanticPredicates.add(semanticPredicate); senseSemPredicateMap.put(imsLexSense, semanticPredicate); } } else { SubcategorizationFrame subcategorizationFrame = synargsSubcatFrameMap.get(synArgs); if (imsLexSense.synArgs.contains("role")) { //only few IMSlexSubcat-frames specify a semantic role SemanticPredicate semanticPredicate = new SemanticPredicate(); semanticPredicate = parseSemanticArguments(imsLexSense,subcategorizationFrame); semanticPredicates.add(semanticPredicate); senseSemPredicateMap.put(imsLexSense, semanticPredicate); } } } } for (IMSlexSense imsLexSense : listOfIMSlexSenses) { if (!imsLexSense.synArgs.equals("null") && !imsLexSense.classInformation.equals("null")) { // Create SubcatFrameSets if (!classSubcatFrameSetMap.containsKey(imsLexSense.classInformation)) { SubcategorizationFrameSet subcategorizationFrameSet = new SubcategorizationFrameSet(); subcategorizationFrameSet.setId("IMSLexSubcat_SubcategorizationFrameSet_".concat(Integer.toString(subcatFrameSetNumber))); subcategorizationFrameSet.setName(imsLexSense.classInformation); subcatFrameSetNumber++; classSubcatFrameSetMap.put(imsLexSense.classInformation,subcategorizationFrameSet); senseSubcatFrameSetMap.put(imsLexSense, subcategorizationFrameSet); if (classSCframeElementsMap.get(imsLexSense.classInformation) == null) { Set<SubcategorizationFrame> scFrames = new LinkedHashSet<SubcategorizationFrame>(); scFrames.add(synargsSubcatFrameMap.get(synSemArgSynArgMapping.get(imsLexSense.synArgs))); classSCframeElementsMap.put(imsLexSense.classInformation, scFrames); } else { Set<SubcategorizationFrame> scFrames = classSCframeElementsMap.get(imsLexSense.classInformation); scFrames.add(synargsSubcatFrameMap.get(synSemArgSynArgMapping.get(imsLexSense.synArgs))); classSCframeElementsMap.put(imsLexSense.classInformation, scFrames); } } else { senseSubcatFrameSetMap.put(imsLexSense, classSubcatFrameSetMap.get(imsLexSense.classInformation)); if (classSCframeElementsMap.get(imsLexSense.classInformation) == null) { Set<SubcategorizationFrame> scFrames = new LinkedHashSet<SubcategorizationFrame>(); scFrames.add(synargsSubcatFrameMap.get(synSemArgSynArgMapping.get(imsLexSense.synArgs))); classSCframeElementsMap.put(imsLexSense.classInformation, scFrames); } else { Set<SubcategorizationFrame> scFrames = classSCframeElementsMap.get(imsLexSense.classInformation); scFrames.add(synargsSubcatFrameMap.get(synSemArgSynArgMapping.get(imsLexSense.synArgs))); classSCframeElementsMap.put(imsLexSense.classInformation, scFrames); } } } } if(classSubcatFrameSetMap != null ) { // Add SubcatFrameElements to SubcategorizationFrameSet Iterator<String> classIterator = classSubcatFrameSetMap.keySet().iterator(); while (classIterator.hasNext()) { String classKey = classIterator.next(); SubcategorizationFrameSet subcatFrameSet = classSubcatFrameSetMap.get(classKey); if (classSCframeElementsMap.get(classKey) != null) { List<SubcatFrameSetElement> subcatFrameSetElements = new LinkedList<SubcatFrameSetElement>(); Iterator<SubcategorizationFrame> frameIterator = classSCframeElementsMap.get(classKey).iterator(); while (frameIterator.hasNext()) { SubcategorizationFrame scFrame = frameIterator.next(); SubcatFrameSetElement subcatFrameSetElement = new SubcatFrameSetElement(); subcatFrameSetElement.setElement(scFrame); subcatFrameSetElements.add(subcatFrameSetElement); } subcatFrameSet.setSubcatFrameSetElements(subcatFrameSetElements); classSubcatFrameSetMap.put(classKey, subcatFrameSet); } } } subcategorizationFrames.addAll(synargsSubcatFrameMap.values()); Collections.sort(subcategorizationFrames); lexicon.setSubcategorizationFrames(subcategorizationFrames); subcategorizationFramesSets.addAll(classSubcatFrameSetMap.values()); lexicon.setSubcategorizationFrameSets(subcategorizationFramesSets); // might be null lexicon.setSemanticPredicates(semanticPredicates); // might be null lexicon.setSynSemCorrespondences(synSemCorrespondences); // might be null createLexicalEntries(verbLemmaIMSlexSenseMappings.keySet().iterator(),"verb"); createLexicalEntries(adjLemmaIMSlexSenseMappings.keySet().iterator(),"adj"); createLexicalEntries(nounLemmaIMSlexSenseMappings.keySet().iterator(),"noun"); lexicon.setLexicalEntries(lexicalEntries); System.out.println("Statistics"); System.out.println(lexicalEntryNumber+" LexicalEntries"); System.out.println(senseNumber+" Senses"); System.out.println(subcatFrameSetNumber+" SubcatFrameSets"); System.out.println(subcatFrameNumber+" SubcategorizationFrames"); System.out.println(syntacticArgumentNumber+" SyntacticArguments"); System.out.println(semanticPredicateNumber+" SemanticPredicates"); System.out.println(synSemCorrespondenceNumber+" SynSemCorrespondences"); System.out.println(semanticArgumentNumber+" SemanticArguments"); } /** * This method creates LexicalEntries and * fills in lexical information * */ private void createLexicalEntries(Iterator<String> iterator, String pos) { Iterator<IMSlexSense> senseIterator; while (iterator.hasNext()) { String sourceLemma = iterator.next(); LexicalEntry lexicalEntry = new LexicalEntry(); // Create ID lexicalEntry.setId("IMSLexSubcat_LexicalEntry_".concat(Integer.toString(lexicalEntryNumber))); lexicalEntryNumber++; // Set partOfSpeech lexicalEntry.setPartOfSpeech(mapPOS(pos)); // Create Lemma Lemma lemma = new Lemma(); // Create FormRepresentation List<FormRepresentation> formReps = new ArrayList<FormRepresentation>(); FormRepresentation formRep = new FormRepresentation(); formRep.setLanguageIdentifier(ELanguageIdentifier.GERMAN); if (sourceLemma.contains("#")) { // then it is a verb with separable prefix, the prefix marked by # String[] parts = sourceLemma.split("#"); String prefix = parts[0]; lexicalEntry.setSeparableParticle(prefix); String newVerbLemma = prefix.concat(parts[1]); formRep.setWrittenForm(newVerbLemma); } else { formRep.setWrittenForm(sourceLemma); } formReps.add(formRep); // Save FormRepresentation lemma.setFormRepresentations(formReps); // Save FormRepresentations lexicalEntry.setLemma(lemma); // Save Lemma // Create Senses List <Sense> senses = new ArrayList<Sense>(); // Create SyntacticBehavior List<SyntacticBehaviour> syntacticBehaviours = new LinkedList <SyntacticBehaviour>(); if (pos.equals("verb")) { senseIterator = verbLemmaIMSlexSenseMappings.get(sourceLemma).iterator(); } else if (pos.equals("adj")) { senseIterator = adjLemmaIMSlexSenseMappings.get(sourceLemma).iterator(); } else { senseIterator = nounLemmaIMSlexSenseMappings.get(sourceLemma).iterator(); } while (senseIterator.hasNext()) { IMSlexSense imsLexSense = senseIterator.next(); Sense sense = new Sense(); sense.setId("IMSLexSubcat_Sense_".concat(Integer.toString(senseNumber))); sense.setIndex(senseNumber); senseNumber++; MonolingualExternalRef monolingualExternalRef = new MonolingualExternalRef(); monolingualExternalRef.setExternalSystem(resourceVersion + "_" + SENSE); monolingualExternalRef.setExternalReference(imsLexSense.lemma); List<MonolingualExternalRef> monolingualExternalRefs = new LinkedList<MonolingualExternalRef>(); monolingualExternalRefs.add(monolingualExternalRef); sense.setMonolingualExternalRefs(monolingualExternalRefs); if (!imsLexSense.classInformation.equals("null")) { List<SemanticLabel> semanticLabels = new ArrayList<SemanticLabel>(); SemanticLabel semanticLabel = new SemanticLabel(); semanticLabel.setLabel(imsLexSense.classInformation); semanticLabel.setType(getTypeOfSemanticLabel(imsLexSense.classInformation)); semanticLabels.add(semanticLabel); sense.setSemanticLabels(semanticLabels); } if (!imsLexSense.synArgs.equals("null")) { // Creating SyntacticBehaviour (one for each sense) SyntacticBehaviour syntacticBehaviour = new SyntacticBehaviour(); // Generating an ID syntacticBehaviour.setId("IMSLexSubcat_SyntacticBehaviour_".concat(Integer.toString(syntacticBehaviourNumber))); syntacticBehaviourNumber++; syntacticBehaviour.setSense(sense); syntacticBehaviour.setSubcategorizationFrame(synargsSubcatFrameMap.get(synSemArgSynArgMapping.get(imsLexSense.synArgs))); syntacticBehaviour.setSubcategorizationFrameSet(senseSubcatFrameSetMap.get(imsLexSense)); syntacticBehaviours.add(syntacticBehaviour); if (senseSemPredicateMap.containsKey(imsLexSense)) { // Creating Predicative Representation List<PredicativeRepresentation> predicativeRepresentations = new LinkedList <PredicativeRepresentation>(); PredicativeRepresentation predicativeRepresentation = new PredicativeRepresentation(); predicativeRepresentation.setPredicate(senseSemPredicateMap.get(imsLexSense)); predicativeRepresentations.add(predicativeRepresentation); sense.setPredicativeRepresentations(predicativeRepresentations);// Save PredicativeRepresentations } } senses.add(sense);// Save Sense } lexicalEntry.setSenses(senses); if (syntacticBehaviours != null) { lexicalEntry.setSyntacticBehaviours(syntacticBehaviours); } lexicalEntries.add(lexicalEntry);// Save LexicalEntry } } private ELabelTypeSemantics getTypeOfSemanticLabel(String classInformation) { if (classInformation.contains("Noun")) { return ELabelTypeSemantics.semanticNounClass; } else { return ELabelTypeSemantics.syntacticAlternationClass; } } /** * This method maps POS information to EPartOfSpeech * */ private EPartOfSpeech mapPOS(String pos) { EPartOfSpeech result = null; if (pos.equals("verb")) { result = EPartOfSpeech.verbMain; } else if (pos.equals("adj")) { result = EPartOfSpeech.adjective; } else if (pos.equals("noun")) { result = EPartOfSpeech.nounCommon; } return result; } /** * This method creates (purely syntactic) subcategorization frames * @param IMSlexSubcatSense a IMSlexSubcat sense * @param subcatFrame a subcategorization frame * @return the subcategorization frame */ private SubcategorizationFrame parseArguments(IMSlexSense IMSlexSubcatSense, SubcategorizationFrame subcatFrame) { SubcategorizationFrame scFrame = subcatFrame; List<SyntacticArgument> synArgs = new LinkedList<SyntacticArgument>(); String[] args = IMSlexSubcatSense.synArgs.split(":"); for(String arg : args) { if (!arg.contains("syntacticProperty")) { SyntacticArgument syntacticArgument = new SyntacticArgument(); syntacticArgument.setId("IMSLexSubcat_SyntacticArgument_".concat(Integer.toString(syntacticArgumentNumber))); syntacticArgumentNumber++; String[] atts = arg.split(","); for(String att : atts){ String [] splits = att.split("="); String attName = splits[0]; if (attName.equals("grammaticalFunction")){ String gf = splits[1]; if (gf.equals("object")) { gf = gf.replaceAll("object", "directObject"); } syntacticArgument.setGrammaticalFunction(EGrammaticalFunction.valueOf(gf)); } if(attName.equals("syntacticCategory")) { syntacticArgument.setSyntacticCategory(ESyntacticCategory.valueOf(splits[1])); } if(attName.equals("case")) { syntacticArgument.setCase(ECase.valueOf(splits[1])); } if(attName.equals("determiner")) { syntacticArgument.setDeterminer(EDeterminer.valueOf(splits[1])); } if(attName.equals("preposition")) { syntacticArgument.setPreposition(splits[1]); } if(attName.equals("prepositionType")) { syntacticArgument.setPrepositionType(splits[1]); } if(attName.equals("number")) { syntacticArgument.setNumber(EGrammaticalNumber.valueOf(splits[1])); } if(attName.equals("lexeme")) { syntacticArgument.setLexeme(splits[1]); } if(attName.equals("verbForm")) { syntacticArgument.setVerbForm(EVerbForm.valueOf(splits[1])); } if(attName.equals("tense")) { syntacticArgument.setTense(ETense.valueOf(splits[1])); } if(attName.equals("complementizer")) { syntacticArgument.setComplementizer(EComplementizer.valueOf(splits[1])); } } synArgs.add(syntacticArgument); } else { String [] splits = arg.split("="); String sp = splits[1]; if (sp.equals("raising")) { sp = sp.replaceAll("raising", "subjectRaising"); } LexemeProperty lexemeProperty = new LexemeProperty(); lexemeProperty.setSyntacticProperty(ESyntacticProperty.valueOf(sp)); scFrame.setLexemeProperty(lexemeProperty); } } scFrame.setSyntacticArguments(synArgs); return scFrame; } /** * This method creates semantic predicates and * establishes a mapping between semantic arguments * and syntactic arguments * @param IMSlexSubcatSense a IMSlexSubcat sense * @param subcategorizationFrame a subcategorization frame * @returns the semantic predicate */ private SemanticPredicate parseSemanticArguments(IMSlexSense IMSlexSubcatSense,SubcategorizationFrame subcategorizationFrame) { // list of mappings between Syntactic and Semantic Arguments are to be created SemanticPredicate semanticPredicate = new SemanticPredicate(); semanticPredicate.setId("IMSLexSubcat_SemanticPredicate_".concat(Integer.toString(semanticPredicateNumber))); semanticPredicateNumber++; List<SemanticArgument> semanticArguments = new LinkedList<SemanticArgument>(); List<SynSemArgMap> synSemArgMaps = new LinkedList<SynSemArgMap>(); SynSemArgMap synSemArgMap = new SynSemArgMap(); String[] args = IMSlexSubcatSense.synArgs.split(":"); int index = 0; // iterate over syntactic Arguments for (SyntacticArgument synArg: subcategorizationFrame.getSyntacticArguments()) { String synsemArg = args[index]; if (synsemArg.contains("syntacticProperty")) { index++; synsemArg = args[index]; } // look at synsemArg: is semantic role defined? if yes: create corresponding semanticArg String[] atts = synsemArg.split(","); for(String att : atts){ String [] splits = att.split("="); String attName = splits[0]; if(attName.equals("role")){ SemanticArgument semanticArgument = new SemanticArgument(); semanticArgument.setId("IMSLexSubcat_SemanticArgument_".concat(Integer.toString(semanticArgumentNumber))); semanticArgumentNumber++; semanticArgument.setSemanticRole(splits[1]); semanticArguments.add(semanticArgument); // Generate SynSemArgMapping synSemArgMap.setSyntacticArgument(synArg); synSemArgMap.setSemanticArgument(semanticArgument); synSemArgMaps.add(synSemArgMap); } } index++; } semanticPredicate.setSemanticArguments(semanticArguments); SynSemCorrespondence synSemCorrespondence = new SynSemCorrespondence(); synSemCorrespondence.setId("IMSLexSubcat_SynSemCorrespondence_".concat(Integer.toString(synSemCorrespondenceNumber))); synSemCorrespondenceNumber++; synSemCorrespondence.setSynSemArgMaps(synSemArgMaps); synSemCorrespondences.add(synSemCorrespondence); return semanticPredicate; } }