/******************************************************************************* * Copyright 2016 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universität Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ package org.dkpro.uby.imslex; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.TreeMap; import de.tudarmstadt.ukp.lmf.model.core.GlobalInformation; import de.tudarmstadt.ukp.lmf.model.core.LexicalEntry; import de.tudarmstadt.ukp.lmf.model.core.LexicalResource; import de.tudarmstadt.ukp.lmf.model.core.Lexicon; import de.tudarmstadt.ukp.lmf.model.core.Sense; import de.tudarmstadt.ukp.lmf.model.enums.ECase; import de.tudarmstadt.ukp.lmf.model.enums.EComplementizer; import de.tudarmstadt.ukp.lmf.model.enums.EDeterminer; import de.tudarmstadt.ukp.lmf.model.enums.EGrammaticalFunction; import de.tudarmstadt.ukp.lmf.model.enums.EGrammaticalNumber; import de.tudarmstadt.ukp.lmf.model.enums.ELabelNameSemantics; import de.tudarmstadt.ukp.lmf.model.enums.ELabelTypeSemantics; import de.tudarmstadt.ukp.lmf.model.enums.ELanguageIdentifier; import de.tudarmstadt.ukp.lmf.model.enums.EPartOfSpeech; import de.tudarmstadt.ukp.lmf.model.enums.ESyntacticCategory; import de.tudarmstadt.ukp.lmf.model.enums.ESyntacticProperty; import de.tudarmstadt.ukp.lmf.model.enums.ETense; import de.tudarmstadt.ukp.lmf.model.enums.EVerbForm; import de.tudarmstadt.ukp.lmf.model.meta.SemanticLabel; import de.tudarmstadt.ukp.lmf.model.morphology.FormRepresentation; import de.tudarmstadt.ukp.lmf.model.morphology.Lemma; import de.tudarmstadt.ukp.lmf.model.semantics.MonolingualExternalRef; import de.tudarmstadt.ukp.lmf.model.semantics.PredicativeRepresentation; import de.tudarmstadt.ukp.lmf.model.semantics.SemanticArgument; import de.tudarmstadt.ukp.lmf.model.semantics.SemanticPredicate; import de.tudarmstadt.ukp.lmf.model.semantics.SynSemArgMap; import de.tudarmstadt.ukp.lmf.model.semantics.SynSemCorrespondence; import de.tudarmstadt.ukp.lmf.model.syntax.LexemeProperty; import de.tudarmstadt.ukp.lmf.model.syntax.SubcategorizationFrame; import de.tudarmstadt.ukp.lmf.model.syntax.SyntacticArgument; import de.tudarmstadt.ukp.lmf.model.syntax.SyntacticBehaviour; /** * This class converts IMSLex - Subcategorization Frames (SCF) * (see PhD thesis by Judith Eckle-Kohler, 1999) to UBY-LMF. * This lexical resource provides SCFs of verbs, nouns and adjectives * as well as some additional syntactic and semantic properties of * nouns and adjectives. */ public class IMSLexSubcatConverter { protected static class IMSLexEntry { protected String lemma; protected EPartOfSpeech pos; protected List<IMSLexSubcatFrame> subcatFrames; public IMSLexEntry(final String lemma, final EPartOfSpeech pos) { this.lemma = lemma; this.pos = pos; subcatFrames = new ArrayList<IMSLexSubcatFrame>(); } public String getLemma() { return lemma; } public EPartOfSpeech getPos() { return pos; } public void addSubcatFrame(final IMSLexSubcatFrame subcatFrame) { subcatFrames.add(subcatFrame); } public List<IMSLexSubcatFrame> getSubcatFrames() { return subcatFrames; } } protected static class IMSLexSubcatFrame { protected String subcatLabel; protected String auxiliary; protected String argumentStr; protected String semanticLabel; public IMSLexSubcatFrame(final String subcatLabel, final String semanticLabel, final String auxiliary, final String argumentStr) { this.subcatLabel = subcatLabel; this.auxiliary = auxiliary; this.argumentStr = argumentStr; this.semanticLabel = semanticLabel; } public String getSubcatLabel() { return subcatLabel; } public String getAuxiliary() { return auxiliary; } public String getArgumentStr() { return argumentStr; } public String getSemanticLabel() { return semanticLabel; } public String getSyntaxStr() { return argumentStr.replaceAll(",role=[a-z]+", ""); } } public static final String EXTREF_LEMMA = "lemma"; protected Map<String, IMSLexEntry> lexicalEntryIndex; protected Map<String, IMSLexSubcatFrame> subcatIndex; protected String resourceName; protected String resourceVersion; protected String dtdVersion; protected IMSLexSubcatMap subcatMap; protected int syntacticArgumentId = 0; /** Reads the subcategorization frame files of IMSLexSubcat. * Use {@link #toLMF()} to begin the actual conversion process. * @param lexiconDir the directory path containing the IMSLexSubcat files. * @param resourceName the resource name (e.g., "IMSLexSubcat") * @param resourceVersion the resource name (e.g., "IMSLex_2012-06-17_deu") * @param dtdVersion name of UBY's DTD file * @throws IOException */ public IMSLexSubcatConverter(final File lexiconDir, final String resourceName, final String resourceVersion, final String dtdVersion) throws IOException { this.resourceName = resourceName; this.resourceVersion = resourceVersion; this.dtdVersion = dtdVersion; loadIMSLexSubcatFiles(lexiconDir); } protected void loadIMSLexSubcatFiles(final File lexiconDir) throws IOException { subcatMap = new IMSLexSubcatMap(); lexicalEntryIndex = new TreeMap<String, IMSLexEntry>(); subcatIndex = new TreeMap<String, IMSLexSubcatFrame>(); loadIMSLexSubcatFile(new File(lexiconDir, "Subcat_V.txt"), EPartOfSpeech.verbMain, false); loadIMSLexSubcatFile(new File(lexiconDir, "Subcat_PartV.txt"), EPartOfSpeech.verbMain,true); loadIMSLexSubcatFile(new File(lexiconDir, "Subcat_ADJ.txt"), EPartOfSpeech.adjective, false); loadIMSLexSubcatFile(new File(lexiconDir, "Subcat_NN.txt"), EPartOfSpeech.nounCommon, false); loadIMSLexSubcatFile(new File(lexiconDir, "Subcat_ADV.txt"), EPartOfSpeech.adverb, false); } protected void loadIMSLexSubcatFile(final File lexiconFile, EPartOfSpeech pos, boolean particleVerbs) throws IOException { Map<String, String> semanticMappings = new TreeMap<String, String>(); semanticMappings.put("n-type(measure)", ELabelNameSemantics.SEMANTIC_NOUN_CLASS_MEASURE_NOUN); semanticMappings.put("n-type(mass)", ELabelNameSemantics.SEMANTIC_NOUN_CLASS_MASS_NOUN); // Nouns with the following specifications have not been considered (very noisy): semanticMappings.put("ntype(app-buchst-zahl)", ""); semanticMappings.put("ntype(name)", ""); semanticMappings.put("ntype(app)", ""); semanticMappings.put("ntype(beruf)", ""); semanticMappings.put("ntype(name-det)", ""); BufferedReader reader = new BufferedReader(new InputStreamReader( new FileInputStream(lexiconFile), "UTF-8")); try { String line; while ((line = reader.readLine()) != null) { String lemma; String auxiliary; String subcatLabel; if (particleVerbs) { int idx = line.indexOf(" "); if (idx < 0 && !line.isEmpty()) System.err.println("Skipping line " + line); if (idx < 0) continue; lemma = line.substring(0, idx); line = line.substring(idx + 1); idx = line.indexOf(" "); if (idx < 0 && !line.isEmpty()) System.err.println("Skipping line " + line); if (idx < 0) continue; auxiliary = line.substring(0, idx); subcatLabel = line.substring(idx + 1); } else { int idx = line.indexOf("\t"); if (idx < 0 && !line.isEmpty()) System.err.println("Skipping line " + line); if (idx < 0) continue; lemma = line.substring(0, idx); auxiliary = null; subcatLabel = line.substring(idx + 1); } subcatLabel = subcatLabel.trim(); if (subcatLabel.isEmpty()) continue; if (subcatLabel.startsWith("(") && subcatLabel.endsWith(")")) subcatLabel = subcatLabel.substring(1, subcatLabel.length() - 1); String semanticLabel = null; for (Entry<String, String> labelMapping : semanticMappings.entrySet()) { if (!subcatLabel.contains(labelMapping.getKey())) continue; semanticLabel = labelMapping.getValue(); subcatLabel = subcatLabel.replace(labelMapping.getKey(), ""); } if (semanticLabel != null && semanticLabel.isEmpty()) continue; String argumentString = subcatMap.createArgumentString(subcatLabel); /* //TODO: Expand alternatives. List<String> subcatKeys = new LinkedList<String>(); if (subcatLabel.contains("(C_wh/ob)")) { //subcatKeys.add(subcatLabel.replace("(C_wh/ob)", "(C_wh)")); subcatKeys.add(subcatLabel.replace("(C_wh/ob)", "(C_ob)")); } else { subcatKeys.add(subcatLabel); } for (String key : subcatKeys) {*/ String key = lemma + "\t" + pos.name(); IMSLexEntry entry = lexicalEntryIndex.get(key); if (entry == null) { entry = new IMSLexEntry(lemma, pos); lexicalEntryIndex.put(key, entry); } IMSLexSubcatFrame subcatFrame = new IMSLexSubcatFrame(subcatLabel, semanticLabel, auxiliary, argumentString); entry.addSubcatFrame(subcatFrame); } } finally { reader.close(); } } private List<SynSemArgMap> parseArgumentStrForRole(final String argumentStr, final SubcategorizationFrame subcatFrame) { List<SynSemArgMap> result = new LinkedList<SynSemArgMap>(); String[] args = argumentStr.split(":"); int idx = 0; for (String arg : args) { if (!arg.contains("syntacticProperty")) { String[] atts = arg.split(","); for(String att : atts){ String [] splits = att.split("="); String attName = splits[0]; if (attName.equals("role")) { SyntacticArgument synArg = subcatFrame.getSyntacticArguments().get(idx); SemanticArgument semArg = new SemanticArgument(); semArg.setSemanticRole(splits[1]); SynSemArgMap synSemArgMap = new SynSemArgMap(); synSemArgMap.setSemanticArgument(semArg); synSemArgMap.setSyntacticArgument(synArg); result.add(synSemArgMap); } } } idx++; } return result; } /** * This method creates (purely syntactic) subcategorization frames * @param IMSlexSubcatSense a IMSlexSubcat sense * @param subcatFrame a subcategorization frame * @return the subcategorization frame */ private SubcategorizationFrame parseArgumentStr(final String syntaxtStr) { SubcategorizationFrame scFrame = new SubcategorizationFrame(); List<SyntacticArgument> synArgs = new LinkedList<SyntacticArgument>(); String[] args = syntaxtStr.split(":"); for(String arg : args) { if (!arg.contains("syntacticProperty")) { SyntacticArgument syntacticArgument = new SyntacticArgument(); syntacticArgument.setId("IMSLexSubcat_SyntacticArgument_" + syntacticArgumentId); syntacticArgumentId++; String[] atts = arg.split(","); for(String att : atts){ String [] splits = att.split("="); String attName = splits[0]; if (attName.equals("grammaticalFunction")){ String gf = splits[1]; if (gf.equals("object")) { gf = gf.replaceAll("object", "directObject"); } syntacticArgument.setGrammaticalFunction(EGrammaticalFunction.valueOf(gf)); } if(attName.equals("syntacticCategory")) { syntacticArgument.setSyntacticCategory(ESyntacticCategory.valueOf(splits[1])); } if(attName.equals("case")) { syntacticArgument.setCase(ECase.valueOf(splits[1])); } if(attName.equals("determiner")) { syntacticArgument.setDeterminer(EDeterminer.valueOf(splits[1])); } if(attName.equals("preposition")) { syntacticArgument.setPreposition(splits[1]); } if(attName.equals("prepositionType")) { syntacticArgument.setPrepositionType(splits[1]); } if(attName.equals("number")) { syntacticArgument.setNumber(EGrammaticalNumber.valueOf(splits[1])); } if(attName.equals("lexeme")) { syntacticArgument.setLexeme(splits[1]); } if(attName.equals("verbForm")) { syntacticArgument.setVerbForm(EVerbForm.valueOf(splits[1])); } if(attName.equals("tense")) { syntacticArgument.setTense(ETense.valueOf(splits[1])); } if(attName.equals("complementizer")) { syntacticArgument.setComplementizer(EComplementizer.valueOf(splits[1])); } } synArgs.add(syntacticArgument); } else { String [] splits = arg.split("="); String sp = splits[1]; if (sp.equals("raising")) { sp = sp.replaceAll("raising", "subjectRaising"); } LexemeProperty lexemeProperty = new LexemeProperty(); lexemeProperty.setSyntacticProperty(ESyntacticProperty.valueOf(sp)); scFrame.setLexemeProperty(lexemeProperty); } } scFrame.setSyntacticArguments(synArgs); return scFrame; } /** * Converts a preprocessed version of IMSLex to Uby-LMF * name of the LMF Lexicon instance: "IMSLexSubcat" * @throws IOException */ public LexicalResource toLMF() throws IOException { // LexicalResource. LexicalResource lexicalResource = new LexicalResource(); lexicalResource.setName(resourceName); lexicalResource.setDtdVersion(dtdVersion); // GlobalInformation. GlobalInformation globalInformation = new GlobalInformation(); globalInformation.setLabel("IMSLex, see PhD thesis of Eckle-Kohler (1999), Version of 06/2012"); lexicalResource.setGlobalInformation(globalInformation); // Lexicon. LinkedList<Lexicon> lexicons = new LinkedList<Lexicon>(); lexicons.add(createLexicon()); lexicalResource.setLexicons(lexicons); return lexicalResource; } protected Lexicon createLexicon() { // Lexicon. Lexicon lexicon = new Lexicon(); lexicon.setLanguageIdentifier(ELanguageIdentifier.GERMAN); lexicon.setId("IMSLexSubcat_Lexicon_0"); lexicon.setName("IMSLexSubcat"); // Sort. List<IMSLexEntry> entries = new ArrayList<IMSLexEntry>(lexicalEntryIndex.values()); Collections.sort(entries, new Comparator<IMSLexEntry>() { public int compare(final IMSLexEntry o1, final IMSLexEntry o2) { String key1 = o1.getLemma() + "\t" + o1.getPos().name(); String key2 = o2.getLemma() + "\t" + o2.getPos().name(); if (o1.getPos() == EPartOfSpeech.adverb) key1 = "\uFF00" + key1; // ensure adverbs are at the end. if (o2.getPos() == EPartOfSpeech.adverb) key2 = "\uFF00" + key2; // ensure adverbs are at the end. return key1.compareTo(key2); } }); final Map<String, String> realizedSCFs = new TreeMap<String, String>(); List<IMSLexSubcatFrame> scfs = new ArrayList<IMSLexSubcatFrame>(); for (IMSLexEntry entry : entries) { String lemmaPos = entry.getLemma() + "\t" + entry.getPos().name(); if (entry.getPos() == EPartOfSpeech.adverb) lemmaPos = "\uFF00" + lemmaPos; // ensure adverbs are at the end. for (IMSLexSubcatFrame scf : entry.getSubcatFrames()) { if (scf.getSemanticLabel() != null) continue; String argStr = scf.getSyntaxStr(); if (argStr.isEmpty()) continue; String scfId = realizedSCFs.get(argStr); if (scfId == null || lemmaPos.compareTo(scfId) < 0) realizedSCFs.put(argStr, lemmaPos); if (scfId == null) scfs.add(scf); } } Collections.sort(scfs, new Comparator<IMSLexSubcatFrame>() { public int compare(final IMSLexSubcatFrame o1, final IMSLexSubcatFrame o2) { String key1 = o1.getSyntaxStr(); key1 = realizedSCFs.get(key1) + "\t1" + key1; String key2 = o2.getSyntaxStr(); key2 = realizedSCFs.get(key2) + "\t1" + key2; return key1.compareTo(key2); } }); Collections.sort(entries, new Comparator<IMSLexEntry>() { public String makePOSSort(final EPartOfSpeech pos) { switch (pos) { case verb: case verbMain: return "1"; case adjective: return "2"; case noun: case nounCommon: case nounProper: return "3"; default: return "4"; } } public int compare(final IMSLexEntry o1, final IMSLexEntry o2) { String key1 = makePOSSort(o1.getPos()) + o1.getLemma(); String key2 = makePOSSort(o2.getPos()) + o2.getLemma(); return key1.compareTo(key2); } }); // SubcategorationFrame. createSubcategorizationFrames(lexicon, scfs); // Lexical entries. createLexicalEntries(lexicon, entries); return lexicon; } protected void createLexicalEntries(final Lexicon lexicon, final List<IMSLexEntry> entries) { List<SynSemCorrespondence> synSemCorrespondences = new ArrayList<SynSemCorrespondence>(); List<SemanticPredicate> semanticPredicates = new LinkedList<SemanticPredicate>(); List<LexicalEntry> lexicalEntries = new LinkedList<LexicalEntry>(); int lexicalEntryId = 0; int senseId = 0; int syntacticBehaviourId = 0; int semanticArgumentId = 0; int synSemCorrespondenceId = 0; for (IMSLexEntry entry : entries) { // LexicalEntry. LexicalEntry lexicalEntry = new LexicalEntry(); lexicalEntry.setId("IMSLexSubcat_LexicalEntry_" + lexicalEntryId); lexicalEntryId++; lexicalEntry.setPartOfSpeech(entry.getPos()); // Lemma. Lemma lemma = new Lemma(); List<FormRepresentation> formReps = new LinkedList<FormRepresentation>(); FormRepresentation formRep = new FormRepresentation(); String[] lemmaParts = entry.getLemma().split("#"); if (lemmaParts.length > 1) { // Verb with separable prefix (marked by #). lexicalEntry.setSeparableParticle(lemmaParts[0]); formRep.setWrittenForm(lemmaParts[0] + lemmaParts[1]); } else { formRep.setWrittenForm(lemmaParts[0]); } formReps.add(formRep); lemma.setFormRepresentations(formReps); lexicalEntry.setLemma(lemma); // Senses (in IMSLex-Subcat defined by subcat frames). List<IMSLexSubcatFrame> entrySCFs = new ArrayList<IMSLexSubcatFrame>(entry.getSubcatFrames()); Collections.sort(entrySCFs, new Comparator<IMSLexSubcatFrame>() { public int compare(final IMSLexSubcatFrame o1, final IMSLexSubcatFrame o2) { String key1 = (o1.getSemanticLabel() != null ? "9" : "0") + o1.getArgumentStr(); String key2 = (o2.getSemanticLabel() != null ? "9" : "0") + o2.getArgumentStr(); return key1.compareTo(key2); } }); List<SyntacticBehaviour> syntacticBehaviours = new LinkedList<SyntacticBehaviour>(); List <Sense> senses = new LinkedList<Sense>(); String previousArgumentStr = null; for (IMSLexSubcatFrame scf : entrySCFs) { String argumentStr = scf.getSyntaxStr(); if (argumentStr.equals(previousArgumentStr)) continue; //TODO: is that correct? previousArgumentStr = argumentStr; // Sense. Sense sense = new Sense(); sense.setId("IMSLexSubcat_Sense_" + senseId); senseId++; sense.setIndex(senses.size() + 1); senses.add(sense); // MonolingualExternalRef. MonolingualExternalRef monolingualExternalRef = new MonolingualExternalRef(); monolingualExternalRef.setExternalSystem(resourceVersion + "_" + EXTREF_LEMMA); monolingualExternalRef.setExternalReference(entry.getLemma()); List<MonolingualExternalRef> monolingualExternalRefs = new LinkedList<MonolingualExternalRef>(); monolingualExternalRefs.add(monolingualExternalRef); sense.setMonolingualExternalRefs(monolingualExternalRefs); // SemanticLabel. String semanticLabelText = scf.getSemanticLabel(); if (semanticLabelText != null) { List<SemanticLabel> semanticLabels = new ArrayList<SemanticLabel>(); SemanticLabel semanticLabel = new SemanticLabel(); semanticLabel.setLabel(semanticLabelText); semanticLabel.setType(ELabelTypeSemantics.semanticNounClass); semanticLabels.add(semanticLabel); sense.setSemanticLabels(semanticLabels); } // SubcategorizationFrame. if (!scf.getArgumentStr().isEmpty()) { SubcategorizationFrame subcatFrame = subcategorizationFrameIndex.get(scf.getSyntaxStr()); // SyntacticBehavior. SyntacticBehaviour syntacticBehaviour = new SyntacticBehaviour(); syntacticBehaviour.setId("IMSLexSubcat_SyntacticBehaviour_" + syntacticBehaviourId); syntacticBehaviourId++; syntacticBehaviour.setSense(sense); syntacticBehaviour.setSubcategorizationFrame(subcatFrame); syntacticBehaviours.add(syntacticBehaviour); // SynSemArgMap. List<SynSemArgMap> synSemArgMaps = parseArgumentStrForRole(scf.getArgumentStr(), subcatFrame); if (synSemArgMaps != null && !synSemArgMaps.isEmpty()) { // SemanticPredicate. SemanticPredicate semanticPredicate = new SemanticPredicate(); semanticPredicate.setId("IMSLexSubcat_SemanticPredicate_" + semanticPredicates.size()); semanticPredicates.add(semanticPredicate); // SemanticArgument. List<SemanticArgument> semanticArguments = new LinkedList<SemanticArgument>(); for (SynSemArgMap synSemArgMap : synSemArgMaps) { SemanticArgument semanticArgument = synSemArgMap.getSemanticArgument(); semanticArgument.setId("IMSLexSubcat_SemanticArgument_" + semanticArgumentId); semanticArgumentId++; semanticArgument.setPredicate(semanticPredicate); semanticArguments.add(semanticArgument); } semanticPredicate.setSemanticArguments(semanticArguments); // PredicativeRepresentation. List<PredicativeRepresentation> predicativeRepresentations = new LinkedList<PredicativeRepresentation>(); PredicativeRepresentation predicativeRepresentation = new PredicativeRepresentation(); predicativeRepresentation.setPredicate(semanticPredicate); predicativeRepresentations.add(predicativeRepresentation); sense.setPredicativeRepresentations(predicativeRepresentations); // SynSemCorrespondence. SynSemCorrespondence synSemCorrespondence = new SynSemCorrespondence(); synSemCorrespondence.setId("IMSLexSubcat_SynSemCorrespondence_" + synSemCorrespondenceId); synSemCorrespondenceId++; synSemCorrespondence.setSynSemArgMaps(synSemArgMaps); synSemCorrespondences.add(synSemCorrespondence); } } } lexicalEntry.setSenses(senses); if (!syntacticBehaviours.isEmpty()) lexicalEntry.setSyntacticBehaviours(syntacticBehaviours); lexicalEntries.add(lexicalEntry); } lexicon.setLexicalEntries(lexicalEntries); lexicon.setSemanticPredicates(semanticPredicates); lexicon.setSynSemCorrespondences(synSemCorrespondences); } protected Map<String, SubcategorizationFrame> subcategorizationFrameIndex = new TreeMap<String, SubcategorizationFrame>(); protected void createSubcategorizationFrames(final Lexicon lexicon, final List<IMSLexSubcatFrame> scfs) { int subcatFrameId = 0; List<SubcategorizationFrame> subcategorizationFrames = new ArrayList<SubcategorizationFrame>(); for (IMSLexSubcatFrame scf : scfs) { SubcategorizationFrame subcategorizationFrame = parseArgumentStr(scf.getArgumentStr()); subcategorizationFrame.setId("IMSLexSubcat_SubcategorizationFrame_" + subcatFrameId); subcatFrameId++; subcategorizationFrame.setSubcatLabel(scf.getSubcatLabel()); subcategorizationFrames.add(subcategorizationFrame); subcategorizationFrameIndex.put(scf.getSyntaxStr(), subcategorizationFrame); } Collections.sort(subcategorizationFrames, new Comparator<SubcategorizationFrame>() { public int compare(final SubcategorizationFrame o1, final SubcategorizationFrame o2) { return o1.getId().compareTo(o2.getId()); } }); lexicon.setSubcategorizationFrames(subcategorizationFrames); } }