/*******************************************************************************
 * Copyright 2016
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package de.tudarmstadt.ukp.uby.integration.alignment.xml.transform.sensealignments;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import de.tudarmstadt.ukp.integration.alignment.xml.model.Alignments;
import de.tudarmstadt.ukp.integration.alignment.xml.model.Decision;
import de.tudarmstadt.ukp.integration.alignment.xml.model.Decisiontype;
import de.tudarmstadt.ukp.integration.alignment.xml.model.ResourceXml;
import de.tudarmstadt.ukp.integration.alignment.xml.model.Source;
import de.tudarmstadt.ukp.integration.alignment.xml.model.Target;
import de.tudarmstadt.ukp.integration.alignment.xml.model.XmlMeta;

/**
 * Convert given VerbNet to WordNet alignment file to generic alignment xml.
 *
 * <p>
 * Each input line is split at {@code '#'}: the part before it becomes the reference of an
 * alignment source, the part after it is reduced to a list of space-separated WordNet tokens,
 * each of which becomes one alignment target.
 */
public class VnWnSenseAlignmentXml extends SenseAlignmentXml {

    /** Number of leading characters dropped from the field that follows {@code '#'}. */
    private static final int WORDNET_PREFIX_LENGTH = 4;

    /** Part-of-speech marker placed in front of every WordNet sense key reference. */
    private static final String WORDNET_POS_PREFIX = "[POS: verb] ";

    private final Log logger = LogFactory.getLog(getClass());

    /**
     * @param alignmentFile
     *            path of the VerbNet-WordNet alignment file to read
     * @param outFile
     *            path of the alignment xml file to produce
     */
    public VnWnSenseAlignmentXml(String alignmentFile, String outFile) {
        super(alignmentFile, outFile);
    }

    /**
     * Reads the alignment file line by line, builds one {@link Source} per line that yields at
     * least one target, and writes the metadata followed by all alignments through the inherited
     * writer.
     *
     * <p>
     * Blank lines are ignored. Lines that lack a {@code '#'}-separated second field, or whose
     * second field is too short to carry the expected prefix, are reported in the log and
     * skipped instead of aborting the conversion. An {@link IOException} is logged and not
     * propagated.
     *
     * @param metadata
     *            metadata written ahead of the alignments; the id of its first decision type is
     *            used as decision source of every target, so {@code decisiontypes} must hold at
     *            least one entry
     */
    @Override
    public void toAlignmentXml(XmlMeta metadata) {
        String decisionSrc = metadata.decisiontypes.get(0).id;
        List<Source> sources = new ArrayList<>();

        logger.info("Reading alignment file: " + alignmentFile);

        // NOTE(review): the reader decodes with the platform default charset; confirm the
        // encoding of the alignment file and pass it explicitly if it is not plain ASCII.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(new File(alignmentFile))))) {
            String line;
            int lineNumber = 0;
            int paircount = 0;

            while ((line = reader.readLine()) != null) {
                lineNumber++;
                if (line.trim().isEmpty()) {
                    continue;
                }

                // String.split drops trailing empty fields, so "item#" yields a single element
                String[] fields = line.split("#");
                if (fields.length < 2 || fields[1].length() < WORDNET_PREFIX_LENGTH) {
                    logString.append("Skipping malformed input line " + lineNumber + ": " + line);
                    logString.append(LF);
                    continue;
                }

                String verbnetItem = fields[0]; // VerbNet_3.2_eng_sense
                List<Target> targets = createTargets(fields[1], decisionSrc);
                if (targets.isEmpty()) {
                    logString.append("No alignment target in input for: " + verbnetItem);
                    logString.append(LF);
                    continue;
                }

                Source source = new Source();
                source.ref = verbnetItem;
                source.targets = targets;
                sources.add(source);
                paircount += targets.size();
            }

            logger.info(logString.toString());
            logger.info("number of input lines:" + lineNumber);
            logger.info("number of alignment pairs:" + paircount);

            writer.writeMetaData(metadata);
            Alignments alignments = new Alignments();
            alignments.source = sources;
            writer.writeAlignments(alignments);
            // the inherited writer is closed here, once the output is complete
            writer.close();
        } catch (IOException ex) {
            logger.error("Could not convert alignment file " + alignmentFile, ex);
        }
    }

    /**
     * Turns the raw WordNet field of one input line into alignment targets.
     *
     * @param wordNetField
     *            text following {@code '#'} on the input line; must be at least
     *            {@link #WORDNET_PREFIX_LENGTH} characters long
     * @param decisionSrc
     *            decision type id stored in every created target
     * @return one target per non-empty token, possibly an empty list
     */
    private List<Target> createTargets(String wordNetField, String decisionSrc) {
        // drop the fixed-length prefix and every closing parenthesis
        String senseKeys = wordNetField.substring(WORDNET_PREFIX_LENGTH).replace(")", "").trim();

        List<Target> targets = new ArrayList<>();
        // elements separated by whitespace
        for (String token : senseKeys.split(" ")) {
            // strip '?' markers first so that a token made only of them is not emitted
            String senseKey = token.replaceAll("\\?", "").trim();
            if (senseKey.isEmpty()) {
                continue;
            }

            // format to match wordnet part of speech and sense key
            Target target = new Target();
            target.ref = WORDNET_POS_PREFIX + senseKey + "::";
            target.decision = new Decision();
            target.decision.confidence = DEFAULTCONFIDENCE;
            target.decision.src = decisionSrc;
            target.decision.value = true; // default
            targets.add(target);
        }
        return targets;
    }

    /**
     * Builds the default metadata for the VerbNet 3.2 to WordNet mapping: descriptive fields,
     * source and target resource, and a single manual decision type.
     *
     * @return newly created metadata object
     */
    @Override
    public XmlMeta getDefaultXmlMeta() {
        /*
         * Generate Metadata
         */
        XmlMeta metadata = new XmlMeta();

        metadata.title = "VerbNet-WordNet mapping from VerbNet version 3.2";
        metadata.creator = "Kipper et al., A large-scale Classification of English Verbs, LRE Journal, 42(1), 2008";
        metadata.date = "2015-03-12"; // download date
        metadata.description = "Manual mapping of VerbNet class members to WordNet sense keys, the mapping is part of the VerbNet";
        metadata.identifier = "VNWN32";
        metadata.publisher = "University of Colorado";
        metadata.rights = "VerbNet 3.0 (and 3.x) License";
        metadata.version = "3.2";

        ResourceXml sourceResource = new ResourceXml();
        sourceResource.description = "VerbNet version 3.2";
        sourceResource.id = "VN_Lexicon_0"; // matches UBY lexiconId
        sourceResource.language = "en";
        // identifiertype needs to match externalSystem in UBY:
        sourceResource.identifiertype = "VerbNet_3.2_eng_sense";
        metadata.sourceResource = sourceResource;

        ResourceXml targetResource = new ResourceXml();
        targetResource.description = "WordNet version 3.x";
        targetResource.id = "WN_Lexicon_0"; // matches UBY lexiconId
        targetResource.language = "en";
        // identifiertype needs to match externalSystem in UBY:
        targetResource.identifiertype = "WordNet 3.0 part of speech and sense key";
        metadata.targetResource = targetResource;

        Decisiontype type = new Decisiontype();
        type.id = "VerbNet_VNWN";
        // NOTE(review): every other field refers to VerbNet 3.2 while this name says 3.1;
        // left untouched because consumers may match on it - confirm the intended value.
        type.name = "VerbNet 3.1 VNWN";
        type.type = Decisiontype.Decision.MANUAL;

        List<Decisiontype> decisionTypes = new ArrayList<>();
        decisionTypes.add(type);
        metadata.decisiontypes = decisionTypes;

        // no separate scores given => no scoretype information
        return metadata;
    }

    /**
     * Converts the VerbNet 3.2 alignment file located below the directory named by the
     * {@code UBY_HOME} environment variable.
     *
     * @param args
     *            ignored
     * @throws IllegalStateException
     *             if {@code UBY_HOME} is not set
     */
    public static void main(String[] args) {
        String ubyHome = System.getenv("UBY_HOME");
        if (ubyHome == null) {
            // without this check the paths below would silently start with "null/"
            throw new IllegalStateException("Environment variable UBY_HOME is not set");
        }

        String alfile = ubyHome + "/VerbNet/verbNetWordNetAlignment3.2.srt";
        String outFile = ubyHome + "/target/verbNetWordNetAlignment3.2_gen.xml";

        VnWnSenseAlignmentXml al = new VnWnSenseAlignmentXml(alfile, outFile);
        al.toAlignmentXml(al.getDefaultXmlMeta());
    }
}