/*******************************************************************************
 * Copyright 2016
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package de.tudarmstadt.ukp.lmf.transform.sensealignments;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.stream.XMLStreamException;

import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import de.tudarmstadt.ukp.lmf.model.core.LexicalEntry;
import de.tudarmstadt.ukp.lmf.model.core.Lexicon;
import de.tudarmstadt.ukp.lmf.model.core.Sense;
import de.tudarmstadt.ukp.lmf.model.enums.ELabelTypeSemantics;
import de.tudarmstadt.ukp.lmf.model.enums.EPartOfSpeech;
import de.tudarmstadt.ukp.lmf.model.meta.SemanticLabel;
import de.tudarmstadt.ukp.lmf.transform.alignments.SenseAlignment;

/**
 * Sense alignment between FrameNet lexical units and VerbNet senses.
 *
 * <p>The alignment is read from a tab-separated file with three columns per line:
 * FrameNet lexical unit id, VerbNet lemma, VerbNet class. Such a file can be produced
 * from an XML mapping file with {@link #convertVnFnSemlink(String, String)}.
 */
public class VerbNetFrameNetSenseAlignment
    extends SenseAlignment
{
    /** Value of the {@code UBY_HOME} environment variable, read once at class load. */
    static String UBY_HOME = System.getenv("UBY_HOME");

    /** Number of lines read from the alignment file by {@link #getAlignment()}. */
    public int inputsize = 0;

    /**
     * One diagnostic entry per input line for which no sense pair was added; the entry
     * may be empty when both lookups succeeded but no VerbNet class label matched.
     */
    public ArrayList<String> notAdded;

    /**
     * Creates the alignment; all arguments are handed to the {@link SenseAlignment}
     * constructor unchanged.
     *
     * @param sourceUrl passed through to the superclass
     * @param destUrl passed through to the superclass
     * @param dbDriver passed through to the superclass
     * @param dbVendor passed through to the superclass
     * @param alignmentFile passed through to the superclass
     * @param user passed through to the superclass
     * @param pass passed through to the superclass
     * @param UBY_HOME passed through to the superclass
     * @throws FileNotFoundException declared by the superclass constructor
     */
    public VerbNetFrameNetSenseAlignment(String sourceUrl, String destUrl, String dbDriver,
            String dbVendor, String alignmentFile, String user, String pass, String UBY_HOME)
        throws FileNotFoundException
    {
        super(sourceUrl, destUrl, dbDriver, dbVendor, alignmentFile, user, pass, UBY_HOME);
        notAdded = new ArrayList<String>();
    }

    /**
     * Convenience constructor that uses the driver {@code com.mysql.jdbc.Driver}, the
     * vendor {@code mysql} and the {@code UBY_HOME} environment variable.
     *
     * @param sourceUrl passed through to the full constructor
     * @param destUrl passed through to the full constructor
     * @param alignmentFile passed through to the full constructor
     * @param user passed through to the full constructor
     * @param pass passed through to the full constructor
     * @throws FileNotFoundException declared by the full constructor
     */
    public VerbNetFrameNetSenseAlignment(String sourceUrl, String destUrl, String alignmentFile,
            String user, String pass)
        throws FileNotFoundException
    {
        this(sourceUrl, destUrl, "com.mysql.jdbc.Driver", "mysql", alignmentFile, user, pass,
                UBY_HOME);
    }

    /**
     * Reads the alignment file line by line and registers a source/destination sense pair
     * for every FrameNet sense (looked up by lexical unit id) and every VerbNet sense of the
     * given verb lemma whose VerbNet class label, stripped of its leading name part, equals
     * the class given on the line.
     *
     * <p>Lines that produce no pair are recorded in {@link #notAdded}; lines with fewer than
     * three tab-separated fields are recorded there as malformed and skipped instead of
     * aborting the whole run. Progress and summary statistics are printed to standard out.
     * An {@link IOException} while reading is printed and not rethrown; the reader is closed
     * in all cases.
     */
    @Override
    public void getAlignment()
        throws IllegalArgumentException
    {
        BufferedReader reader = null;
        try {
            System.out.println(getAlignmentFileLocation());
            reader = new BufferedReader(new FileReader(getAlignmentFileLocation()));
            String line = null;
            int count = 0;
            Lexicon vnLex = ubySource.getLexiconByName("VerbNet");
            while ((line = reader.readLine()) != null) {
                inputsize++;
                if (inputsize % 200 == 0) {
                    System.out.println("# alignments: " + inputsize);
                }

                String[] items = line.split("\t");
                if (items.length < 3) {
                    // Blank or truncated line: record it rather than failing on items[1]/items[2].
                    notAdded.add("-Malformed alignment line (expected 3 tab-separated fields): "
                            + line);
                    continue;
                }

                StringBuilder lineinfo = new StringBuilder();
                String luId = items[0];
                String vnLemma = items[1].trim();
                String vnClass = items[2];
                int added = 0;

                // get FrameNet senses by given luId
                List<Sense> senses = ubySource.getSensesByOriginalReference(
                        "FrameNet_1.5_eng_lexicalUnit", luId);
                if (senses.isEmpty()) {
                    lineinfo.append("-FN sense not found: " + luId);
                }
                for (Sense fns : senses) {
                    // get potential vn targets (defined by lemma and pos, and VN-class)
                    List<LexicalEntry> entries = ubySource.getLexicalEntries(vnLemma,
                            EPartOfSpeech.verb, vnLex);
                    if (entries.isEmpty()) {
                        lineinfo.append("-VN sense for this lemma-POS not found. ");
                        continue;
                    }
                    for (LexicalEntry e : entries) {
                        for (Sense vns : e.getSenses()) {
                            String senseId = vns.getId();
                            // filter by VN-class
                            List<SemanticLabel> labels = ubySource
                                    .getSemanticLabelsbySenseIdbyType(senseId,
                                            ELabelTypeSemantics.verbnetClass.toString());
                            for (SemanticLabel l : labels) {
                                String parsedLabel = stripClassName(l.getLabel());
                                if (parsedLabel != null && parsedLabel.equals(vnClass)) {
                                    addSourceSense(fns);
                                    addDestSense(vns);
                                    added++;
                                    count++;
                                }
                            }
                        }
                    }
                }

                if (added == 0) {
                    notAdded.add(lineinfo.toString());
                }
            }

            for (String lineinfo : notAdded) {
                System.out.println(lineinfo);
            }
            System.out.println("number of alignments:" + count);
            System.out.println("number of lines in infile:" + inputsize);
        }
        catch (IOException ex) {
            ex.printStackTrace();
        }
        finally {
            // Close here so the file handle is released even when reading or a lookup fails.
            if (reader != null) {
                try {
                    reader.close();
                }
                catch (IOException ex) {
                    ex.printStackTrace();
                }
            }
        }
    }

    /**
     * Drops everything up to and including the first hyphen of a class label by splitting on
     * {@code "-"} and re-joining all parts after the first one with hyphens.
     *
     * @param label the label text, may be {@code null}
     * @return the re-joined remainder, or {@code null} if the label is {@code null} or
     *         splits into fewer than two parts
     */
    private static String stripClassName(String label)
    {
        if (label == null) {
            return null;
        }
        String[] labelItems = label.split("-");
        if (labelItems.length < 2) {
            return null;
        }
        StringBuilder parsedLabel = new StringBuilder(labelItems[1]);
        for (int i = 2; i < labelItems.length; i++) {
            parsedLabel.append('-').append(labelItems[i]);
        }
        return parsedLabel.toString();
    }

    /**
     * Converts an XML mapping file into the tab-separated format read by
     * {@link #getAlignment()}.
     *
     * <p>For every {@code vncls} element the attributes {@code fnlexent}, {@code vnmember}
     * and {@code class} are written as one line, in that order. Elements whose
     * {@code fnlexent} attribute is empty are counted but not written. Conversion statistics
     * are printed to standard out. A failure while opening or writing the output file is
     * reported on standard error and not rethrown.
     *
     * @param inFile path of the XML input file
     * @param outFile path of the tab-separated output file
     * @throws XMLStreamException declared for callers; not thrown by the visible code
     * @throws ParserConfigurationException if no XML document builder can be created
     * @throws SAXException if the input file cannot be parsed
     * @throws IOException if the input file cannot be read or the writer cannot be closed
     */
    public static void convertVnFnSemlink(String inFile, String outFile)
        throws XMLStreamException, ParserConfigurationException, SAXException, IOException
    {
        int noTarget = 0;
        int lines = 0;

        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();
        Document doc = builder.parse(new File(inFile));
        doc.getDocumentElement().normalize();

        NodeList entries = doc.getElementsByTagName("vncls");
        ArrayList<String> output = new ArrayList<String>();
        for (int i = 0; i < entries.getLength(); i++) {
            Node alignment = entries.item(i);
            NamedNodeMap atts = alignment.getAttributes();
            String vnclass = atts.getNamedItem("class").getTextContent();
            String vnlemma = atts.getNamedItem("vnmember").getTextContent();
            String luId = atts.getNamedItem("fnlexent").getTextContent();
            // there are mappings with empty (fn) target:
            if (luId.equals("")) {
                noTarget++;
            }
            else {
                output.add(luId + "\t" + vnlemma + "\t" + vnclass + "\n");
            }
            lines++;
        }

        System.out.println("Converted " + inFile + ", statistics:");
        System.out.println("\tInput Lines: " + lines);
        System.out.println("\tOutput: " + output.size());
        System.out.println("\tNo alignment target: " + noTarget);
        System.out.println("\tControl: output + no alignment = input lines: "
                + (output.size() + noTarget));

        BufferedWriter writer = null;
        try {
            writer = new BufferedWriter(new FileWriter(new File(outFile)));
            for (String line : output) {
                writer.write(line);
            }
        }
        catch (IOException e) {
            System.err.println("Exception" + e + "could not write to" + outFile);
        }
        finally {
            if (writer != null) {
                writer.close();
            }
        }
    }
}