/*******************************************************************************
 * Copyright 2016
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package de.tudarmstadt.ukp.lmf.transform.sensealignments;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import de.tudarmstadt.ukp.lmf.model.core.Sense;
import de.tudarmstadt.ukp.lmf.transform.DBConfig;
import de.tudarmstadt.ukp.lmf.transform.alignments.SenseAlignment;
import de.tudarmstadt.ukp.lmf.transform.alignments.SenseAlignmentUtils;

/**
 * Convert FrameNet-Wiktionary alignment to UBY Format
 * @author sh
 *
 */
public class FrameNetWiktionaryAlignment extends SenseAlignment
{
    /** Name of the temporary table handed to {@link SenseAlignmentUtils}. */
    private static final String TEMP_TABLE = "temp_Duc";

    /** Column declaration shared by both temporary tables. */
    private static final String TEMP_TABLE_FIELDS =
            "senseId varchar(255) NOT NULL, externalReference varchar(255)";

    /** Sense ids accepted as alignment targets must start with this prefix. */
    private static final String ENGLISH_WIKTIONARY_PREFIX = "WktEN";

    /** Collects the alignment-file lines for which no source or no target sense was found. */
    public StringBuilder logString;

    /** Number of alignment-file lines whose source or target id is the literal string "null". */
    public int nullAlignment;

    protected static Log logger = LogFactory.getLog(FrameNetWiktionaryAlignment.class);

    private final SenseAlignmentUtils saUtils;

    /**
     * Creates the alignment converter and prepares two temporary lookup tables that map
     * sense ids to external references: one filled from senses whose id starts with "FN",
     * one filled from senses whose id starts with "Wk".
     *
     * @param sourceUrl URL of the source database
     * @param destUrl URL of the destination database; if equal to {@code sourceUrl}, the
     *            source configuration is used for both sides
     * @param dbDriver database driver, passed to {@link DBConfig}
     * @param dbVendor database vendor, passed to {@link DBConfig}
     * @param alignmentFile location of the alignment file, passed to the superclass
     * @param user database user
     * @param pass database password
     * @param UBY_HOME not used by this constructor; kept for interface compatibility
     */
    public FrameNetWiktionaryAlignment(String sourceUrl, String destUrl, String dbDriver,
            String dbVendor, String alignmentFile, String user, String pass, String UBY_HOME)
        throws SQLException, InstantiationException, IllegalAccessException,
        ClassNotFoundException, FileNotFoundException
    {
        super(sourceUrl, destUrl, alignmentFile);
        logString = new StringBuilder();
        nullAlignment = 0;

        DBConfig source = new DBConfig(sourceUrl, dbDriver, dbVendor, user, pass, true);
        DBConfig dest = new DBConfig(destUrl, dbDriver, dbVendor, user, pass, true);

        // When both URLs are identical, the same configuration serves both sides.
        DBConfig effectiveDest = sourceUrl.equals(destUrl) ? source : dest;
        saUtils = new SenseAlignmentUtils(source, effectiveDest, 0, 0, TEMP_TABLE, TEMP_TABLE);

        // The last argument presumably selects the side (0 = source, 1 = destination),
        // matching the indices used in getAlignment() - TODO confirm against
        // SenseAlignmentUtils.
        saUtils.createTempTable(TEMP_TABLE_FIELDS, externalRefQuery("FN"), 0);
        saUtils.createTempTable(TEMP_TABLE_FIELDS, externalRefQuery("Wk"), 1);
    }

    /**
     * Builds the query that selects sense id and external reference of all senses whose id
     * starts with the given two-character prefix.
     *
     * The produced text is intentionally identical to the previously hard-coded queries
     * (including the missing whitespace in front of "where").
     */
    private static String externalRefQuery(String senseIdPrefix)
    {
        return "SELECT S.senseId, " + " M.externalReference "
                + " FROM Sense S JOIN MonolingualExternalRef M"
                + " ON (S.senseId=M.senseId)"
                + "where substring(S.senseId,1,2)=\"" + senseIdPrefix + "\"";
    }

    /**
     * Reads the tab-separated alignment file and registers one source/destination sense pair
     * per FrameNet sense found for each usable line.
     *
     * The first line and every line containing "###" are skipped, as are lines with fewer
     * than two tab-separated fields. Lines whose source or target id is "null" only increase
     * {@link #nullAlignment}. Lines for which no sense is found on either side are appended
     * to {@link #logString}.
     *
     * Any exception aborts the run and is logged; it is not propagated to the caller.
     */
    @Override
    public void getAlignment()
    {
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(getAlignmentFileLocation()));
            int lineNumber = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                // skip the header line and separator lines
                if (lineNumber == 1 || line.contains("###")) {
                    continue;
                }
                // each component is separated by tab character
                String[] fields = line.split("\t");
                if (fields.length < 2) {
                    continue;
                }
                String sourceID = fields[0];
                String destID = fields[1];
                if (sourceID.equals("null") || destID.equals("null")) {
                    nullAlignment++;
                    continue;
                }

                List<Sense> sourceSenses = saUtils.getSensesByExternalRefID(sourceID, 0, false);
                List<Sense> destSenses = saUtils.getSensesByExternalRefID(destID, 1, false);
                if (sourceSenses.isEmpty() || destSenses.isEmpty()) {
                    logString.append(line);
                    logString.append(LF);
                    continue;
                }

                // The target does not depend on the source sense, so pick it once per line.
                Sense target = selectEnglishWiktionarySense(destSenses);
                for (Sense fnSense : sourceSenses) {
                    if (target != null) {
                        addSourceSense(fnSense);
                        addDestSense(target);
                    }
                    else {
                        System.out.println("no target sense found: " + line);
                    }
                }
            }
            // restore memories
            // NOTE(review): as before, the temporary tables are only destroyed when the whole
            // file was processed without an exception - confirm whether they should also be
            // dropped on failure.
            saUtils.destroyTempTable();
        }
        catch (Exception ex) {
            logger.error("Reading the alignment file failed", ex);
        }
        finally {
            // Close on every path; previously the reader leaked whenever no exception occurred.
            IOUtils.closeQuietly(reader);
        }
    }

    /**
     * Returns the first of the (at most) two leading candidates whose id starts with
     * "WktEN", or {@code null} if neither does. Candidates beyond the second one are not
     * inspected.
     *
     * @param candidates non-empty list of candidate senses
     */
    private static Sense selectEnglishWiktionarySense(List<Sense> candidates)
    {
        if (candidates.get(0).getId().startsWith(ENGLISH_WIKTIONARY_PREFIX)) {
            return candidates.get(0);
        }
        if (candidates.size() > 1
                && candidates.get(1).getId().startsWith(ENGLISH_WIKTIONARY_PREFIX)) {
            return candidates.get(1);
        }
        return null;
    }

    /**
     * Combines a classifier output file with the matching id file and writes every pair that
     * was classified as "1" to a tab-separated file (one {@code first<TAB>second} pair per
     * line).
     *
     * The first line of both input files is skipped. Line i of the classifier output is
     * paired with line i of the id file. If the files differ in length, only the lines
     * present in both are processed. Pairs that cannot be parsed are skipped with a warning.
     *
     * @param classifierOut - output tsv of weka classification; the predicted class is the
     *            text of the third ':'-separated field up to its first ','
     * @param classifierIn - data section of arff input file for classification
     * (contains ids in same order as classifications in classifierOut); the two ids are
     *            the first two ','-separated fields
     * @param tsvFile file the positive pairs are written to
     * @throws IOException if reading an input file or writing the output file fails
     */
    public static void classifierOutputToTsv(String classifierOut, String classifierIn,
            String tsvFile)
        throws IOException
    {
        // read classification
        List<String> scoreLines = readLinesAfterHeader(classifierOut);
        // read ids
        List<String> idLines = readLinesAfterHeader(classifierIn);

        System.out.println(scoreLines.size());
        System.out.println(idLines.size());

        int pairCount = Math.min(scoreLines.size(), idLines.size());
        if (scoreLines.size() != idLines.size()) {
            logger.warn("files do not agree: " + scoreLines.size()
                    + " classification lines vs. " + idLines.size()
                    + " id lines; only the first " + pairCount + " pairs are processed");
        }

        List<String> res = new ArrayList<String>();
        int positive = 0;
        int negative = 0;
        int skipped = 0;
        for (int i = 0; i < pairCount; i++) {
            String sysScore = extractPredictedClass(scoreLines.get(i));
            String[] idItems = idLines.get(i).split(",");
            if (sysScore == null || idItems.length < 2) {
                // i is zero-based and the header occupies line 1 of each file
                logger.warn("skipping malformed input at line " + (i + 2));
                skipped++;
                continue;
            }
            if (sysScore.equals("1")) { // pair classified as alignment
                res.add(idItems[0] + "\t" + idItems[1]);
                positive++;
            }
            else {
                negative++;
            }
        }
        logger.info("positive class-->added as alignment: " + positive);
        logger.info("negative class-->no alignment: " + negative);
        if (skipped > 0) {
            logger.warn("malformed line pairs skipped: " + skipped);
        }

        System.out.println("write positive class to file");
        FileWriter fw = new FileWriter(new File(tsvFile));
        try {
            for (String r : res) {
                fw.write(r + "\n");
            }
            // Close explicitly so that a failing flush is reported to the caller.
            fw.close();
        }
        finally {
            // Safety net for the exceptional path; closing twice is harmless.
            IOUtils.closeQuietly(fw);
        }
    }

    /**
     * Reads all lines of the given file except the first one. The stream is closed on every
     * path.
     */
    private static List<String> readLinesAfterHeader(String path)
        throws IOException
    {
        List<String> lines = new ArrayList<String>();
        InputStream is = new BufferedInputStream(new FileInputStream(new File(path)));
        try {
            Reader reader = new InputStreamReader(is);
            BufferedReader br = new BufferedReader(reader);
            br.readLine(); // discard the header line
            String line;
            while ((line = br.readLine()) != null) {
                lines.add(line);
            }
        }
        finally {
            IOUtils.closeQuietly(is);
        }
        return lines;
    }

    /**
     * Extracts the predicted class from one classifier output line: the third ':'-separated
     * field, cut at its first ','.
     *
     * @return the predicted class, or {@code null} if the line does not have that shape
     */
    private static String extractPredictedClass(String scoreLine)
    {
        String[] items = scoreLine.split(":");
        if (items.length < 3) {
            return null;
        }
        String[] parts = items[2].split(",");
        return parts.length == 0 ? null : parts[0];
    }
}