/*******************************************************************************
 * Copyright 2016
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package de.tudarmstadt.ukp.lmf.transform.sensealignments;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.sql.SQLException;
import java.util.List;

import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import de.tudarmstadt.ukp.lmf.model.core.Sense;
import de.tudarmstadt.ukp.lmf.transform.DBConfig;
import de.tudarmstadt.ukp.lmf.transform.alignments.SenseAlignment;
import de.tudarmstadt.ukp.lmf.transform.alignments.SenseAlignmentUtils;

/**
 * Converts GermaNet sense ID - WiktionaryDE sense ID alignment file to UBY format (SenseAxis).
 *
 * @author Judith Eckle-Kohler
 */
public class GermaNetWiktionaryDeAlignment
    extends SenseAlignment
{
    /** ID prefix a destination sense must carry to be accepted as alignment target. */
    private static final String WIKTIONARY_DE_ID_PREFIX = "WktDE";

    /** The alignment file line most recently looked up; kept for diagnostics. */
    private String debug;

    /** Collects the alignment file lines for which the source or destination lookup was empty. */
    public StringBuilder logString;

    /** Number of alignment file lines whose source or destination ID is the literal "null". */
    public int nullAlignment;

    protected static Log logger = LogFactory.getLog(GermaNetWiktionaryDeAlignment.class);

    private final SenseAlignmentUtils saUtils;

    /**
     * Sets up the database access for both resources and creates the two temporary tables
     * (sense ID / external reference pairs) used to resolve the IDs of the alignment file.
     *
     * @param sourceUrl
     *            database URL of the source resource (GermaNet)
     * @param destUrl
     *            database URL of the destination resource (WiktionaryDE)
     * @param dbDriver
     *            database driver passed on to {@link DBConfig}
     * @param dbVendor
     *            database vendor passed on to {@link DBConfig}
     * @param alignmentFile
     *            alignment file, passed on to the superclass
     * @param user
     *            database user
     * @param pass
     *            database password
     * @param UBY_HOME
     *            not used by this class
     */
    public GermaNetWiktionaryDeAlignment(String sourceUrl, String destUrl, String dbDriver,
            String dbVendor, String alignmentFile, String user, String pass, String UBY_HOME)
        throws SQLException, InstantiationException, IllegalAccessException,
        ClassNotFoundException, FileNotFoundException
    {
        super(sourceUrl, destUrl, alignmentFile);
        logString = new StringBuilder();
        nullAlignment = 0;

        // source = GermaNet, destination = WiktionaryDE
        DBConfig sourceDbConfig = new DBConfig(sourceUrl, dbDriver, dbVendor, user, pass, true);
        DBConfig destDbConfig = new DBConfig(destUrl, dbDriver, dbVendor, user, pass, true);

        // type of source ID = destination ID = 0 - this means the alignment is between
        // sense IDs (in contrast to synset IDs)
        if (sourceUrl.equals(destUrl)) {
            saUtils = new SenseAlignmentUtils(sourceDbConfig, sourceDbConfig, 0, 0,
                    "tempTableSource", "tempTableSource");
        }
        else {
            saUtils = new SenseAlignmentUtils(sourceDbConfig, destDbConfig, 0, 0,
                    "tempTableSource", "tempTableDest");
        }

        // declare fields of the temporary tables
        String decFields = "senseId varchar(255) NOT NULL, externalReference varchar(255)";
        saUtils.createTempTable(decFields, externalRefQuery(2, "GN"), 0);
        saUtils.createTempTable(decFields, externalRefQuery(4, "WktD"), 1);
    }

    /**
     * Builds the query selecting sense ID and external reference of all senses whose sense ID
     * starts with the given prefix. The statement text is kept exactly as it always was.
     */
    private static String externalRefQuery(int prefixLength, String idPrefix)
    {
        return "SELECT S.senseId, " + " M.externalReference "
                + " FROM Sense S JOIN MonolingualExternalRef M"
                + " ON (S.senseId=M.senseId)"
                + "where substring(S.senseId,1," + prefixLength + ")=\"" + idPrefix + "\"";
    }

    /**
     * Reads the tab-separated alignment file and registers one source/destination sense pair
     * per resolved source sense. The first line of the file is skipped, as are lines with
     * fewer than two columns. Lines containing the literal "null" as an ID are counted in
     * {@link #nullAlignment}; lines whose IDs cannot be resolved are appended to
     * {@link #logString}. Failures are logged, not propagated.
     */
    @Override
    public void getAlignment()
    {
        // source = GermaNet, destination = WiktionaryDE
        int lineNumber = 0;
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(getAlignmentFileLocation()));
            String line;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                if (lineNumber == 1) {
                    continue; // first line carries no alignment
                }

                // each component is separated by tab character
                String[] columns = line.split("\t");
                if (columns.length < 2) {
                    continue;
                }

                // every 'l' is stripped from the GermaNet ID before the lookup
                String sourceID = columns[0].replace("l", "");
                String destID = columns[1];
                logger.debug("GermaNet source ID: " + sourceID);

                if (sourceID.equals("null") || destID.equals("null")) {
                    nullAlignment++;
                    continue;
                }

                debug = line;
                List<Sense> sourceSenses = saUtils.getSensesByExternalRefID(sourceID, 0, false);
                List<Sense> destSenses = saUtils.getSensesByExternalRefID(destID, 1, false);
                if (sourceSenses.isEmpty() || destSenses.isEmpty()) {
                    logString.append(debug);
                    logString.append(LF);
                    continue;
                }

                // the target does not depend on the source sense, so pick it once per line
                Sense destSense = selectWiktionaryDeSense(destSenses);
                if (destSense == null) {
                    logger.warn("no target sense found: " + line + " " + sourceSenses + " "
                            + destSenses);
                    continue;
                }
                for (Sense sourceSense : sourceSenses) {
                    addSourceSense(sourceSense);
                    addDestSense(destSense);
                }
            }
            // restore memories
            saUtils.destroyTempTable();
        }
        catch (Exception ex) {
            logger.error("Reading the alignment file failed (last line read: " + lineNumber
                    + ")", ex);
        }
        finally {
            // close on success as well as on failure; previously the reader leaked on success
            IOUtils.closeQuietly(reader);
        }
    }

    /**
     * Picks the alignment target among the candidates: the first candidate if its ID starts
     * with "WktDE", otherwise the second one under the same condition. Only these two
     * positions are inspected.
     *
     * @param destSenses
     *            non-empty list of candidate senses
     * @return the selected sense, or {@code null} if neither of the first two qualifies
     */
    private static Sense selectWiktionaryDeSense(List<Sense> destSenses)
    {
        Sense first = destSenses.get(0);
        if (first.getId().startsWith(WIKTIONARY_DE_ID_PREFIX)) {
            return first;
        }
        if (destSenses.size() > 1) {
            Sense second = destSenses.get(1);
            if (second.getId().startsWith(WIKTIONARY_DE_ID_PREFIX)) {
                return second;
            }
        }
        return null;
    }
}