/*******************************************************************************
* Copyright 2016
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package de.tudarmstadt.ukp.lmf.transform.sensealignments;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.sql.SQLException;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.commons.io.IOUtils;
import de.tudarmstadt.ukp.lmf.api.Uby;
import de.tudarmstadt.ukp.lmf.model.core.Sense;
import de.tudarmstadt.ukp.lmf.transform.DBConfig;
import de.tudarmstadt.ukp.lmf.transform.alignments.SenseAlignment;
/**
* Converts an alignment file that maps GermaNet sense IDs to Wiktionary DE sense IDs into the
* UBY format (SenseAxis) by looking up the original sense IDs in a Uby database that contains
* GermaNet and OntoWiktionary DE.
*
*/
public class GermaNetOntoWiktionaryDEAlignment extends SenseAlignment{

    /** External system name used to look up source senses by their original GermaNet reference. */
    private static final String SOURCE_EXTERNAL_SYSTEM = "GermaNet_9.0_deu_lexicalUnit";

    /** External system name used to look up destination senses by their original Wiktionary reference. */
    private static final String DEST_EXTERNAL_SYSTEM = "Wiktionary_1.0.0_2013-02-20_deu_sense";

    /** A destination sense is only accepted as alignment target if its ID starts with this prefix. */
    private static final String TARGET_PREFIX = "OntoWktDE";

    /**
     * Accumulates, one per line, the alignment-file lines for which the database lookup
     * returned no source sense or no destination sense.
     */
    public StringBuilder logString;

    /** Number of data lines whose source or destination column is the literal string "null". */
    public int nullAlignment;

    protected static Logger logger = Logger
            .getLogger(GermaNetOntoWiktionaryDEAlignment.class.getName());

    /** Database access object used to resolve original sense IDs to {@link Sense} objects. */
    private Uby uby;

    /**
     * Creates the converter and opens the Uby database in which the sense IDs are looked up.
     *
     * @param sourceUrl     URL of the database used for the sense lookups (also passed to the superclass)
     * @param destUrl       destination URL, only passed on to the superclass
     * @param dbDriver      JDBC driver passed to {@link DBConfig}
     * @param dbVendor      database vendor passed to {@link DBConfig}
     * @param alignmentFile location of the tab-separated alignment file
     * @param user          database user
     * @param pass          database password
     */
    public GermaNetOntoWiktionaryDEAlignment(String sourceUrl, String destUrl, String dbDriver,String dbVendor,
            String alignmentFile, String user, String pass)
        throws SQLException, InstantiationException, IllegalAccessException, ClassNotFoundException, FileNotFoundException
    {
        super(sourceUrl, destUrl, alignmentFile);
        logString = new StringBuilder();
        nullAlignment = 0;
        // Only a single database connection is needed: both resources are looked up in the
        // same Uby database, so no separate source/destination DBConfig objects are created.
        DBConfig db = new DBConfig(sourceUrl,dbDriver,dbVendor, user, pass, true);
        uby = new Uby(db);
    }

    /**
     * Reads the alignment file line by line and registers every resolvable sense pair via
     * {@code addSourceSense} / {@code addDestSense}.
     * <p>
     * The first line of the file is skipped. Every following line is split at tab characters;
     * lines with fewer than two columns are ignored. Lines containing the literal "null" in
     * one of the first two columns are counted in {@link #nullAlignment}. Lines for which one
     * of the database lookups yields no sense are appended to {@link #logString}.
     * <p>
     * Any exception raised while processing is logged and ends the processing; it is not
     * propagated to the caller. The reader is closed in every case.
     */
    @Override
    public void getAlignment()
    {
        // source = GermaNet, destination = WiktionaryDE
        int lineNumber = 0;
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(
                    getAlignmentFileLocation()));
            String line;
            while ((line = reader.readLine()) != null) {
                lineNumber++;
                if (lineNumber == 1) {
                    // the first line carries no alignment data
                    continue;
                }
                // each component is separated by tab character
                String[] columns = line.split("\t");
                if (columns.length < 2) {
                    continue;
                }
                // Strips every 'l' character from the GermaNet column
                // (presumably an ID prefix such as "l65739" -- confirm against the file format).
                String sourceID = columns[0].replace("l", "");
                System.out.println(sourceID);
                String destID = columns[1];
                if (sourceID.equals("null") || destID.equals("null")) {
                    nullAlignment++;
                    continue;
                }
                // externalSystem="GermaNet_9.0_deu_lexicalUnit" externalReference="65739"
                // externalSystem="Wiktionary_1.0.0_2013-02-20_deu_sense" externalReference="113219:0:1
                List<Sense> sourceSenses = uby.getSensesByOriginalReference(SOURCE_EXTERNAL_SYSTEM, sourceID);
                List<Sense> destSenses = uby.getSensesByOriginalReference(DEST_EXTERNAL_SYSTEM, destID);
                if (sourceSenses.isEmpty() || destSenses.isEmpty()) {
                    logString.append(line);
                    logString.append(LF);
                    continue;
                }
                // The target does not depend on the source sense, so it is chosen once per line.
                Sense targetSense = findTargetSense(destSenses);
                if (targetSense == null) {
                    System.out.println("no target sense found: "+line +" " +sourceSenses
                            +" " +destSenses);
                    continue;
                }
                for (Sense sourceSense : sourceSenses) {
                    addSourceSense(sourceSense);
                    addDestSense(targetSense);
                    System.out.println(sourceID +" " +sourceSense +"\t"
                            +destID +" " +targetSense);
                }
            }
        }
        catch (Exception ex) {
            logger.log(Level.SEVERE,
                    "Processing the alignment file failed (last line read: " + lineNumber + ")", ex);
        }
        finally {
            // close on success as well as on failure
            IOUtils.closeQuietly(reader);
        }
    }

    /**
     * Returns the first sense in {@code candidates} whose ID starts with {@link #TARGET_PREFIX},
     * or {@code null} if there is none.
     */
    private static Sense findTargetSense(List<Sense> candidates)
    {
        for (Sense candidate : candidates) {
            if (candidate.getId().startsWith(TARGET_PREFIX)) {
                return candidate;
            }
        }
        return null;
    }
}