/*******************************************************************************
* Copyright 2016
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package de.tudarmstadt.ukp.lmf.transform.sensealignments;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import de.tudarmstadt.ukp.lmf.model.core.Sense;
import de.tudarmstadt.ukp.lmf.transform.DBConfig;
import de.tudarmstadt.ukp.lmf.transform.alignments.SenseAlignment;
import de.tudarmstadt.ukp.lmf.transform.alignments.SenseAlignmentUtils;
/**
* Convert the FrameNet-WordNet alignments to UBY format. This class takes the
* FrameNet 1.5 and WordNet 3.0 ids from a file and integrates them to UBY
*
* @author Silvana Hartmann
*
*/
public class FramenetWordnetAlignment extends SenseAlignment {
static String UBY_HOME = System.getenv("UBY_HOME");
static String DKPRO_HOME = System.getenv("DKPRO_HOME");
protected static Log logger = LogFactory.getLog
(FramenetWordnetAlignment.class);
protected SenseAlignmentUtils saUtils;
ArrayList<String> notfoundWn = null;
ArrayList<String> notfoundFn = null;
ArrayList<String> notAddedAll;
int inputsize = 0;
DBConfig tempsource;
/**
*
* @param sourceUrl
* @param destUrl
* @param alignmentFile
* @param user
* @param pass
* @throws SQLException
* @throws InstantiationException
* @throws IllegalAccessException
* @throws ClassNotFoundException
* @throws FileNotFoundException
*/
public FramenetWordnetAlignment(String sourceUrl, String destUrl, String dbDriver, String dbVendor,
String alignmentFile, String user, String pass, String UBY_HOME)
throws SQLException, InstantiationException,
IllegalAccessException, ClassNotFoundException, FileNotFoundException {
super(sourceUrl, destUrl, dbDriver, dbVendor, alignmentFile, user, pass, UBY_HOME);
notfoundFn = new ArrayList<String>();
notfoundWn = new ArrayList<String>();
notAddedAll = new ArrayList<String>();
tempsource = new DBConfig(sourceUrl, dbDriver, dbVendor, user, pass, false);
saUtils = new SenseAlignmentUtils(tempsource, tempsource, 0, 0,"temp_Duc", "temp_Duc");
}
public FramenetWordnetAlignment(String sourceUrl, String destUrl,
String alignmentFile, String user, String pass) throws SQLException, InstantiationException,
IllegalAccessException, ClassNotFoundException, FileNotFoundException{
this(sourceUrl, destUrl,"com.mysql.jdbc.Driver",
"mysql", alignmentFile, user, pass, UBY_HOME);
}
/**
* Collect UBY SenseIds for the aligned senses based on synsetId and lemma
* for WordNet and based on lexical unit id for FrameNet
*
* @throws IllegalArgumentException
*/
@Override
public void getAlignment()
throws IllegalArgumentException {
List<String[]> data = null;
data = readAlignmentFile();
if (ubySource == null) {
logger.warn("uby source is empty");
}
int counter = 0; // input sense pairs
int found = 0; // output sense pairs
// temp table for FN
String declareFieldsFN = "senseId varchar(255) NOT NULL, externalReference varchar(255)";
String sqlInsertDataFN = "SELECT S.senseId, "
+ " M.externalReference "
+ " FROM Sense S JOIN MonolingualExternalRef M"
+ " ON (S.senseId=M.senseId)"
+ "where substring(S.senseId,1,2)=\"FN\"";
String declareFieldsWN = "senseId varchar(255) NOT NULL, "
+ "synsetId varchar(255) NOT NULL, "
+ "writtenForm varchar(255) NOT NULL, "
+ "lexicalEntryId varchar(255) NOT NULL, "
+ "externalReference varchar(255)";
// temp table for WN
String sqlInsertDataWN = "SELECT Sense.senseId, Sense.synsetId,"
+ "FormRepresentation_Lemma.writtenForm,LexicalEntry.lexicalEntryId, "
+ "MonolingualExternalRef.externalReference "
+ "FROM Sense JOIN (MonolingualExternalRef,LexicalEntry,FormRepresentation_Lemma) "
+ "ON (Sense.synsetId=MonolingualExternalRef.synsetId "
+ "AND Sense.lexicalEntryId=LexicalEntry.lexicalEntryId "
+ "AND FormRepresentation_Lemma.lemmaId=LexicalEntry.lemmaId) "
+ "WHERE MonolingualExternalRef.externalSystem=\"WordNet_3.0_eng_synsetOffset\"";
try {
saUtils.createTempTable(declareFieldsWN, sqlInsertDataWN, 0);
saUtils.createTempTable(declareFieldsFN, sqlInsertDataFN, 1);
} catch (SQLException e1) {
e1.printStackTrace();
}
// iterate over alignment entries
for (String[] d : data) {
counter++;
// show progress:
if ((counter % 1000) == 0) {
logger.info("# processed alignments: " + counter);
}
List<String> wnSenses;
List<Sense> fnSenses;
try {
// get FrameNet sense by ExternalReference (lexical unit Id)
fnSenses = saUtils.getSensesByExternalRefID(d[0], 1, false);
// get WordNet sense by Synset Offset and Lemma
wnSenses = saUtils.getSensesByWNSynsetOffsetAndLemma(d[1],
d[2].replace("_", " "), 0);
if (fnSenses.size() == 1) { // exactly one fn sense
Sense fns = fnSenses.get(0);
if (wnSenses.size() == 1) { // exactly one wn sense
// add the data
addSourceSense(fns);
Sense wns = ubySource.getSenseById(wnSenses.get(0));
addDestSense(wns);
found++;
} else if (wnSenses.size() == 0) { // no WN sense
logger.warn("WN sense not found: " + d[1]
+ " " + d[2].replace("_", " "));
} else { // more than one WN sense
logger.info(
"More than one WN sense for this key: " + d[1]
+ " " + d[2].replace("_", " "));
for (String sid : wnSenses) {
Sense wns = ubySource.getSenseById(sid);
addSourceSense(fns);
addDestSense(wns);
}
}
} else if (fnSenses.size() == 0) {
logger.warn("No FN sense for this key: "
+ d[0]);
} else {
logger.warn(
"More than one FN sense for this key: " + d[0]);
}
} catch (SQLException e1) {
e1.printStackTrace();
}
}
logger.info("Alignments in: " + counter + "Alignments out: " + found);
}
/**
* Read alignment file in standard format, e.g.: fn_luId, wn_synset ID,
* wn_lemma, fn_lemma
*
* @return
* @throws IOException
*/
private List<String[]> readAlignmentFile() {
List<String[]> alignment = new ArrayList<String[]>();
int lineNumber = 0;
BufferedReader reader = null;
try {
reader = new BufferedReader(new FileReader(getAlignmentFileLocation()));
String line = null;
while ((line = reader.readLine()) != null) {
lineNumber++;
String[] items = line.split("\t");
alignment.add(items);
}
} catch (FileNotFoundException e) {
System.err.println("File not found: " + getAlignmentFileLocation());
e.printStackTrace();
} catch (IOException e) {
System.err.println("File could not be opended: " + getAlignmentFileLocation());
IOUtils.closeQuietly(reader);
}
inputsize = lineNumber;
return alignment;
}
/**
* Write output lines to given file
*
* @param outFile
* @param lines
* @throws IOException
*/
protected static void writeLines(String outFile, Collection<String> lines)
throws IOException {
BufferedWriter writer = null;
try {
writer = new BufferedWriter(new FileWriter(new File(outFile)));
for (String line : lines) {
writer.write(line + "\n");
}
} catch (IOException e) {
System.err.println("Exception" + e + "could not write to" + outFile);
} finally {
if (writer != null) {
writer.close();
}
}
}
}