/*******************************************************************************
* Copyright 2016
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package de.tudarmstadt.ukp.lmf.transform.sensealignments;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.nio.charset.StandardCharsets;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.HashMap;
import java.util.List;
import de.tudarmstadt.ukp.lmf.model.core.Sense;
import de.tudarmstadt.ukp.lmf.transform.DBConfig;
import de.tudarmstadt.ukp.lmf.transform.alignments.SenseAlignment;
import de.tudarmstadt.ukp.lmf.transform.alignments.SenseAlignmentUtils;
/**
 * Sense alignment between English and German Wikipedia entries.
 * <p>
 * The alignment file is read line by line; the first line is skipped and every
 * other line is split on tab characters, with column 0 taken as the English
 * Wikipedia title and column 1 as the German Wikipedia title. Pairs for which a
 * sense is found on both sides are registered through {@code addSourceSense} /
 * {@code addDestSense}; all other pairs are printed and appended to an internal
 * log buffer.
 */
public class WikiEnDeSenseAlignment extends SenseAlignment
{
    /**
     * Collects the title pairs that could not be aligned, one per line
     * (separated by {@code LF}). The buffer is only written, never read,
     * within this class.
     */
    private final StringBuilder logString = new StringBuilder();

    /**
     * Creates the alignment; all arguments are forwarded unchanged to the
     * {@link SenseAlignment} constructor.
     *
     * @throws FileNotFoundException propagated from the super constructor
     */
    public WikiEnDeSenseAlignment(String sourceUrl,
            String destUrl, String dbDriver, String dbVendor,
            String alignmentFile, String user, String pass, String UBY_HOME)
        throws FileNotFoundException
    {
        super(sourceUrl, destUrl, dbDriver, dbVendor, alignmentFile, user, pass, UBY_HOME);
    }

    /**
     * Creates the alignment using the inherited {@code UBY_HOME} value as the
     * last argument of the {@link SenseAlignment} constructor.
     *
     * @throws FileNotFoundException propagated from the super constructor
     */
    public WikiEnDeSenseAlignment(String sourceUrl,
            String destUrl, String dbDriver, String dbVendor,
            String alignmentFile, String user, String pass)
        throws FileNotFoundException
    {
        super(sourceUrl, destUrl, dbDriver, dbVendor, alignmentFile, user, pass, UBY_HOME);
    }

    /**
     * Reads the alignment file and registers one source/destination sense pair
     * per line whose titles can be resolved on both sides.
     * <p>
     * Candidate senses are fetched with {@code getSensesByOriginalReference}
     * and the first candidate whose first monolingual external reference equals
     * the title is chosen. Lines with fewer than two columns are reported and
     * skipped. Any exception aborts processing and is printed; it is not
     * rethrown.
     */
    @Override
    public void getAlignment() // TODO check!
    {
        try {
            System.out.println(getAlignmentFileLocation());
            // try-with-resources closes the reader even if a lookup fails.
            try (BufferedReader reader = openAlignmentReader()) {
                String line;
                int lineNumber = 0;
                while ((line = reader.readLine()) != null) {
                    lineNumber++;
                    if (lineNumber == 1) {
                        // the first line is never treated as data
                        continue;
                    }
                    String[] tmp = line.split("\t");
                    if (tmp.length < 2) {
                        // a blank or truncated line must not abort the whole run
                        System.out.println("Skipping malformed line " + lineNumber + ": " + line);
                        continue;
                    }
                    String titleWikiEn = tmp[0];
                    String titleWikiDe = tmp[1];
                    System.out.println("...processing line: " + lineNumber + "...");
                    List<Sense> sensesSource = ubySource.getSensesByOriginalReference("Wikipedia", titleWikiEn); //FIXME change to new external System label
                    List<Sense> sensesDest = ubyDest.getSensesByOriginalReference("Wikipedia", titleWikiDe); //FIXME change to new external System label
                    Sense source = findSenseByFirstExternalRef(sensesSource, titleWikiEn);
                    Sense dest = findSenseByFirstExternalRef(sensesDest, titleWikiDe);
                    alignOrLog(source, dest, titleWikiEn, titleWikiDe);
                }
            }
        }
        catch (Exception ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Inserts one {@code SenseAxis} row per data line of
     * {@code UBY_HOME/alignment_wp_en_de_all_titles_keys_2012_02_27.txt}.
     * <p>
     * The first line of the file is skipped. Column 2 is used as
     * {@code senseOneId} and column 3 as {@code senseTwoId}; rows are numbered
     * from 1. The statement is parameterized, so ids containing quote
     * characters are stored verbatim instead of breaking the SQL text.
     * The parameters {@code destUrl} is not used.
     *
     * @throws SQLException if connecting or inserting fails
     * @throws ClassNotFoundException if {@code dbDriver} cannot be loaded
     * @throws IOException if the input file cannot be read
     */
    @Deprecated
    public void insertAlignment(String sourceUrl,
            String destUrl, String dbDriver, String dbVendor, String user, String pass, String UBY_HOME)
        throws SQLException, ClassNotFoundException, IOException
    {
        Class.forName(dbDriver);
        final String insertSql = "Insert into SenseAxis(senseAxisId,senseAxisType,senseOneId,senseTwoId,lexicalResourceId,idx)"
                + " Values(?,'crosslingualSenseAlignment',?,?,'Uby',?)";
        // All three resources are released in reverse order, also on failure.
        try (Connection connection = DriverManager.getConnection("jdbc:" + dbVendor + "://" + sourceUrl, user, pass);
                PreparedStatement insert = connection.prepareStatement(insertSql);
                BufferedReader input = new BufferedReader(
                        new FileReader(UBY_HOME + "/alignment_wp_en_de_all_titles_keys_2012_02_27.txt"))) {
            String line;
            int count = 1;
            boolean start = true;
            while ((line = input.readLine()) != null) {
                if (start) {
                    // skip the first line of the file
                    start = false;
                    continue;
                }
                String[] tokens = line.split("\t");
                String wpdename = tokens[3];
                String wpenname = tokens[2];
                insert.setString(1, "WP_en_de_alignment_" + count);
                insert.setString(2, wpenname);
                insert.setString(3, wpdename);
                insert.setInt(4, count);
                insert.executeUpdate();
                System.out.println(count++);
            }
        }
    }

    /**
     * Extends a title-pair file with the sense ids found in the
     * {@code MonolingualExternalRef} table and writes the result to
     * {@code target/WPDEEN_aligment}.
     * <p>
     * Each output line is {@code enTitle TAB deTitle TAB enSenseId TAB deSenseId};
     * a missing id is written as the text {@code null}. Every input line is
     * processed, including the first one.
     * <p>
     * NOTE(review): the input path is a hard-coded absolute path and the
     * {@code UBY_HOME} and {@code destUrl} parameters are not used - confirm
     * whether the path should be derived from {@code UBY_HOME}.
     *
     * @throws SQLException if connecting or querying fails
     * @throws ClassNotFoundException if {@code dbDriver} cannot be loaded
     * @throws IOException if the input or output file cannot be accessed
     */
    @Deprecated
    public void correctAlignmentFile(String sourceUrl,
            String destUrl, String dbDriver, String dbVendor, String user, String pass, String UBY_HOME)
        throws SQLException, ClassNotFoundException, IOException
    {
        Class.forName(dbDriver);
        HashMap<String, String> wpde = new HashMap<String, String>();
        HashMap<String, String> wpen = new HashMap<String, String>();
        // The database is only needed to build the two lookup maps, so it is
        // released before the file is processed.
        try (Connection connection = DriverManager.getConnection("jdbc:" + dbVendor + "://" + sourceUrl, user, pass);
                Statement statement = connection.createStatement()) {
            fillExternalRefIndex(statement,
                    "SELECT externalReference, senseId FROM MonolingualExternalRef where senseId like 'WikiD%'",
                    wpde);
            System.out.println("First filled");
            fillExternalRefIndex(statement,
                    "SELECT externalReference, senseId FROM MonolingualExternalRef where senseId like 'WikiE%'",
                    wpen);
            System.out.println("Second filled");
        }
        try (BufferedReader input = new BufferedReader(
                    new FileReader("/home/matuschek/UBY_HOME/alignment_wp_en_de_all_titles_keys_2011_11_08.txt"));
                FileOutputStream out = new FileOutputStream("target/WPDEEN_aligment");
                // Connect print stream to the output stream
                PrintStream p = new PrintStream(out)) {
            String line;
            while ((line = input.readLine()) != null) {
                String[] tokens = line.split("\t");
                String wpdename = tokens[1];
                String wpenname = tokens[0];
                String wpenid = wpen.get(wpenname);
                String wpdeid = wpde.get(wpdename);
                String record = wpenname + "\t" + wpdename + "\t" + wpenid + "\t" + wpdeid;
                p.println(record);
                System.out.println(record);
            }
        }
    }

    /**
     * Variant of {@link #getAlignment()} that resolves titles through
     * {@link SenseAlignmentUtils#getSensesByExternalRefID} backed by the
     * temporary tables {@code temp_Duc3}.
     * <p>
     * A pair is aligned only when exactly one sense is returned for each title.
     * Lines with fewer than two columns are reported and skipped. Any exception
     * aborts processing; the line being processed and the stack trace are
     * printed and nothing is rethrown. The {@code UBY_HOME} parameter is not
     * used.
     */
    public void getAlignmentQuickly(String sourceUrl,
            String destUrl, String dbDriver, String dbVendor, String user, String pass, String UBY_HOME)
    {
        String line = null;
        try {
            DBConfig s = new DBConfig(sourceUrl, dbDriver, dbVendor, user, pass, true);
            DBConfig d = new DBConfig(destUrl, dbDriver, dbVendor, user, pass, true);
            // temp_Duc is the name of temporary table
            SenseAlignmentUtils saUtils;
            if (sourceUrl.equals(destUrl)) {
                saUtils = new SenseAlignmentUtils(s, s, 0, 0, "temp_Duc3", "temp_Duc3");
            }
            else {
                saUtils = new SenseAlignmentUtils(s, d, 0, 0, "temp_Duc3", "temp_Duc3");
            }
            saUtils.createDefaultTempTables(false);
            System.out.println(getAlignmentFileLocation());
            // The file is decoded as UTF-8 while reading, which replaces the
            // former per-line re-encoding through the platform charset.
            try (BufferedReader reader = openAlignmentReader()) {
                int lineNumber = 0;
                while ((line = reader.readLine()) != null) {
                    lineNumber++;
                    if (lineNumber == 1) {
                        // the first line is never treated as data
                        continue;
                    }
                    String[] tmp = line.split("\t");
                    if (tmp.length < 2) {
                        // a blank or truncated line must not abort the whole run
                        System.out.println("Skipping malformed line " + lineNumber + ": " + line);
                        continue;
                    }
                    String titleWikiEn = tmp[0];
                    String titleWikiDe = tmp[1];
                    System.out.println("...processing line: " + lineNumber + "...");
                    List<Sense> sensesSource = saUtils.getSensesByExternalRefID(titleWikiEn, 0, false);
                    List<Sense> sensesDest = saUtils.getSensesByExternalRefID(titleWikiDe, 1, false);
                    Sense source = null;
                    Sense dest = null;
                    // only unambiguous matches on both sides are aligned
                    if (sensesSource.size() == 1 && sensesDest.size() == 1) {
                        source = sensesSource.get(0);
                        dest = sensesDest.get(0);
                    }
                    alignOrLog(source, dest, titleWikiEn, titleWikiDe);
                }
            }
            // saUtils.destroyTempTable();
        }
        catch (Exception ex) {
            System.out.println("Error happens here:" + line);
            ex.printStackTrace();
        }
    }

    /** Opens the alignment file as UTF-8 text, independent of the platform default charset. */
    private BufferedReader openAlignmentReader() throws FileNotFoundException
    {
        return new BufferedReader(new InputStreamReader(
                new FileInputStream(getAlignmentFileLocation()), StandardCharsets.UTF_8));
    }

    /**
     * Returns the first sense whose first monolingual external reference equals
     * {@code title}, or {@code null} if there is none. Senses without any
     * external reference are ignored.
     */
    private static Sense findSenseByFirstExternalRef(List<Sense> senses, String title)
    {
        if (senses == null) {
            return null;
        }
        for (Sense sense : senses) {
            if (sense.getMonolingualExternalRefs() == null
                    || sense.getMonolingualExternalRefs().isEmpty()) {
                continue;
            }
            // title is never null here, so comparing from its side is null-safe
            if (title.equals(sense.getMonolingualExternalRefs().get(0).getExternalReference())) {
                return sense;
            }
        }
        return null;
    }

    /** Registers the pair when both senses are present, otherwise records the titles as unaligned. */
    private void alignOrLog(Sense source, Sense dest, String titleWikiEn, String titleWikiDe)
    {
        if (source != null && dest != null) {
            addSourceSense(source);
            addDestSense(dest);
        }
        else {
            logUnalignedPair(titleWikiEn, titleWikiDe);
        }
    }

    /** Prints a title pair that could not be aligned and appends it to the log buffer. */
    private void logUnalignedPair(String titleWikiEn, String titleWikiDe)
    {
        System.out.println("Log mistakes!" + titleWikiEn + " \t" + titleWikiDe);
        logString.append(LF);
        logString.append(titleWikiEn + "\t" + titleWikiDe);
    }

    /** Runs {@code query} and stores column 1 as key and column 2 as value of every row in {@code target}. */
    private static void fillExternalRefIndex(Statement statement, String query,
            HashMap<String, String> target)
        throws SQLException
    {
        try (ResultSet rs = statement.executeQuery(query)) {
            while (rs.next()) {
                target.put(rs.getString(1), rs.getString(2));
            }
        }
    }
}