/*******************************************************************************
 * Copyright 2016
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package de.tudarmstadt.ukp.lmf.transform.sensealignments;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.sql.SQLException;
import java.util.List;

import de.tudarmstadt.ukp.lmf.model.core.Sense;
import de.tudarmstadt.ukp.lmf.transform.DBConfig;
import de.tudarmstadt.ukp.lmf.transform.alignments.SenseAlignment;
import de.tudarmstadt.ukp.lmf.transform.alignments.SenseAlignmentUtils;

/**
 * Builds sense alignments from a tab-separated file in which every usable line
 * has exactly two columns: a WordNet reference and a Wikipedia reference.
 *
 * <p>Both references are resolved to {@link Sense} objects through
 * {@link SenseAlignmentUtils#getSensesByExternalRefID}. A line contributes
 * alignments only when the WordNet reference resolves to at least one sense and
 * the Wikipedia reference resolves to exactly one sense.
 */
public class WordnetWikipediaAlignment extends SenseAlignment {

    /** Name of the temporary table handed to {@link SenseAlignmentUtils} for both sides. */
    private static final String TEMP_TABLE = "temp_Duc";

    /**
     * Collects one entry (WordNet reference, tab, Wikipedia reference, line
     * separator) for every two-column line that did not produce an alignment.
     */
    public StringBuilder logString;

    private final SenseAlignmentUtils saUtils;

    /**
     * Creates the alignment and prepares the default temporary tables.
     *
     * @param sourceUrl     URL of the source database
     * @param destUrl       URL of the destination database; when equal to
     *                      {@code sourceUrl} the source configuration is used for
     *                      both sides
     * @param dbDriver      driver argument forwarded to {@link DBConfig}
     * @param dbVendor      vendor argument forwarded to {@link DBConfig}
     * @param alignmentFile location of the tab-separated alignment file
     * @param user          database user
     * @param pass          database password
     * @param UBY_HOME      not used by this constructor
     */
    public WordnetWikipediaAlignment(String sourceUrl, String destUrl, String dbDriver, String dbVendor,
            String alignmentFile, String user, String pass, String UBY_HOME)
            throws SQLException, InstantiationException, IllegalAccessException, ClassNotFoundException,
            FileNotFoundException {
        super(sourceUrl, destUrl, alignmentFile);
        logString = new StringBuilder();

        DBConfig source = new DBConfig(sourceUrl, dbDriver, dbVendor, user, pass, true);
        // Same argument order as the source configuration: driver, vendor, user, password.
        // (All of these are Strings, so a swapped order compiles silently.)
        DBConfig dest = new DBConfig(destUrl, dbDriver, dbVendor, user, pass, true);

        DBConfig effectiveDest = sourceUrl.equals(destUrl) ? source : dest;
        saUtils = new SenseAlignmentUtils(source, effectiveDest, 1, 0, TEMP_TABLE, TEMP_TABLE);
        saUtils.createDefaultTempTables(true);
    }

    /**
     * Reads the alignment file line by line and registers a source/destination
     * sense pair for every resolvable line.
     *
     * <p>Lines that do not split into exactly two tab-separated columns are
     * skipped. Two-column lines that cannot be aligned are appended to
     * {@link #logString}. Progress and the final alignment count are printed to
     * standard output. Any exception is printed and ends processing; in that case
     * the temporary table is not destroyed.
     */
    @Override
    public void getAlignment() {
        int lineNumber = 0;
        System.out.println("Starting getting alignment from " + getAlignmentFileLocation());
        try {
            int count = 0;
            // NOTE(review): FileReader decodes with the platform default charset;
            // confirm this matches the encoding of the alignment file.
            try (BufferedReader reader = new BufferedReader(new FileReader(getAlignmentFileLocation()))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    lineNumber++;
                    String[] columns = line.split("\t");
                    // Only lines with exactly two columns carry an alignment candidate.
                    if (columns.length != 2) {
                        continue;
                    }

                    String wordNetRef = columns[0];
                    String wikiRef = escapeQuotes(columns[1]);

                    // The original author notes the flag is true when the reference is a synset ID.
                    List<Sense> wordNetSenses = saUtils.getSensesByExternalRefID(wordNetRef, 0, true);
                    List<Sense> wikiSenses = saUtils.getSensesByExternalRefID(wikiRef, 1, false);
                    System.out.println("Line: " + lineNumber + " #alignments " + count);

                    if (!wordNetSenses.isEmpty() && wikiSenses.size() == 1) {
                        Sense wikiSense = wikiSenses.get(0);
                        for (Sense wordNetSense : wordNetSenses) {
                            addSourceSense(wordNetSense);
                            addDestSense(wikiSense);
                            count++;
                        }
                    } else {
                        System.out.println(wordNetSenses.size() + " " + wikiSenses.size() + " ??");
                        logString.append(wordNetRef).append("\t").append(wikiRef).append(LF);
                    }
                }
            }
            // The reader is closed at this point, matching the original order of operations.
            saUtils.destroyTempTable();
            System.out.println("Number of alignment:" + count);
        } catch (Exception ex) {
            // Best-effort processing: report the failure and return without rethrowing.
            ex.printStackTrace();
        }
    }

    /**
     * Prefixes every single and double quote in {@code ref} with a backslash.
     *
     * <p>Uses literal {@link String#replace(CharSequence, CharSequence)} because
     * {@code replaceAll} treats a backslash in the replacement as an escape
     * character, which would drop it for the single-quote case.
     */
    private static String escapeQuotes(String ref) {
        return ref.replace("'", "\\'").replace("\"", "\\\"");
    }
}