/*******************************************************************************
 * Copyright 2016
 * Ubiquitous Knowledge Processing (UKP) Lab
 * Technische Universität Darmstadt
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 ******************************************************************************/
package de.tudarmstadt.ukp.lmf.transform.sensealignments;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import de.tudarmstadt.ukp.lmf.model.core.Sense;
import de.tudarmstadt.ukp.lmf.transform.alignments.SenseAlignment;

/**
 * Reads a VerbNet-to-WordNet alignment file and registers the aligned sense pairs.
 *
 * <p>Each data line of the alignment file is split on {@code '#'}:</p>
 * <ol>
 * <li>field 0: the VerbNet reference;</li>
 * <li>field 1: the WordNet sense keys; the first four characters are discarded, every
 * {@code ')'} is removed and the remainder is split on single spaces;</li>
 * <li>field 2 (optional): a confidence value, or the literal {@code null};</li>
 * <li>field 3 (optional): a metadata id, or the literal {@code null}.</li>
 * </ol>
 *
 * <p>When the very same line occurs several times in a row, the n-th repetition is matched
 * against the n-th sense found for the VerbNet reference.</p>
 *
 * <p>Pairs that cannot be resolved are collected in {@link #missMatchs} and
 * {@link #logString}.</p>
 */
public class VerbNetWordNetAlignment
    extends SenseAlignment
{
    /** WordNet sense keys for which no alignment could be established. */
    List<String> missMatchs;

    /** One line ({@code <VerbNet reference> TAB <WordNet key>}) per unresolved pair. */
    StringBuilder logString;

    /**
     * Creates the alignment and initialises the (empty) mismatch bookkeeping.
     * All arguments are handed to the {@link SenseAlignment} constructor unchanged.
     *
     * @throws FileNotFoundException
     *             propagated from the superclass constructor
     */
    public VerbNetWordNetAlignment(String sourceUrl, String destUrl, String dbDriver,
            String dbVendor, String alignmentFile, String user, String pass, String UBY_HOME)
        throws FileNotFoundException
    {
        super(sourceUrl, destUrl, dbDriver, dbVendor, alignmentFile, user, pass, UBY_HOME);
        missMatchs = new ArrayList<String>();
        logString = new StringBuilder();
    }

    /**
     * Same as the eight-argument constructor, using the inherited {@code UBY_HOME} value.
     *
     * @throws FileNotFoundException
     *             propagated from the superclass constructor
     */
    public VerbNetWordNetAlignment(String sourceUrl, String destUrl, String dbDriver,
            String dbVendor, String alignmentFile, String user, String pass)
        throws FileNotFoundException
    {
        this(sourceUrl, destUrl, dbDriver, dbVendor, alignmentFile, user, pass, UBY_HOME);
    }

    /**
     * Resolves the alignment file against sense ids read directly from the
     * {@code MonolingualExternalRef} table instead of going through the Uby API.
     *
     * <p>The database connection is only needed to load the two lookup tables and is closed
     * before the alignment file is processed.</p>
     *
     * @param sourceUrl
     *            host/database part of the JDBC URL ({@code jdbc:<dbVendor>://<sourceUrl>})
     * @param destUrl
     *            not used by this method
     * @param dbDriver
     *            fully qualified JDBC driver class name, loaded via {@link Class#forName(String)}
     * @param dbVendor
     *            vendor part of the JDBC URL
     * @param user
     *            database user
     * @param pass
     *            database password
     * @param UBY_HOME
     *            not used by this method
     * @param createLMFobjects
     *            if {@code true}, every resolved pair is additionally looked up through
     *            {@code ubySource}/{@code ubyDest} and registered via
     *            {@code addSourceSense}/{@code addDestSense}; otherwise pairs are only counted
     * @throws SQLException
     *             if the database cannot be queried
     * @throws ClassNotFoundException
     *             if {@code dbDriver} cannot be loaded
     * @throws IOException
     *             if the alignment file cannot be read
     */
    public void getAlignmentDirectSQL(String sourceUrl, String destUrl, String dbDriver,
            String dbVendor, String user, String pass, String UBY_HOME, boolean createLMFobjects)
        throws SQLException, ClassNotFoundException, IOException, IllegalArgumentException
    {
        Class.forName(dbDriver);

        HashMap<String, String> vn = new HashMap<String, String>();
        HashMap<String, String> wn = new HashMap<String, String>();

        Connection connection = DriverManager.getConnection(
                "jdbc:" + dbVendor + "://" + sourceUrl, user, pass);
        try {
            Statement statement = connection.createStatement();
            try {
                System.out.println();
                loadVerbNetSenseIds(statement, vn);
                System.out.println("First filled");
                loadWordNetSenseIds(statement, wn);
                System.out.println("Second filled");
            }
            finally {
                statement.close();
            }
        }
        finally {
            connection.close();
        }

        System.out.println(getAlignmentFileLocation());
        BufferedReader reader = parseMetaData();
        int count = 0;
        try {
            String line;
            String previousLine = "";
            int senseIndex = 0;
            while ((line = reader.readLine()) != null) {
                // Consecutive identical lines address consecutive senses of the same reference.
                if (!line.equals(previousLine)) {
                    previousLine = line;
                    senseIndex = 0;
                }
                else {
                    senseIndex++;
                }

                String[] fields = line.split("#");
                if (fields.length < 2) {
                    System.err.println("Skipping malformed alignment line: '" + line + "'");
                    continue;
                }
                String verbnetItem = fields[0];
                Double confidence = vnWnConfidenceOf(fields);
                String metaDataId = vnWnMetaDataIdOf(fields);

                String[] wordNetItems = splitWordNetKeys(fields[1]);
                if (wordNetItems.length == 0) {
                    continue;
                }

                String vnId = vn.get(verbnetItem + "$$$" + senseIndex);
                for (String rawKey : wordNetItems) {
                    if (rawKey.trim().length() == 0) {
                        continue;
                    }
                    // Question marks in the alignment file are not part of the sense key.
                    String wordnet = rawKey.replace("?", "");

                    System.out.println(count);
                    String wnid = wn.get("[POS: verb] " + wordnet + "::");
                    if (vnId != null && wnid != null) {
                        if (createLMFobjects) {
                            Sense sourceSense = ubySource.getSenseById(vnId);
                            Sense destSense = ubyDest.getSenseById(wnid);
                            addSourceSense(sourceSense);
                            addDestSense(destSense);
                            if (metaDataId != null && confidence != null) {
                                addMetaData(metaDataId, confidence);
                            }
                        }
                        // NOTE: an earlier version carried a disabled INSERT into SenseAxis
                        // here; resolved pairs are only counted (and optionally registered).
                        count++;
                    }
                    else {
                        logVnWnMismatch(verbnetItem, wordnet);
                    }
                }
            }
        }
        finally {
            if (reader != null) {
                reader.close();
            }
        }
        System.out.println("number of alignment:" + count);
    }

    /**
     * Resolves the alignment file through {@code ubySource} and {@code ubyDest} and registers
     * every resolved pair via {@code addSourceSense}/{@code addDestSense} (plus
     * {@code addMetaData} when both metadata id and confidence are present).
     *
     * <p>Exceptions are not propagated: they are printed, preceded by the WordNet key that was
     * being processed, and processing stops.</p>
     */
    @Override
    public void getAlignment()
    {
        String error = "";
        try {
            System.out.println(getAlignmentFileLocation());
            BufferedReader reader = parseMetaData();
            try {
                String line;
                String previousLine = "";
                int senseIndex = 0;
                int count = 0;
                while ((line = reader.readLine()) != null) {
                    // Consecutive identical lines address consecutive senses of the same
                    // reference.
                    if (!line.equals(previousLine)) {
                        previousLine = line;
                        senseIndex = 0;
                    }
                    else {
                        senseIndex++;
                    }

                    String[] fields = line.split("#");
                    if (fields.length < 2) {
                        System.err.println("Skipping malformed alignment line: '" + line + "'");
                        continue;
                    }
                    String verbnetItem = fields[0];
                    Double confidence = vnWnConfidenceOf(fields);
                    String metaDataId = vnWnMetaDataIdOf(fields);

                    String[] wordNetItems = splitWordNetKeys(fields[1]);
                    if (wordNetItems.length == 0) {
                        continue;
                    }

                    List<Sense> senses = ubySource.getSensesByOriginalReference(
                            "VerbNet_3.2_eng_sense", verbnetItem);
                    // A reference with fewer senses than repetitions is reported as a
                    // mismatch below instead of aborting the whole run.
                    Sense currentVerbNetSense = null;
                    if (senses != null && senseIndex < senses.size()) {
                        currentVerbNetSense = senses.get(senseIndex);
                    }

                    for (String rawKey : wordNetItems) {
                        if (rawKey.trim().length() == 0) {
                            continue;
                        }
                        // Question marks in the alignment file are not part of the sense key.
                        String wordnet = rawKey.replace("?", "");
                        error = wordnet;

                        System.out.println(count);
                        List<Sense> wnSenses = ubyDest.getSensesByOriginalReference(
                                "WordNet_3.0_eng_senseKey", "[POS: verb] " + wordnet + "::");
                        // TODO (from the original author): verify that exactly one WordNet
                        // sense is returned; currently the first one is used.
                        if (wnSenses.size() != 0 && currentVerbNetSense != null) {
                            addSourceSense(currentVerbNetSense);
                            addDestSense(wnSenses.get(0));
                            if (metaDataId != null && confidence != null) {
                                addMetaData(metaDataId, confidence);
                            }
                            count++;
                        }
                        else {
                            System.out.println(wordnet + " " + vnWnDebugLabel(verbnetItem, wordnet));
                            logVnWnMismatch(verbnetItem, wordnet);
                        }
                    }
                }
                System.out.println("number of alignment:" + count);
            }
            finally {
                if (reader != null) {
                    reader.close();
                }
            }
        }
        catch (IOException ex) {
            ex.printStackTrace();
        }
        catch (Exception e) {
            System.out.println("Debug:" + error);
            e.printStackTrace();
        }
    }

    /**
     * Fills {@code target} with {@code <externalReference>$$$<n> -> senseId} for all VerbNet
     * rows, where {@code n} numbers rows sharing the same external reference from 0.
     */
    private static void loadVerbNetSenseIds(Statement statement, HashMap<String, String> target)
        throws SQLException
    {
        ResultSet rs = statement.executeQuery(
                "SELECT externalReference,senseId FROM MonolingualExternalRef where senseId like 'VN%' order by externalReference");
        try {
            int senseno = 0;
            String previous = "";
            while (rs.next()) {
                String reference = rs.getString(1);
                String senseId = rs.getString(2);
                if (!reference.equals(previous)) {
                    previous = reference;
                    senseno = 0;
                }
                else {
                    senseno++;
                }
                System.out.println(reference + "$$$" + senseno + " " + senseId);
                target.put(reference + "$$$" + senseno, senseId);
            }
        }
        finally {
            rs.close();
        }
    }

    /** Fills {@code target} with {@code externalReference -> senseId} for WordNet verb rows. */
    private static void loadWordNetSenseIds(Statement statement, HashMap<String, String> target)
        throws SQLException
    {
        ResultSet rs = statement.executeQuery(
                "SELECT externalReference,senseId FROM MonolingualExternalRef where senseId like 'WN%' and externalReference like '[POS: v%'");
        try {
            while (rs.next()) {
                String reference = rs.getString(1);
                String senseId = rs.getString(2);
                target.put(reference, senseId);
                System.out.println(reference + " " + senseId);
            }
        }
        finally {
            rs.close();
        }
    }

    /** Returns the confidence in field 2, or {@code null} if absent or the literal "null". */
    private static Double vnWnConfidenceOf(String[] fields)
    {
        if (fields.length > 2 && !fields[2].equals("null")) {
            return Double.valueOf(Double.parseDouble(fields[2]));
        }
        return null;
    }

    /** Returns the metadata id in field 3, or {@code null} if absent or the literal "null". */
    private static String vnWnMetaDataIdOf(String[] fields)
    {
        if (fields.length > 3 && !fields[3].equals("null")) {
            return fields[3];
        }
        return null;
    }

    /**
     * Drops the four-character prefix and all {@code ')'} from the WordNet field and splits
     * the rest on single spaces; returns an empty array when nothing remains.
     */
    private static String[] splitWordNetKeys(String wordNetField)
    {
        if (wordNetField.length() < 4) {
            return new String[0];
        }
        String cleaned = wordNetField.substring(4).replace(")", "").trim();
        if (cleaned.length() == 0) {
            return new String[0];
        }
        return cleaned.split(" ");
    }

    /**
     * Builds the label printed next to an unresolved WordNet key: the VerbNet reference with
     * every literal {@code <word>_} removed, cut at the first {@code '-'}, where {@code word}
     * is the part of the key before its first {@code '%'}.
     */
    private static String vnWnDebugLabel(String verbnetItem, String wordnet)
    {
        int percent = wordnet.indexOf('%');
        String word = (percent >= 0 ? wordnet.substring(0, percent) : wordnet).trim();
        // Literal replacement: the key comes from the data file and is not a regular expression.
        String stripped = verbnetItem.replace(word + "_", "");
        int dash = stripped.indexOf('-');
        return dash >= 0 ? stripped.substring(0, dash) : stripped;
    }

    /** Records an unresolved VerbNet/WordNet pair in the mismatch list and the log buffer. */
    private void logVnWnMismatch(String verbnetItem, String wordnet)
    {
        missMatchs.add(wordnet);
        logString.append(verbnetItem + "\t" + wordnet);
        logString.append(LF);
    }
}