/******************************************************************************* * Copyright 2016 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universität Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. ******************************************************************************/ package de.tudarmstadt.ukp.lmf.transform.alignments; import java.sql.ResultSet; import java.sql.SQLException; import java.util.ArrayList; import java.util.List; import de.tudarmstadt.ukp.lmf.model.core.Sense; import de.tudarmstadt.ukp.lmf.model.semantics.Synset; import de.tudarmstadt.ukp.lmf.transform.DBConfig; public class SenseAlignmentUtils { private final DBConfig source; private final DBConfig dest; private final int typeSource; private final int typeDest; private final MySQLDirectQueries sourceConnection; private final MySQLDirectQueries destConnection; private final String tempTable1, tempTable2; /** * * @param source * : Source Database * @param dest * : Dest Database * @param typeSource * : Type of Source (synset 1 or sense 0) * @param typeDest * : Type of Dest (synset 1 or sense 0) * @throws ClassNotFoundException * @throws IllegalAccessException * @throws InstantiationException */ public SenseAlignmentUtils(DBConfig source, DBConfig dest, int typeSource, int typeDest, String tempTableSource, String tempTableDest) throws SQLException, InstantiationException, IllegalAccessException, ClassNotFoundException { sourceConnection = new MySQLDirectQueries(source); destConnection = new MySQLDirectQueries(dest); this.typeSource = typeSource; this.typeDest = typeDest; this.tempTable1 = tempTableSource; this.tempTable2 = tempTableDest; this.source = source; this.dest = dest; } /** * In my case, I just need temporary tables like that * * @param usingSynsetAxis * : if true: the table has column: synsetId else no * @throws SQLException */ public void createDefaultTempTables(boolean usingSynsetAxis) throws SQLException { // create temp table String sql1 = null; String sql2 = null; switch (source.getDBType()) { case DBConfig.H2: sql1 = "CREATE TEMPORARY TABLE " + tempTable1 + " (senseId varchar(255) ," + ((usingSynsetAxis == true) ? "synsetId varchar(255) ," : "") + "externalReference varchar(255) )"; sql2 = "CREATE TEMPORARY TABLE " + tempTable2 + " (senseId varchar(255) ," + ((usingSynsetAxis == true) ? "synsetId varchar(255) ," : "") + "externalReference varchar(255) )"; break; case DBConfig.MYSQL: sql1 = "CREATE TEMPORARY TABLE " + tempTable1 + " (senseId varchar(255) CHARACTER SET utf8 NOT NULL," + ((usingSynsetAxis == true) ? "synsetId varchar(255) CHARACTER SET utf8 ," : "") + "externalReference varchar(255) CHARACTER SET utf8 NOT NULL)"; sql2 = "CREATE TEMPORARY TABLE " + tempTable2 + " (senseId varchar(255) CHARACTER SET utf8 NOT NULL," + ((usingSynsetAxis == true) ? "synsetId varchar(255) CHARACTER SET utf8," : "") + "externalReference varchar(255) CHARACTER SET utf8)"; break; } sourceConnection.executeUpdateQuery(sql1); destConnection.executeUpdateQuery(sql2); // Insert data into these temp tables //source insert2DefaultTemporaryTable(0, usingSynsetAxis); //dest insert2DefaultTemporaryTable(1, usingSynsetAxis); switch (source.getDBType()) { case DBConfig.H2: sourceConnection.executeUpdateQuery("CREATE INDEX IF NOT EXISTS i_" + tempTable1 + " ON " + tempTable1 + " (externalReference)"); destConnection.executeUpdateQuery("CREATE INDEX IF NOT EXISTS i_" + tempTable1 + " ON " + tempTable1 + " (externalReference)"); break; case DBConfig.MYSQL: // Create an index on externalReference. sourceConnection.executeUpdateQuery("ALTER TABLE " + tempTable1 + " ADD INDEX i_" + tempTable1 + "_externalReference (externalReference)"); destConnection.executeUpdateQuery("ALTER TABLE " + tempTable2 + " ADD INDEX i_" + tempTable2 + "_externalReference (externalReference)"); break; } } /** * In case you need your own temporary structure, use this method * * @param declareFields * : Declare types, constraints... for all fields <br> * E.g: * "senseId varchar(255) NOT NULL, externalReference varchar(255)" * @param sqlInsertData * : the data collected from other tables. <br> * E.g: "Select * from A join B on (A.a=B.b)". The selection * should be all rows, data you need for your later processing * @param DB * : 0 if source, 1 if dest RESOURCE * * @throws SQLException */ public void createTempTable(String declareFields, String sqlInsertData,int DB) throws SQLException{ switch (DB) { case 0:// Source // new: instead doQuery sourceConnection.executeUpdateQuery("Create TEMPORARY TABLE " + tempTable1 + " (" + declareFields + ")"); sourceConnection.executeUpdateQuery("INSERT INTO " + tempTable1 + " " + sqlInsertData); break; case 1:// Dest destConnection.executeUpdateQuery("Create TEMPORARY TABLE " + tempTable2 + " (" + declareFields + ")"); destConnection.executeUpdateQuery("INSERT INTO " + tempTable2 + " " + sqlInsertData); break; } } /** * * @param wnSynsetOffset * @param wnLemma * @param DB * : 0 = if source, 1 if dest resource * @return list of sense IDs * @throws SQLException */ public List<String> getSensesByWNSynsetOffsetAndLemma(String wnSynsetOffset, String wnLemma, int DB) throws SQLException { String[] temp = wnSynsetOffset.split("-"); String refId = "[POS: noun] "; if (temp[1].equals("a")){ refId=refId.replaceAll("noun", "adjective"); }else if (temp[1].equals("r")){ refId=refId.replaceAll("noun", "adverb"); }else if (temp[1].equals("v")){ refId=refId.replaceAll("noun", "verb"); } refId=refId+temp[0]; List<String> returnList = null; String sql = ""; ResultSet rs = null; switch (DB) { case 0: switch (source.getDBType()) { case DBConfig.H2: sql = "SELECT senseId, writtenForm, externalReference " + "FROM " + tempTable1 + " WHERE externalReference='" + refId + "' AND writtenForm='"+ wnLemma+ "'"; rs = sourceConnection.doQuery(sql); break; case DBConfig.MYSQL: sql = "SELECT senseId, writtenForm, externalReference " + "FROM " + tempTable1 + " WHERE externalReference=\"" + refId + "\" AND writtenForm=\""+ wnLemma+ "\""; rs = sourceConnection.doQuery(sql); break; } break; case 1: switch (source.getDBType()) { case DBConfig.H2: sql = "SELECT senseId, writtenForm, externalReference " + "FROM " + tempTable1 + " WHERE externalReference='" + refId + "' AND writtenForm='"+ wnLemma+ "'"; rs = destConnection.doQuery(sql); break; case DBConfig.MYSQL: sql = "SELECT senseId, writtenForm, externalReference " + "FROM " + tempTable1 + " WHERE externalReference=\"" + refId + "\" AND writtenForm=\""+ wnLemma+ "\""; rs = destConnection.doQuery(sql); break; } break; } if (rs != null) { returnList = new ArrayList<String>(); while (rs.next()) { String sId = rs.getString("senseId"); returnList.add(sId); } } return returnList; } /** * * @param referenceID * : externalReference value * @param DB * : O if source, 1 if dest RESOURCE * @return list of senses by external reference ID * @throws SQLException */ public List<Sense> getSensesByExternalRefID(String referenceID, int DB,boolean usingSynsetId) throws SQLException { List<Sense> returnList = null; String sql = ""; ResultSet rs = null; switch (DB) { case 0: switch (source.getDBType()) { case DBConfig.H2: sql = "Select senseId, externalReference " + ((usingSynsetId == true) ? ",synsetId" : " ") + " from " + tempTable1 + " where externalReference='" + referenceID + "'"; rs = sourceConnection.doQuery(sql); break; case DBConfig.MYSQL: sql = "Select senseId, externalReference " + ((usingSynsetId == true) ? ",synsetId" : " ") + " from " + tempTable1 + " where externalReference=\"" + referenceID + "\""; rs = sourceConnection.doQuery(sql); break; } break; case 1: switch (source.getDBType()) { case DBConfig.H2: sql = "Select senseId, externalReference " + ((usingSynsetId == true) ? ",synsetId" : " ") + " from " + tempTable2 + " where externalReference='" + referenceID + "'"; rs = destConnection.doQuery(sql); break; case DBConfig.MYSQL: sql = "Select senseId, externalReference " + ((usingSynsetId == true) ? ",synsetId" : " ") + " from " + tempTable2 + " where externalReference=\"" + referenceID + "\""; rs = destConnection.doQuery(sql); break; } break; } if (rs != null) { returnList = new ArrayList<Sense>(); while (rs.next()) { String ref = rs.getString("externalReference"); if (ref.equals(referenceID)) { Sense sense = new Sense(); // senseAlignment just need sense ID so hopefully, the null // value of // other attributes will not cause any problem sense.setId(rs.getString("senseId")); if (usingSynsetId) { Synset synset = new Synset(); synset.setId(rs.getString("synsetId")); sense.setSynset(synset); } returnList.add(sense); } } } return returnList; } public void destroyTempTable() throws SQLException { destConnection.executeUpdateQuery("DROP TABLE " + tempTable1); sourceConnection.executeUpdateQuery("DROP TABLE " + tempTable2); } private void insert2DefaultTemporaryTable(int DB,boolean usingSynsetAxis) throws SQLException { ResultSet rs = null; int rows = 0; int loop = 0; int limitRowsEachInsertion=1000000; /* * I have to limit the number of rows for each time insert database, because * the innodb_buffer_pool_size is limited to 8MB. * So I decided to import data into db each time 100k rows. * * Tested! Not efficient! Please set innodb_buffer_pool_size to 256MB */ String sql1 = " Select Count(Sense.senseId) as count from Sense JOIN MonolingualExternalRef on " + " (Sense.senseId=MonolingualExternalRef.senseId)"; String sql2 = " Select Count(Sense.senseId) as count " + "FROM Sense " + "JOIN Synset ON Sense.synsetId=Synset.synsetId " + "JOIN MonolingualExternalRef ON Synset.synsetId=MonolingualExternalRef.synsetId"; // Insert data into these temp tables String sql1_Insert = "INSERT INTO " + ((DB==0)?tempTable1:tempTable2) + " SELECT Sense.senseId, " + ((usingSynsetAxis == true) ? "Sense.synsetId," : "") + " MonolingualExternalRef.externalReference " + " FROM Sense JOIN MonolingualExternalRef" + " ON (Sense.senseId=MonolingualExternalRef.senseId)"; String sql2_Insert = "INSERT INTO " + ((DB==0)?tempTable1:tempTable2) + " SELECT Sense.senseId, " + ((usingSynsetAxis == true) ? "Sense.synsetId," : "") + " MonolingualExternalRef.externalReference " + "FROM Sense " + "JOIN Synset ON Sense.synsetId=Synset.synsetId " + "JOIN MonolingualExternalRef ON Synset.synsetId=MonolingualExternalRef.synsetId"; switch (DB) { case 0: if (typeSource == 1) { rs = sourceConnection.doQuery(sql2); } else if (typeSource == 0) { rs = sourceConnection.doQuery(sql1); } rs.next(); rows = rs.getInt("count"); loop = rows / limitRowsEachInsertion + 1; for (int i = 0; i < loop; i++) { int first = i*limitRowsEachInsertion; if (typeSource == 1) { sourceConnection.executeUpdateQuery(sql2_Insert+" LIMIT "+first+","+limitRowsEachInsertion); } else if (typeSource == 0) { sourceConnection.executeUpdateQuery(sql1_Insert+" LIMIT "+first+","+limitRowsEachInsertion); } } break; case 1: rs = null; if (typeDest == 1) { rs = destConnection.doQuery(sql2); } else if (typeDest == 0) { rs = destConnection.doQuery(sql1); } rs.next(); rows = rs.getInt("count"); //100k rows for each time loop = rows / limitRowsEachInsertion + 1; for (int i = 0; i < loop; i++) { int first=i*limitRowsEachInsertion; if (typeDest == 1) { destConnection.executeUpdateQuery(sql2_Insert+" LIMIT "+first+","+limitRowsEachInsertion); } else if (typeDest == 0) { destConnection.executeUpdateQuery(sql1_Insert+" LIMIT "+first+","+limitRowsEachInsertion); } } break; } } }