/*******************************************************************************
* Copyright 2013
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package de.tudarmstadt.ukp.csniper.resbuild;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import de.tudarmstadt.ukp.csniper.webapp.evaluation.model.Corpus;
import de.tudarmstadt.ukp.csniper.webapp.search.CorpusService;
import de.tudarmstadt.ukp.csniper.webapp.search.SearchEngine;
import de.tudarmstadt.ukp.csniper.webapp.search.cqp.ContextUnit;
import de.tudarmstadt.ukp.csniper.webapp.search.cqp.CqpEngine;
import de.tudarmstadt.ukp.csniper.webapp.search.cqp.CqpQuery;
public class EvaluationItemFixer
{
private static final Log log = LogFactory.getLog(EvaluationItemFixer.class);
private static final String HOST = "jdbc:mysql://loewe-ncc.ukp.informatik.tu-darmstadt.de/";
private static final String DATABASE = "csniper";
private static final String USER = "csniper";
private static final String PASSWORD = "csniper";
// private static final String CQP_EXECUTABLE = "/opt/imscwb/cqp";
// private static final String CORPUS_REPOSITORY = "/srv/csniper";
// private static final String HOST = "jdbc:mysql://127.0.0.1/";
// private static final String DATABASE = "csniper";
// private static final String USER = "root";
// private static final String PASSWORD = "gugaguga";
private static final String CQP_EXECUTABLE = "D:\\ukp\\cwb-3.4.3\\bin\\cqp.exe";
private static final String REPOSITORY = "D:\\ukp\\data\\csniper";
private static final String LOG_SUCCESSFUL = "C:\\users\\dodinh\\desktop\\successful.txt";
private static final String LOG_FAILED = "C:\\users\\dodinh\\desktop\\failed.txt";
private static final String LRB = "-LRB-";
private static final String RRB = "-RRB-";
private static Connection connection;
private static CqpEngine engine = new CqpEngine()
{
private static final long serialVersionUID = 1L;
{
setCorpusService(new CorpusService()
{
@Override
public List<SearchEngine> listEngines(String aCorpusId)
{
return null;
}
@Override
public List<String> listCorpora()
{
return null;
}
@Override
public File getRepositoryPath()
{
return new File(REPOSITORY);
}
@Override
public Corpus getCorpus(String aCorpusId)
{
return null;
}
});
setCqpExecutable(new File(CQP_EXECUTABLE));
}
};
public static void main(String[] args)
{
connect(HOST, DATABASE, USER, PASSWORD);
Map<Integer, String> items = new HashMap<Integer, String>();
Map<Integer, String> failed = new HashMap<Integer, String>();
// fetch coveredTexts of dubious items and clean it
PreparedStatement select = null;
try {
StringBuilder selectQuery = new StringBuilder();
selectQuery.append("SELECT * FROM EvaluationItem ");
selectQuery.append("WHERE LOCATE(coveredText, ' ') > 0 ");
selectQuery.append("OR LOCATE('" + LRB + "', coveredText) > 0 ");
selectQuery.append("OR LOCATE('" + RRB + "', coveredText) > 0 ");
selectQuery.append("OR LEFT(coveredText, 1) = ' ' ");
selectQuery.append("OR RIGHT(coveredText, 1) = ' ' ");
select = connection.prepareStatement(selectQuery.toString());
log.info("Running query [" + selectQuery.toString() + "].");
ResultSet rs = select.executeQuery();
while (rs.next()) {
int id = rs.getInt("id");
String coveredText = rs.getString("coveredText");
try {
// special handling of double whitespace: in this case, re-fetch the text
if (coveredText.contains(" ")) {
coveredText = retrieveCoveredText(rs.getString("collectionId"),
rs.getString("documentId"), rs.getInt("beginOffset"),
rs.getInt("endOffset"));
}
// replace bracket placeholders and trim the text
coveredText = StringUtils.replace(coveredText, LRB, "(");
coveredText = StringUtils.replace(coveredText, RRB, ")");
coveredText = coveredText.trim();
items.put(id, coveredText);
}
catch (IllegalArgumentException e) {
failed.put(id, e.getMessage());
}
}
}
catch (SQLException e) {
log.error("Exception while selecting: " + e.getMessage());
}
finally {
closeQuietly(select);
}
// write logs
BufferedWriter bwf = null;
BufferedWriter bws = null;
try {
bwf = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(
LOG_FAILED)), "UTF-8"));
for (Entry<Integer, String> e : failed.entrySet()) {
bwf.write(e.getKey() + " - " + e.getValue() + "\n");
}
bws = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(new File(
LOG_SUCCESSFUL)), "UTF-8"));
for (Entry<Integer, String> e : items.entrySet()) {
bws.write(e.getKey() + " - " + e.getValue() + "\n");
}
}
catch (IOException e) {
log.error("Got an IOException while writing the log files.");
}
finally {
IOUtils.closeQuietly(bwf);
IOUtils.closeQuietly(bws);
}
log.info("Texts for [" + items.size() + "] items need to be cleaned up.");
// update the dubious items with the cleaned coveredText
PreparedStatement update = null;
try {
String updateQuery = "UPDATE EvaluationItem SET coveredText = ? WHERE id = ?";
update = connection.prepareStatement(updateQuery);
int i = 0;
for (Entry<Integer, String> e : items.entrySet()) {
int id = e.getKey();
String coveredText = e.getValue();
// update item in database
update.setString(1, coveredText);
update.setInt(2, id);
update.executeUpdate();
log.debug("Updating " + id + " with [" + coveredText + "]");
// show percentage of updated items
i++;
int part = (int) Math.ceil((double) items.size() / 100);
if (i % part == 0) {
log.info(i / part + "% finished (" + i + "/" + items.size() + ").");
}
}
}
catch (SQLException e) {
log.error("Exception while updating: " + e.getMessage());
}
finally {
closeQuietly(update);
}
closeQuietly(connection);
}
private static void connect(String aHost, String aDatabase, String aUser, String aPassword)
{
String url = aHost + aDatabase + "?user=" + aUser + "&password=" + aPassword;
try {
Class.forName("com.mysql.jdbc.Driver");
connection = DriverManager.getConnection(url);
}
catch (ClassNotFoundException e) {
throw new RuntimeException("Failed to load the specified database driver.", e);
}
catch (SQLException e) {
throw new RuntimeException(
"There was an unrecoverable error while connecting to the database.", e);
}
}
/**
* Using a workaround for java < 1.7.
*/
private static void closeQuietly(Object aAutoCloseable)
{
try {
if (aAutoCloseable != null) {
if (aAutoCloseable instanceof Connection) {
((Connection) aAutoCloseable).close();
}
if (aAutoCloseable instanceof Statement) {
((Statement) aAutoCloseable).close();
}
}
}
catch (Exception e) {
log.error("There was an unrecoverable error while closing [" + aAutoCloseable + "].", e);
}
}
private static String retrieveCoveredText(final String aCollectionId, String aDocumentId,
int aBeginOffset, int aEndOffset)
{
String coveredText;
CqpQuery query = new CqpQuery(engine, "", aCollectionId);
query.setContext(0, 0, ContextUnit.CHARACTER);
String queryString = "[begin=\"" + aBeginOffset + "\"] []* [end=\"" + aEndOffset
+ "\"] :: match.text_id=\"" + aDocumentId + "\"";
log.trace(queryString);
query.runQuery(queryString);
if (query.size() > 1) {
log.warn("More than 1 entry found: " + query);
}
if (query.size() == 0) {
log.error("Nothing found for: " + queryString);
query.close();
throw new IllegalArgumentException(queryString);
}
coveredText = query.cat(1).get(0).getCoveredText();
query.close();
return coveredText;
}
}