/*******************************************************************************
* Copyright 2012
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package de.tudarmstadt.ukp.csniper.ml;
import static java.util.Arrays.asList;
import java.io.IOException;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.apache.commons.lang.StringUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
import org.apache.uima.fit.component.CasCollectionReader_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;
/**
* Collection reader for JDBC database.The obtained data will be written into CAS DocumentText as
* well as fields of the {@link DocumentMetaData} annotation.
* <p>
* The field names are available as constants and begin with <code>CAS_</code>. Please specify the
* mapping of the columns and the field names in the query. For example,
* <p>
* <code>SELECT text AS cas_text, title AS cas_metadata_title FROM test_table</code>
* <p>
* will create a CAS for each record, write the content of "text" column into CAS documen text and
* that of "title" column into the document title field of the {@link DocumentMetaData} annotation.
*
* @author Shuo Yang
*/
public class JdbcCustomReader
extends CasCollectionReader_ImplBase
{
public static final String CAS_TEXT = "cas_text";
public static final String CAS_METADATA_TITLE = "cas_metadata_title";
public static final String CAS_METADATA_LANGUAGE = "cas_metadata_language";
public static final String CAS_METADATA_DOCUMENT_ID = "cas_metadata_document_id";
public static final String CAS_METADATA_COLLECTION_ID = "cas_metadata_collection_id";
public static final String CAS_METADATA_DOCUMENT_URI = "cas_metadata_document_uri";
public static final String CAS_METADATA_DOCUMENT_BASE_URI = "cas_metadata_document_base_uri";
private static final Set<String> CAS_COLUMNS = new HashSet<String>(asList(
CAS_TEXT, CAS_METADATA_TITLE, CAS_METADATA_LANGUAGE, CAS_METADATA_DOCUMENT_ID,
CAS_METADATA_COLLECTION_ID, CAS_METADATA_DOCUMENT_URI, CAS_METADATA_DOCUMENT_BASE_URI));
/**
* Optional for uimaFIT, mandatory for UIMA,. Specify the class name of the JDBC driver.
* <p>
* If used with uimaFIT and the value is not given, <code>com.mysql.jdbc.Driver</code> will be
* taken.
*/
public static final String PARAM_DRIVER = "Driver";
@ConfigurationParameter(name = PARAM_DRIVER, mandatory = true, defaultValue = "com.mysql.jdbc.Driver")
private String driver;
/**
* Optional for uimaFIT, mandatory for UIMA. Specifies the URL to the database.
* <p>
* If used with uimaFIT and the value is not given, <code>jdbc:mysql://127.0.0.1/</code> will be
* taken.
*/
public static final String PARAM_CONNECTION = "Connection";
@ConfigurationParameter(name = PARAM_CONNECTION, mandatory = true, defaultValue = "jdbc:mysql://127.0.0.1/")
private String connection;
/**
* Mandatory. Specifies name of the database to be accessed.
*/
public static final String PARAM_DATABASE = "Database";
@ConfigurationParameter(name = PARAM_DATABASE, mandatory = true)
private String database;
/**
* Mandatory. Specifies the user name for database access.
*/
public static final String PARAM_USER = "User";
@ConfigurationParameter(name = PARAM_USER, mandatory = true)
private String user;
/**
* Mandatory. Specifies the password for database access.
*/
public static final String PARAM_PASSWORD = "Password";
@ConfigurationParameter(name = PARAM_PASSWORD, mandatory = true)
private String password;
/**
* Mandatory. Specifies the query.
*/
public static final String PARAM_QUERY = "Query";
@ConfigurationParameter(name = PARAM_QUERY, mandatory = true)
private String query;
/**
* Optional. Query which sets user variables for the "real" query.
*/
public static final String PARAM_SETTER_QUERY = "SetterQuery";
@ConfigurationParameter(name = PARAM_SETTER_QUERY, mandatory = true)
private String setterQuery;
private Connection sqlConnection;
private ResultSet resultSet;
private int resultSetSize;
private int completed;
private Set<String> columnNames;
@Override
public void initialize(UimaContext context)
throws ResourceInitializationException
{
super.initialize(context);
openDatabaseConnection();
query();
}
private void openDatabaseConnection()
{
// Open connection
if (!connection.endsWith("/")) {
connection = connection + "/";
}
String url = connection + database + "?user=" + user + "&password=" + password;
try {
Class.forName(driver);
sqlConnection = DriverManager.getConnection(url);
}
catch (ClassNotFoundException e) {
throw new RuntimeException("Failed to load the specified database driver.", e);
}
catch (SQLException e) {
throw new RuntimeException(
"There was an unrecoverable error while connecting to the database.", e);
}
}
private void query()
{
try {
Statement statement = sqlConnection.createStatement();
// execute query which sets user variables
Iterator<String> it = Arrays.asList(StringUtils.split(setterQuery, "\n")).iterator();
StringBuilder sb = new StringBuilder();
while (it.hasNext()) {
String line = it.next();
if (line.trim().startsWith("#")) {
continue;
}
else if (line.trim().endsWith(";")) {
sb.append(line);
statement.addBatch(sb.toString());
sb = new StringBuilder();
}
else {
sb.append(line);
}
}
statement.executeBatch();
statement.executeQuery(query);
resultSet = statement.getResultSet();
resultSet.last();
resultSetSize = resultSet.getRow();
resultSet.beforeFirst();
completed = 0;
// Store available column names
columnNames = new HashSet<String>();
ResultSetMetaData meta = resultSet.getMetaData();
for (int i = 1; i < meta.getColumnCount() + 1; i++) {
String columnName = meta.getColumnLabel(i);
columnNames.add(columnName);
if (!CAS_COLUMNS.contains(columnName)) {
getLogger().warn("Unknown column [" + columnName + "].");
}
}
}
catch (SQLException e) {
throw new RuntimeException(
"There was an unrecoverable error executing the specified SQL statement.", e);
}
}
@Override
public void getNext(CAS cas)
throws IOException, CollectionException
{
// Store data into CAS
DocumentMetaData metadata = null;
try {
metadata = DocumentMetaData.create(cas);
}
catch (CASException e) {
throw new CollectionException(e);
}
cas.setDocumentText(getStringQuietly(CAS_TEXT));
metadata.setDocumentTitle(getStringQuietly(CAS_METADATA_TITLE));
metadata.setLanguage(getStringQuietly(CAS_METADATA_LANGUAGE));
metadata.setDocumentId(getStringQuietly(CAS_METADATA_DOCUMENT_ID));
metadata.setCollectionId(getStringQuietly(CAS_METADATA_COLLECTION_ID));
metadata.setDocumentUri(getStringQuietly(CAS_METADATA_DOCUMENT_URI));
metadata.setDocumentBaseUri(getStringQuietly(CAS_METADATA_DOCUMENT_BASE_URI));
completed++;
}
private String getStringQuietly(String columnName)
{
if (columnNames.contains(columnName)) {
try {
return resultSet.getString(columnName);
}
catch (SQLException e) {
getLogger().warn("Error getting value for column [" + columnName + "].", e);
}
}
return null;
}
@Override
public Progress[] getProgress()
{
// REC: It should be possible to determine the current row and the total size of the result
// set and use this here to return progress information.
return new Progress[] { new ProgressImpl( completed, resultSetSize, "row" ) };
}
@Override
public boolean hasNext()
throws IOException, CollectionException
{
try {
// REC: this a problem because hasNext() might be called many times which would cause
// skipping of rows.
return resultSet.next();
}
catch (SQLException e) {
throw new CollectionException(e);
}
}
@Override
public void close()
throws IOException
{
try {
// REC: each of these should be in its own try/catch. Maybe IOUtils and friends
// have a proper null-safe static closeQuietly() method for SQL connections and result
// sets?
resultSet.close();
sqlConnection.close();
}
catch (SQLException e) {
throw new RuntimeException("Couldn't close the database connection.", e);
}
super.close();
}
}