/*
* Copyright 2012
* Ubiquitous Knowledge Processing (UKP) Lab
* Technische Universität Darmstadt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package de.tudarmstadt.ukp.dkpro.core.io.jdbc;
import de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData;
import org.apache.commons.dbutils.DbUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.component.CasCollectionReader_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
import java.io.IOException;
import java.sql.*;
import java.util.HashSet;
import java.util.Set;
import static java.util.Arrays.asList;
/**
* Collection reader for JDBC database.The obtained data will be written into CAS DocumentText as
* well as fields of the {@link DocumentMetaData} annotation.
* <p>
* The field names are available as constants and begin with <code>CAS_</code>. Please specify the
* mapping of the columns and the field names in the query. For example,
* <p>
* <code>SELECT text AS cas_text, title AS cas_metadata_title FROM test_table</code>
* <p>
* will create a CAS for each record, write the content of "text" column into CAS document text and
* that of "title" column into the document title field of the {@link DocumentMetaData} annotation.
*/
@TypeCapability(
outputs = {
"de.tudarmstadt.ukp.dkpro.core.api.metadata.type.DocumentMetaData" })
public class JdbcReader
extends CasCollectionReader_ImplBase
{
public static final String CAS_TEXT = "cas_text";
public static final String CAS_METADATA_TITLE = "cas_metadata_title";
public static final String CAS_METADATA_LANGUAGE = "cas_metadata_language";
public static final String CAS_METADATA_DOCUMENT_ID = "cas_metadata_document_id";
public static final String CAS_METADATA_COLLECTION_ID = "cas_metadata_collection_id";
public static final String CAS_METADATA_DOCUMENT_URI = "cas_metadata_document_uri";
public static final String CAS_METADATA_DOCUMENT_BASE_URI = "cas_metadata_document_base_uri";
private static final Set<String> CAS_COLUMNS = new HashSet<>(asList(
CAS_TEXT, CAS_METADATA_TITLE, CAS_METADATA_LANGUAGE, CAS_METADATA_DOCUMENT_ID,
CAS_METADATA_COLLECTION_ID, CAS_METADATA_DOCUMENT_URI, CAS_METADATA_DOCUMENT_BASE_URI));
/**
* Specify the class name of the JDBC driver.
* <p>
* If used with uimaFIT and the value is not given, <code>com.mysql.cj.jdbc.Driver</code> will be
* taken.
*/
public static final String PARAM_DRIVER = "driver";
@ConfigurationParameter(name = PARAM_DRIVER, mandatory = true, defaultValue = "com.mysql.cj.jdbc.Driver")
private String driver;
/**
* Specifies the URL to the database.
* <p>
* If used with uimaFIT and the value is not given, <code>jdbc:mysql://127.0.0.1/</code> will be
* taken.
* <p>
* Do not use this parameter to add additional parameters, but use {@link #PARAM_CONNECTION_PARAMS}
* instead.
*/
public static final String PARAM_CONNECTION = "connection";
@ConfigurationParameter(name = PARAM_CONNECTION, mandatory = true, defaultValue = "jdbc:mysql://127.0.0.1/")
private String connection;
/**
* Add additional parameters for the connection URL here in a single string: {@code [&propertyName1=propertyValue1[&propertyName2=propertyValue2]...]}.
*/
public static final String PARAM_CONNECTION_PARAMS = "connectionParams";
@ConfigurationParameter(name = PARAM_CONNECTION_PARAMS, mandatory = true, defaultValue = "")
private String connectionParams;
/**
* Specifies name of the database to be accessed.
*/
public static final String PARAM_DATABASE = "database";
@ConfigurationParameter(name = PARAM_DATABASE, mandatory = true)
private String database;
/**
* Specifies the user name for database access.
*/
public static final String PARAM_USER = "user";
@ConfigurationParameter(name = PARAM_USER, mandatory = true)
private String user;
/**
* Specifies the password for database access.
*/
public static final String PARAM_PASSWORD = "password";
@ConfigurationParameter(name = PARAM_PASSWORD, mandatory = true)
private String password;
/**
* Specifies the query.
*/
public static final String PARAM_QUERY = "query";
@ConfigurationParameter(name = PARAM_QUERY, mandatory = true)
private String query;
/**
* Specifies the language.
*/
public static final String PARAM_LANGUAGE = "language";
@ConfigurationParameter(name = PARAM_LANGUAGE, mandatory = false)
private String language;
private Connection sqlConnection;
private ResultSet resultSet;
private int resultSetSize;
private int completed;
private Set<String> columnNames;
@Override
public void initialize(UimaContext context)
throws ResourceInitializationException
{
super.initialize(context);
try {
openDatabaseConnection();
query();
}
catch (SQLException | ClassNotFoundException e) {
throw new ResourceInitializationException(e);
}
}
private void openDatabaseConnection()
throws SQLException, ClassNotFoundException
{
// Open connection
if (!connection.endsWith("/")) {
connection = connection + "/";
}
String url = connection + database + "?user=" + user + "&password=" + password
+ connectionParams;
Class.forName(driver);
sqlConnection = DriverManager.getConnection(url);
}
private void query()
throws SQLException
{
getLogger().info("Executing query: '" + query + "'.");
Statement statement = sqlConnection
.createStatement(ResultSet.TYPE_SCROLL_INSENSITIVE, ResultSet.CONCUR_READ_ONLY);
resultSet = statement.executeQuery(query);
resultSet.last();
resultSetSize = resultSet.getRow();
getLogger().info(resultSetSize + " documents retrieved.");
resultSet.beforeFirst();
completed = 0;
// Store available column names
columnNames = new HashSet<>();
ResultSetMetaData meta = resultSet.getMetaData();
for (int i = 1; i < meta.getColumnCount() + 1; i++) {
String columnName = meta.getColumnLabel(i);
columnNames.add(columnName);
if (!CAS_COLUMNS.contains(columnName)) {
getLogger().warn("Unknown column [" + columnName + "].");
}
}
}
@Override
public void getNext(CAS cas)
throws IOException, CollectionException
{
try {
resultSet.next(); // advance to next item (used to be done in hasNext())
}
catch (SQLException e) {
throw new CollectionException(e);
}
// Store data into CAS
DocumentMetaData metadata;
try {
metadata = DocumentMetaData.create(cas);
}
catch (CASException e) {
throw new CollectionException(e);
}
cas.setDocumentText(getStringQuietly(CAS_TEXT));
metadata.setDocumentTitle(getStringQuietly(CAS_METADATA_TITLE));
String language = getStringQuietly(CAS_METADATA_LANGUAGE);
if (language == null) {
language = this.language;
}
metadata.setLanguage(language);
metadata.setDocumentId(getStringQuietly(CAS_METADATA_DOCUMENT_ID));
metadata.setCollectionId(getStringQuietly(CAS_METADATA_COLLECTION_ID));
metadata.setDocumentUri(getStringQuietly(CAS_METADATA_DOCUMENT_URI));
metadata.setDocumentBaseUri(getStringQuietly(CAS_METADATA_DOCUMENT_BASE_URI));
completed++;
}
private String getStringQuietly(String columnName)
{
if (columnNames.contains(columnName)) {
try {
return resultSet.getString(columnName);
}
catch (SQLException e) {
getLogger().warn("Error getting value for column [" + columnName + "].", e);
}
}
return null;
}
@Override
public Progress[] getProgress()
{
// REC: It should be possible to determine the current row and the total size of the result
// set and use this here to return progress information.
return new Progress[] { new ProgressImpl(completed, resultSetSize, "row") };
}
@Override
public boolean hasNext()
throws IOException, CollectionException
{
return (completed < resultSetSize);
}
@Override
public void close()
throws IOException
{
DbUtils.closeQuietly(resultSet);
DbUtils.closeQuietly(sqlConnection);
}
}