/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.operator.io;
import java.io.File;
import java.io.IOException;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.DatabaseExampleTable;
import com.rapidminer.example.table.ExampleTable;
import com.rapidminer.gui.wizards.DBExampleSourceConfigurationWizardCreator;
import com.rapidminer.operator.IOObject;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeCategory;
import com.rapidminer.parameter.ParameterTypeConfiguration;
import com.rapidminer.parameter.ParameterTypeFile;
import com.rapidminer.parameter.ParameterTypePassword;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.parameter.ParameterTypeText;
import com.rapidminer.parameter.TextType;
import com.rapidminer.parameter.UndefinedParameterError;
import com.rapidminer.tools.Tools;
import com.rapidminer.tools.jdbc.DatabaseHandler;
import com.rapidminer.tools.jdbc.DatabaseService;
/**
* <p>This operator reads an {@link com.rapidminer.example.ExampleSet} from an SQL
* database. The SQL query can be passed to RapidMiner via a parameter or, in case of
* long SQL statements, in a separate file. Please note that column names are
* often case sensitive. Databases may behave differently here.</p>
*
* <p>The most convenient way of defining the necessary parameters is the
* configuration wizard. The most important parameters (database URL and user name) will
* be automatically determined by this wizard and it is also possible to define
* the special attributes like labels or ids.</p>
*
* <p>Please note that this operator supports two basic working modes:</p>
* <ol>
* <li>reading the data from the database and creating an example table in main memory</li>
* <li>keeping the data in the database and directly working on the database table </li>
* </ol>
* <p>The latter possibility will be turned on by the parameter "work_on_database".
* Please note that this working mode is still regarded as experimental and errors might
* occur. In order to ensure proper data changes the database working mode is only allowed
* on a single table which must be defined with the parameter "table_name".
* IMPORTANT: If you encounter problems during data updates (e.g. messages that the result set is not
* updatable) you probably have to define a primary key for your table.</p>
*
* <p>If you are not directly working on the database, the data will be read with an arbitrary
* SQL query statement (SELECT ... FROM ... WHERE ...) defined by "query" or "query_file".
* The memory mode is the recommended way of using this operator. This is especially important for
* following operators like learning schemes which would often load (most of) the data into main memory
* during the learning process. In these cases a direct working on the database is not recommended
* anyway.</p>
*
* <h5>Warning</h5>
* As the java <code>ResultSetMetaData</code> interface does not provide
* information about the possible values of nominal attributes, the internal
* indices the nominal values are mapped to will depend on the ordering
* they appear in the table. This may cause problems only when processes are
* split up into a training process and an application or testing process.
* For learning schemes which are capable of handling nominal attributes, this
* is not a problem. If a learning scheme like a SVM is used with nominal data,
* RapidMiner pretends that nominal attributes are numerical and uses indices for the
* nominal values as their numerical value. A SVM may perform well if there are
* only two possible values. If a test set is read in another process, the
* nominal values may be assigned different indices, and hence the SVM trained
* is useless. This is not a problem for label attributes, since the classes can
* be specified using the <code>classes</code> parameter and hence, all
* learning schemes intended to use with nominal data are safe to use.
*
* @rapidminer.todo Fix the above problem. This may not be possible effeciently since
* it is not supported by the Java ResultSet interface.
*
* @author Ingo Mierswa
* @version $Id: DatabaseExampleSource.java,v 1.11 2006/03/27 13:22:00
* ingomierswa Exp $
*/
public class DatabaseExampleSource extends ResultSetExampleSource {
/** The parameter name for "If set to true, the data read from the database is NOT copied to main memory. All operations that change data will modify the database." */
public static final String PARAMETER_WORK_ON_DATABASE = "work_on_database";
/** The parameter name for "Indicates the used database system" */
public static final String PARAMETER_DATABASE_SYSTEM = "database_system";
/** The parameter name for "The complete URL connection string for the database, e.g. 'jdbc:mysql://foo.bar:portnr/database'" */
public static final String PARAMETER_DATABASE_URL = "database_url";
/** The parameter name for "Database username." */
public static final String PARAMETER_USERNAME = "username";
/** The parameter name for "Password for the database." */
public static final String PARAMETER_PASSWORD = "password";
/** The parameter name for "SQL query. If not set, the query is read from the file specified by 'query_file'." */
public static final String PARAMETER_QUERY = "query";
/** The parameter name for "File containing the query. Only evaluated if 'query' is not set." */
public static final String PARAMETER_QUERY_FILE = "query_file";
/** The parameter name for "Use this table if work_on_database is true or no other query is specified." */
public static final String PARAMETER_TABLE_NAME = "table_name";
/** The parameter name for "Whitespace separated list of possible class values of the label attribute." */
public static final String PARAMETER_CLASSES = "classes";
/** The database connection handler. */
private DatabaseHandler databaseHandler;
/** This is only used for the case that the data is read into memory. */
private Statement statement;
public DatabaseExampleSource(OperatorDescription description) {
super(description);
}
public IOObject[] apply() throws OperatorException {
if (!getParameterAsBoolean(PARAMETER_WORK_ON_DATABASE)) {
IOObject[] output = super.apply();
disconnect();
return output;
} else {
try {
String tableName = getParameterAsString(PARAMETER_TABLE_NAME);
if (tableName == null) {
throw new UserError(this, 201, new Object[] { "table_name", "work_on_database", "true" });
}
ExampleTable table = DatabaseExampleTable.createDatabaseExampleTable(getConnectedDatabaseHandler(), tableName);
ExampleSet exampleSet = createExampleSet(table, this);
// statistics are necessary for value mapping
exampleSet.recalculateAllAttributeStatistics();
return new IOObject[] { exampleSet };
} catch (SQLException e) {
throw new UserError(this, e, 304, e.getMessage());
}
}
}
public void tearDown() {
if (this.statement != null) {
try {
this.statement.close();
} catch (SQLException e) {
logWarning("Cannot close statement.");
}
this.statement = null;
}
}
public void setNominalValues(List attributeList, ResultSet resultSet, Attribute label) throws UndefinedParameterError {
setNominalValuesForLabel(label);
}
private void setNominalValuesForLabel(Attribute label) throws UndefinedParameterError {
if (label != null) {
String classes = getParameterAsString(PARAMETER_CLASSES);
if (label.isNominal()) {
if (classes != null) {
String labelClasses[] = classes.split(" ");
for (int i = 0; i < labelClasses.length; i++) {
label.getMapping().mapString(labelClasses[i].trim());
}
}
} else {
if ((classes != null) && (classes.length() > 0))
logWarning("Ignoring classes for non-nominal attribute " + label.getName() + ".");
}
}
}
private String getQuery() throws OperatorException {
String query = getParameterAsString(PARAMETER_QUERY);
if (query != null)
query = query.trim();
String parameterUsed = null;
boolean warning = false;
if ((query == null) || (query.length() == 0)) {
File queryFile = getParameterAsFile(PARAMETER_QUERY_FILE);
if (queryFile != null) {
try {
query = Tools.readTextFile(queryFile);
parameterUsed = "query_file";
} catch (IOException ioe) {
throw new UserError(this, ioe, 302, new Object[] { queryFile, ioe.getMessage() });
}
if ((query == null) || (query.trim().length() == 0)) {
throw new UserError(this, 205, queryFile);
}
}
} else {
parameterUsed = "query";
if (isParameterSet(PARAMETER_QUERY_FILE)) {
warning = true;
}
}
if ((query == null) || (query.trim().length() == 0)) {
if (isParameterSet(PARAMETER_TABLE_NAME)) {
query = "SELECT * FROM " + getParameterAsString(PARAMETER_TABLE_NAME);
parameterUsed = "table_name";
}
} else if (isParameterSet(PARAMETER_TABLE_NAME)) {
warning = true;
}
if (query == null) {
throw new UserError(this, 202, new Object[] { "query", "query_file", "table_name" });
}
if (warning) {
logWarning("Only one of the parameters 'query', 'query_file', and 'table_name' have to be set. Using value of '" + parameterUsed + "'.");
}
return query;
}
protected DatabaseHandler getConnectedDatabaseHandler() throws OperatorException, SQLException {
String databaseURL = getParameterAsString(PARAMETER_DATABASE_URL);
String username = getParameterAsString(PARAMETER_USERNAME);
String password = getParameterAsString(PARAMETER_PASSWORD);
return DatabaseHandler.getConnectedDatabaseHandler(databaseURL, username, password,
DatabaseService.getJDBCProperties().get(getParameterAsInt(PARAMETER_DATABASE_SYSTEM)), this);
}
/**
* This method reads the file whose name is given, extracts the database
* access information and the query from it and executes the query. The
* query result is returned as a ResultSet.
*/
public ResultSet getResultSet() throws OperatorException {
ResultSet rs = null;
try {
databaseHandler = getConnectedDatabaseHandler();
String query = getQuery();
log("Executing query: '" + query + "'");
this.statement = databaseHandler.createStatement(false);
rs = this.statement.executeQuery(query);
log("Query executed.");
} catch (SQLException sqle) {
throw new UserError(this, sqle, 304, sqle.getMessage());
}
return rs;
}
public void processFinished() {
disconnect();
}
private void disconnect() {
// close statement
tearDown();
// close database connection
if (databaseHandler != null) {
try {
databaseHandler.disconnect();
databaseHandler = null;
} catch (SQLException e) {
logWarning("Cannot disconnect from database: " + e);
}
}
}
public List<ParameterType> getParameterTypes() {
// super parameters are added below...
List<ParameterType> types = new LinkedList<ParameterType>();
Map<String, String> wizardParameters = new HashMap<String, String>();
wizardParameters.put(DBExampleSourceConfigurationWizardCreator.PARAMETER_ONLY_TABLE_NAMES, "false");
wizardParameters.put(DBExampleSourceConfigurationWizardCreator.PARAMETER_SHOW_DATABASE_CONFIGURATION, "true");
ParameterType type = new ParameterTypeConfiguration(DBExampleSourceConfigurationWizardCreator.class, wizardParameters, this);
type.setExpert(false);
types.add(type);
types.add(new ParameterTypeBoolean(PARAMETER_WORK_ON_DATABASE, "(EXPERIMENTAL!) If set to true, the data read from the database is NOT copied to main memory. All operations that change data will modify the database.", false));
type = new ParameterTypeCategory(PARAMETER_DATABASE_SYSTEM, "Indicates the used database system", DatabaseService.getDBSystemNames(), 0);
type.setExpert(false);
types.add(type);
types.add(new ParameterTypeString(PARAMETER_DATABASE_URL, "The complete URL connection string for the database, e.g. 'jdbc:mysql://foo.bar:portnr/database'", false));
types.add(new ParameterTypeString(PARAMETER_USERNAME, "Database username.", false));
types.add(new ParameterTypePassword(PARAMETER_PASSWORD, "Password for the database."));
type = new ParameterTypeText(PARAMETER_QUERY, "SQL query. If not set, the query is read from the file specified by 'query_file'.", TextType.SQL);
type.setExpert(false);
types.add(type);
types.add(new ParameterTypeFile(PARAMETER_QUERY_FILE, "File containing the query. Only evaluated if 'query' is not set.", null, true));
types.add(new ParameterTypeString(PARAMETER_TABLE_NAME, "Use this table if work_on_database is true or no other query is specified."));
types.addAll(super.getParameterTypes());
types.add(new ParameterTypeString(PARAMETER_CLASSES, "Whitespace separated list of possible class values of the label attribute."));
return types;
}
}