/* * RapidMiner * * Copyright (C) 2001-2008 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.io; import java.sql.SQLException; import java.util.HashMap; import java.util.List; import java.util.Map; import com.rapidminer.example.ExampleSet; import com.rapidminer.example.table.DataRowFactory; import com.rapidminer.example.table.IndexCachedDatabaseExampleTable; import com.rapidminer.gui.wizards.DBExampleSourceConfigurationWizardCreator; import com.rapidminer.operator.IOObject; import com.rapidminer.operator.Operator; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.UserError; import com.rapidminer.operator.meta.BatchProcessing; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeBoolean; import com.rapidminer.parameter.ParameterTypeCategory; import com.rapidminer.parameter.ParameterTypeConfiguration; import com.rapidminer.parameter.ParameterTypePassword; import com.rapidminer.parameter.ParameterTypeString; import com.rapidminer.tools.jdbc.DatabaseHandler; import com.rapidminer.tools.jdbc.DatabaseService; /** * <p>This operator reads an {@link com.rapidminer.example.ExampleSet} from an SQL * database. The data is load from a single table which is defined with the * table name parameter. Please note that table and column names are * often case sensitive. Databases may behave differently here.</p> * * <p>The most convenient way of defining the necessary parameters is the * configuration wizard. The most important parameters (database URL and user name) will * be automatically determined by this wizard and it is also possible to define * the special attributes like labels or ids.</p> * * <p>In contrast to the DatabaseExampleSource operator, which loads the data into * the main memory, this operator keeps the data in the database and performs * the data reading in batches. This allows RapidMiner to access data sets of * arbitrary sizes without any size restrictions.</p> * * <p>Please note the following important restrictions and notes: * <ul> * <li>only manifested tables (no views) are allowed as the base for this data caching operator,</li> * <li>if no primary key and index is present, a new column named RM_INDEX is created and automatically used as primary key,</li> * <li>if a primary key is already present in the specified table, a new table named RM_MAPPED_INDEX is created mapping a new index column RM_INDEX to the original primary key.</li> * <li>users can provide the primary key column RM_INDEX themself which then has to be an integer valued index attribute, counting starts with 1 without any gaps or missing values for all rows</li> * </ul> * Beside the new index column or the mapping table creation <em>no writing actions</em> are performed * in the database. Moreover, <em>data sets built on top of a cached database table do not support * writing actions at all</em>. Users have to materialize the data, change it, and write it * back into a new table of the database (e.g.with the {@link DatabaseExampleSetWriter}. If * the data set is large, users can employ the operator {@link BatchProcessing} for splitting up * this data change task. * </p> * * @author Ingo Mierswa * @version $Id: CachedDatabaseExampleSource.java,v 1.7 2008/08/21 17:48:17 ingomierswa Exp $ */ public class CachedDatabaseExampleSource extends Operator { public static final String PARAMETER_RECREATE_INDEX = "recreate_index"; private DatabaseHandler databaseHandler; public CachedDatabaseExampleSource(OperatorDescription description) { super(description); } public IOObject[] apply() throws OperatorException { try { DatabaseHandler databaseHandler = getConnectedDatabaseHandler(); String tableName = getParameterAsString(DatabaseExampleSource.PARAMETER_TABLE_NAME); boolean recreateIndex = getParameterAsBoolean(PARAMETER_RECREATE_INDEX); IndexCachedDatabaseExampleTable table = new IndexCachedDatabaseExampleTable(databaseHandler, tableName, DataRowFactory.TYPE_DOUBLE_ARRAY, recreateIndex, this); return new IOObject[] { ResultSetExampleSource.createExampleSet(table, this) }; } catch (SQLException e) { throw new UserError(this, e, 304, e.getMessage()); } } protected DatabaseHandler getConnectedDatabaseHandler() throws OperatorException, SQLException { String databaseURL = getParameterAsString(DatabaseExampleSource.PARAMETER_DATABASE_URL); String username = getParameterAsString(DatabaseExampleSource.PARAMETER_USERNAME); String password = getParameterAsString(DatabaseExampleSource.PARAMETER_PASSWORD); return DatabaseHandler.getConnectedDatabaseHandler(databaseURL, username, password, DatabaseService.getJDBCProperties().get(getParameterAsInt(DatabaseExampleSource.PARAMETER_DATABASE_SYSTEM)), this); } public void processFinished() { disconnect(); } private void disconnect() { // close database connection if (databaseHandler != null) { try { databaseHandler.disconnect(); databaseHandler = null; } catch (SQLException e) { logWarning("Cannot disconnect from database: " + e); } } } public Class<?>[] getInputClasses() { return new Class[0]; } public Class<?>[] getOutputClasses() { return new Class[] { ExampleSet.class }; } public List<ParameterType> getParameterTypes() { List<ParameterType> types = super.getParameterTypes(); Map<String, String> wizardParameters = new HashMap<String, String>(); wizardParameters.put(DBExampleSourceConfigurationWizardCreator.PARAMETER_ONLY_TABLE_NAMES, "true"); wizardParameters.put(DBExampleSourceConfigurationWizardCreator.PARAMETER_SHOW_DATABASE_CONFIGURATION, "true"); ParameterType type = new ParameterTypeConfiguration(DBExampleSourceConfigurationWizardCreator.class, wizardParameters, this); type.setExpert(false); types.add(type); type = new ParameterTypeCategory(DatabaseExampleSource.PARAMETER_DATABASE_SYSTEM, "Indicates the used database system", DatabaseService.getDBSystemNames(), 0); type.setExpert(false); types.add(type); types.add(new ParameterTypeString(DatabaseExampleSource.PARAMETER_DATABASE_URL, "The complete URL connection string for the database, e.g. 'jdbc:mysql://foo.bar:portnr/database'", false)); types.add(new ParameterTypeString(DatabaseExampleSource.PARAMETER_USERNAME, "Database username.", false)); types.add(new ParameterTypePassword(DatabaseExampleSource.PARAMETER_PASSWORD, "Password for the database.")); types.add(new ParameterTypeString(DatabaseExampleSource.PARAMETER_TABLE_NAME, "Use this table as the base for this data access.", false)); types.add(new ParameterTypeBoolean(PARAMETER_RECREATE_INDEX, "Indicates if a recreation of the index or index mapping table should be forced.", false)); type = new ParameterTypeString(DatabaseExampleSource.PARAMETER_LABEL_ATTRIBUTE, "The (case sensitive) name of the label attribute"); type.setExpert(false); types.add(type); types.add(new ParameterTypeString(DatabaseExampleSource.PARAMETER_ID_ATTRIBUTE, "The (case sensitive) name of the id attribute")); types.add(new ParameterTypeString(DatabaseExampleSource.PARAMETER_WEIGHT_ATTRIBUTE, "The (case sensitive) name of the weight attribute")); return types; } }