/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2011 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.io;

import java.sql.SQLException;
import java.util.List;

import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.DataRowFactory;
import com.rapidminer.example.table.IndexCachedDatabaseExampleTable;
import com.rapidminer.operator.OperatorDescription;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.UserError;
import com.rapidminer.operator.meta.BatchProcessing;
import com.rapidminer.parameter.ParameterType;
import com.rapidminer.parameter.ParameterTypeBoolean;
import com.rapidminer.parameter.ParameterTypeString;
import com.rapidminer.tools.jdbc.DatabaseHandler;
import com.rapidminer.tools.jdbc.connection.ConnectionEntry;
import com.rapidminer.tools.jdbc.connection.ConnectionProvider;

/**
 * <p>This operator reads an {@link com.rapidminer.example.ExampleSet} from an SQL
 * database. The data is loaded from a single table which is defined with the
 * table name parameter. Please note that table and column names are
 * often case sensitive. Databases may behave differently here.</p>
 *
 * <p>The most convenient way of defining the necessary parameters is the
 * configuration wizard. The most important parameters (database URL and user name) will
 * be automatically determined by this wizard and it is also possible to define
 * the special attributes like labels or ids.</p>
 *
 * <p>In contrast to the DatabaseExampleSource operator, which loads the data into
 * the main memory, this operator keeps the data in the database and performs
 * the data reading in batches. This allows RapidMiner to access data sets of
 * arbitrary sizes without any size restrictions.</p>
 *
 * <p>Please note the following important restrictions and notes:
 * <ul>
 * <li>only manifested tables (no views) are allowed as the base for this data caching operator,</li>
 * <li>if no primary key and index is present, a new column named RM_INDEX is created and
 *     automatically used as primary key,</li>
 * <li>if a primary key is already present in the specified table, a new table named
 *     RM_MAPPED_INDEX is created mapping a new index column RM_INDEX to the original primary key,</li>
 * <li>users can provide the primary key column RM_INDEX themselves, which then has to be an
 *     integer valued index attribute; counting starts with 1, without any gaps or missing
 *     values for all rows.</li>
 * </ul>
 * Apart from the creation of the new index column or the mapping table, <em>no writing actions</em>
 * are performed in the database. Moreover, <em>data sets built on top of a cached database table
 * do not support writing actions at all</em>. Users have to materialize the data, change it, and
 * write it back into a new table of the database (e.g. with the {@link DatabaseExampleSetWriter}).
 * If the data set is large, users can employ the operator {@link BatchProcessing} for splitting up
 * this data change task.
 * </p>
 *
 * @author Ingo Mierswa, Tobias Malbrecht
 */
public class CachedDatabaseExampleSource extends AbstractExampleSource implements ConnectionProvider {

	/** Key of the boolean parameter that forces the index (or index mapping table) to be rebuilt. */
	public static final String PARAMETER_RECREATE_INDEX = "recreate_index";

	/**
	 * Handler obtained in {@link #createExampleSet()}; stays {@code null} until then and is
	 * reset to {@code null} after a successful disconnect.
	 */
	private DatabaseHandler databaseHandler;

	/**
	 * Creates the operator from its description.
	 *
	 * @param description the operator description, passed on to the superclass
	 */
	public CachedDatabaseExampleSource(OperatorDescription description) {
		super(description);
	}

	/**
	 * Obtains a connected database handler, wraps the configured table in an
	 * {@link IndexCachedDatabaseExampleTable} and lets {@link ResultSetExampleSource} build the
	 * example set on top of it.
	 *
	 * @return the example set created by {@link ResultSetExampleSource} for the cached table
	 * @throws OperatorException a {@link UserError} with code 304 wrapping any
	 *         {@link SQLException} raised while connecting or creating the table
	 */
	@Override
	public ExampleSet createExampleSet() throws OperatorException {
		try {
			// The handler is kept in a field so that processFinished() can close it later.
			databaseHandler = DatabaseHandler.getConnectedDatabaseHandler(this);

			final String sourceTable = getParameterAsString(DatabaseHandler.PARAMETER_TABLE_NAME);
			final boolean forceIndexRebuild = getParameterAsBoolean(PARAMETER_RECREATE_INDEX);

			// TODO copy functionality from ResultSetExampleSource and remove ResultSetExampleSource!
			return ResultSetExampleSource.createExampleSet(
					new IndexCachedDatabaseExampleTable(
							databaseHandler,
							sourceTable,
							DataRowFactory.TYPE_DOUBLE_ARRAY,
							forceIndexRebuild,
							this),
					this);
		} catch (SQLException e) {
			throw new UserError(this, e, 304, e.getMessage());
		}
	}

	/** Closes the database connection held by this operator, if there is one. */
	@Override
	public void processFinished() {
		disconnect();
	}

	/**
	 * Disconnects the current database handler and clears the field. A failure to disconnect
	 * is only logged as a warning; in that case the handler reference is left in place.
	 */
	private void disconnect() {
		if (databaseHandler == null) {
			// nothing to close
			return;
		}
		try {
			databaseHandler.disconnect();
			databaseHandler = null;
		} catch (SQLException e) {
			logWarning("Cannot disconnect from database: " + e);
		}
	}

	/**
	 * Extends the inherited parameter list with the connection and query parameters supplied by
	 * {@link DatabaseHandler}, the index recreation flag, and the names of the label, id and
	 * weight attributes. Only the label attribute parameter is marked as non-expert.
	 *
	 * @return the inherited list with this operator's parameter types appended
	 */
	@Override
	public List<ParameterType> getParameterTypes() {
		List<ParameterType> parameterTypes = super.getParameterTypes();

		// connection and query settings
		parameterTypes.addAll(DatabaseHandler.getConnectionParameterTypes(this));
		parameterTypes.addAll(DatabaseHandler.getQueryParameterTypes(this, true));

		// index handling
		ParameterType recreateIndex = new ParameterTypeBoolean(
				PARAMETER_RECREATE_INDEX,
				"Indicates if a recreation of the index or index mapping table should be forced.",
				false);
		parameterTypes.add(recreateIndex);

		// special attributes
		ParameterType labelAttribute = new ParameterTypeString(
				ResultSetExampleSource.PARAMETER_LABEL_ATTRIBUTE,
				"The (case sensitive) name of the label attribute");
		labelAttribute.setExpert(false);
		parameterTypes.add(labelAttribute);

		ParameterType idAttribute = new ParameterTypeString(
				ResultSetExampleSource.PARAMETER_ID_ATTRIBUTE,
				"The (case sensitive) name of the id attribute");
		parameterTypes.add(idAttribute);

		ParameterType weightAttribute = new ParameterTypeString(
				ResultSetExampleSource.PARAMETER_WEIGHT_ATTRIBUTE,
				"The (case sensitive) name of the weight attribute");
		parameterTypes.add(weightAttribute);

		return parameterTypes;
	}

	/**
	 * Returns the connection entry that {@link DatabaseHandler} resolves for this operator.
	 *
	 * @return the result of {@link DatabaseHandler#getConnectionEntry} for this operator
	 */
	@Override
	public ConnectionEntry getConnectionEntry() {
		return DatabaseHandler.getConnectionEntry(this);
	}
}