/* * RapidMiner * * Copyright (C) 2001-2008 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.example.table; import java.sql.DatabaseMetaData; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import com.rapidminer.example.Attribute; import com.rapidminer.tools.LoggingHandler; import com.rapidminer.tools.jdbc.DatabaseHandler; /** * This class is another data supplier for example sets. For performance reasons * one should use a {@link MemoryExampleTable} if the data is small enough for * the main memory. * * @author Ingo Mierswa * @version $Id: DatabaseExampleTable.java,v 2.14 2006/03/27 13:21:58 * ingomierswa Exp $ */ public class IndexCachedDatabaseExampleTable extends AbstractExampleTable { private static final long serialVersionUID = -3514641049341063136L; public static final int DEFAULT_BATCH_SIZE = 1500; // must be larger than 1000 due to plotter sampling public static final String INDEX_COLUMN_NAME = "RM_INDEX"; public static final String MAPPING_TABLE_NAME_PREFIX = "RM_MAPPING_"; private class CachedDataRowReader implements DataRowReader { private int currentTotalCursor = 0; public boolean hasNext() { return this.currentTotalCursor < size(); } public DataRow next() { DataRow dataRow = getDataRow(currentTotalCursor); this.currentTotalCursor++; return dataRow; } /** Will throw a new {@link UnsupportedOperationException} since {@link DataRowReader} does not have * to implement remove. */ public void remove() { throw new UnsupportedOperationException("The method 'remove' is not supported by DataRowReaders on databases!"); } } private DatabaseHandler databaseHandler; private String openQuote; private String closeQuote; private String tableName; private MemoryExampleTable batchExampleTable; private int currentBatchStartCursor = -1; private int size = -1; private int dataManagementType; private String mappingTableName; private String mappingPrimaryKey; public IndexCachedDatabaseExampleTable(DatabaseHandler databaseHandler, String tableName, int dataManagementType, boolean dropMappingTable, LoggingHandler logging) throws SQLException { super(new ArrayList<Attribute>()); this.databaseHandler = databaseHandler; this.openQuote = this.databaseHandler.getProperties().getIdentifierQuoteOpen(); this.closeQuote = this.databaseHandler.getProperties().getIdentifierQuoteClose(); this.tableName = tableName; this.dataManagementType = dataManagementType; // init size this.size = getSizeForTable(this.tableName); // init index createIndex(dropMappingTable, logging); // first: add attributes initAttributes(); // second: create batch table this.updateBatchAndCursors(0); } private void createIndex(boolean dropMappingTable, LoggingHandler logging) throws SQLException { // check if key and create key or key mapping table if necessary String primaryKeyName = getPrimaryKeyName(this.tableName); if (primaryKeyName == null) { this.mappingTableName = null; // use key created below directly this.mappingPrimaryKey = null; // no key: create RM_INDEX key logging.logNote("No primary key found: creating a new primary key with name '" + INDEX_COLUMN_NAME + "' for table '" + tableName + "'. This might take some time..."); createRMPrimaryKeyIndex(databaseHandler, this.tableName, openQuote, closeQuote); logging.logNote("Creation of primary key '" + INDEX_COLUMN_NAME + "' for table '" + tableName + "' finished."); } else { // key exists --> check for name --> wrong --> create mapping table if (!primaryKeyName.equals(INDEX_COLUMN_NAME)) { // init mapping names this.mappingTableName = MAPPING_TABLE_NAME_PREFIX + this.tableName; this.mappingPrimaryKey = primaryKeyName; // check if mapping table exists Statement statement = this.databaseHandler.createStatement(false); boolean exists = false; try { // check if table already exists (no exception and more than zero columns :-) ResultSet existingResultSet = statement.executeQuery("SELECT * FROM " + openQuote + this.mappingTableName + closeQuote + " WHERE 0 = 1"); if (existingResultSet.getMetaData().getColumnCount() > 0) exists = true; existingResultSet.close(); } catch (SQLException e) { // exception will be throw if table does not exist } statement.close(); if (exists) { int mappingSize = getSizeForTable(this.mappingTableName); if (mappingSize != this.size) { logging.logWarning("Size of internal mapping table '" + this.mappingTableName + "' and data table '" + this.tableName + "' differs. Recreate new mapping table!"); dropMappingTable = true; } } if (exists && dropMappingTable) { // drop mapping table if parameter is set String dropSQL = "DROP TABLE " + openQuote + this.mappingTableName + closeQuote; statement = this.databaseHandler.createStatement(false); statement.executeUpdate(dropSQL); statement.close(); exists = false; } if (!exists) { // copy key column logging.logNote("Primary key '" + primaryKeyName + "' found: creating a new mapping table '" + this.mappingTableName + "' which maps from the RapidMiner index '" + INDEX_COLUMN_NAME + "' to the primary key. This might take some time..."); String copyKeyQuery = "CREATE TABLE " + openQuote + this.mappingTableName + closeQuote + " AS ( SELECT " + openQuote + primaryKeyName + closeQuote + " FROM " + openQuote + this.tableName + closeQuote + " )"; statement = this.databaseHandler.createStatement(true); statement.execute(copyKeyQuery); statement.close(); // add new RM_INDEX key logging.logNote("Creating new primary key for mapping table '" + this.mappingTableName + "'..."); createRMPrimaryKeyIndex(databaseHandler, this.mappingTableName, openQuote, closeQuote); logging.logNote("Creation of mapping table '" + this.mappingTableName + "' finished."); } } else { this.mappingTableName = null; // use found key directly this.mappingPrimaryKey = null; } } } /** Subclasses might want to override this method if they do not support auto_increment. */ protected void createRMPrimaryKeyIndex(DatabaseHandler databaseHandler, String tableName, String openQuote, String closeQuote) throws SQLException { String addKeyQuery = "ALTER TABLE " + openQuote + tableName + closeQuote + " ADD " + openQuote + INDEX_COLUMN_NAME + closeQuote + " INT NOT NULL AUTO_INCREMENT PRIMARY KEY"; Statement statement = databaseHandler.createStatement(true); statement.execute(addKeyQuery); statement.close(); } private String getPrimaryKeyName(String tableName) throws SQLException { DatabaseMetaData meta = this.databaseHandler.getConnection().getMetaData(); ResultSet primaryKeys = meta.getPrimaryKeys(null, null, tableName); String primaryKeyName = null; while (primaryKeys.next()) { primaryKeyName = primaryKeys.getString(4); // name is in the fourth column break; } primaryKeys.close(); return primaryKeyName; } private void initAttributes() throws SQLException { Statement attributeStatement = this.databaseHandler.createStatement(false); String limitedQuery = "SELECT * FROM " + openQuote + tableName + closeQuote + " WHERE 0 = 1"; ResultSet attributeResultSet = attributeStatement.executeQuery(limitedQuery); List<Attribute> attributes = DatabaseHandler.createAttributes(attributeResultSet); Iterator<Attribute> a = attributes.iterator(); while (a.hasNext()) { if (a.next().getName().equals(INDEX_COLUMN_NAME)) a.remove(); } addAttributes(attributes); attributeResultSet.close(); attributeStatement.close(); } private void updateBatchAndCursors(int desiredRow) throws SQLException { desiredRow++; // RM starts counting with 0, DB with 1 // simple fetching strategy... boolean newBatch = false; int newOffset = this.currentBatchStartCursor; if (desiredRow > this.currentBatchStartCursor + (int)(0.9d * DEFAULT_BATCH_SIZE)) { // reaching the upper end newOffset = desiredRow - (int)(0.1 * DEFAULT_BATCH_SIZE); newBatch = true; } else if (desiredRow < this.currentBatchStartCursor) { // reaching the lower end newOffset = desiredRow - (int)(0.7 * DEFAULT_BATCH_SIZE); newBatch = true; } if (newOffset < 1) { newOffset = 1; newBatch = true; } // retrieve new batch if (newBatch) { if (this.mappingTableName == null) { // work directly on table Statement batchStatement = this.databaseHandler.createStatement(false); String limitedQuery = "SELECT * FROM " + openQuote + tableName + closeQuote + " WHERE " + openQuote + INDEX_COLUMN_NAME + closeQuote + " >= " + newOffset + " AND " + openQuote + INDEX_COLUMN_NAME + closeQuote + " < " + (newOffset + DEFAULT_BATCH_SIZE); ResultSet batchResultSet = batchStatement.executeQuery(limitedQuery); this.batchExampleTable = createExampleTableFromBatch(batchResultSet); batchResultSet.close(); batchStatement.close(); this.currentBatchStartCursor = newOffset; } else { // work with mapping table Statement batchStatement = this.databaseHandler.createStatement(false); String limitedQuery = "SELECT * FROM " + openQuote + this.tableName + closeQuote + "," + openQuote + this.mappingTableName + closeQuote + " WHERE " + openQuote + INDEX_COLUMN_NAME + closeQuote + " >= " + newOffset + " AND " + openQuote + INDEX_COLUMN_NAME + closeQuote + " < " + (newOffset + DEFAULT_BATCH_SIZE) + " AND " + openQuote + this.tableName + closeQuote + "." + openQuote + this.mappingPrimaryKey + closeQuote + " = " + openQuote + this.mappingTableName + closeQuote + "." + openQuote + this.mappingPrimaryKey + closeQuote; ResultSet batchResultSet = batchStatement.executeQuery(limitedQuery); this.batchExampleTable = createExampleTableFromBatch(batchResultSet); batchResultSet.close(); batchStatement.close(); this.currentBatchStartCursor = newOffset; } } } private MemoryExampleTable createExampleTableFromBatch(ResultSet batchResultSet) { List<Attribute> attributes = new ArrayList<Attribute>(getAttributes().length); for (Attribute attribute : getAttributes()) { attributes.add(attribute); } DataRowReader reader = new ResultSetDataRowReader(new DataRowFactory(dataManagementType, '.'), attributes, batchResultSet); return new MemoryExampleTable(attributes, reader); } public DataRow getDataRow(int index) { try { updateBatchAndCursors(index); return new NonWritableDataRow(this.batchExampleTable.getDataRow(index - this.currentBatchStartCursor + 1)); } catch (SQLException e) { throw new RuntimeException("Cannot retrieve data from database: " + e); } } public DataRowReader getDataRowReader() { return new CachedDataRowReader(); } private int getSizeForTable(String sizeTable) { int size = 0; try { Statement countStatement = this.databaseHandler.createStatement(false); String countQuery = "SELECT count(*) FROM " + openQuote + sizeTable + closeQuote; ResultSet countResultSet = countStatement.executeQuery(countQuery); countResultSet.next(); size = countResultSet.getInt(1); countResultSet.close(); countStatement.close(); } catch (SQLException e) { // do nothing } return size; } public int size() { return this.size; } }