/* * RapidMiner * * Copyright (C) 2001-2014 by RapidMiner and the contributors * * Complete list of developers available at our web site: * * http://rapidminer.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.operator.io; import java.io.BufferedReader; import java.io.File; import java.io.IOException; import java.sql.Clob; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.ResultSetMetaData; import java.sql.SQLException; import java.sql.Timestamp; import java.sql.Types; import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.logging.Level; import java.util.logging.Logger; import com.rapidminer.example.Attribute; import com.rapidminer.example.ExampleSet; import com.rapidminer.example.table.AttributeFactory; import com.rapidminer.example.table.DataRow; import com.rapidminer.example.table.DataRowFactory; import com.rapidminer.example.table.MemoryExampleTable; import com.rapidminer.operator.Annotations; import com.rapidminer.operator.OperatorDescription; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.UserError; import com.rapidminer.operator.ports.metadata.AttributeMetaData; import com.rapidminer.operator.ports.metadata.ExampleSetMetaData; import com.rapidminer.operator.ports.metadata.MetaData; import com.rapidminer.parameter.ParameterType; import com.rapidminer.parameter.ParameterTypeCategory; import com.rapidminer.tools.I18N; import com.rapidminer.tools.LogService; import com.rapidminer.tools.Ontology; import com.rapidminer.tools.ParameterService; import com.rapidminer.tools.Tools; import com.rapidminer.tools.jdbc.ColumnIdentifier; import com.rapidminer.tools.jdbc.DatabaseHandler; import com.rapidminer.tools.jdbc.StatementCreator; import com.rapidminer.tools.jdbc.TableName; import com.rapidminer.tools.jdbc.connection.ConnectionEntry; import com.rapidminer.tools.jdbc.connection.ConnectionProvider; /** * Reads an {@link ExampleSet} from an SQL {@link Connection} table. SQL datatypes are mapped to value types of * {@link Attribute}s by using {@link DatabaseHandler#getRapidMinerTypeIndex(int)}. Data is copied into main memory. * * Data can be read from either a table name or a query. In the first case, the meta data is retrieved from the database * meta data for that table for reasons of efficiency. In the latter case, a LIMIT 0 is appended to the query, which may * fail on some systems but which enables us to retrieve the structure of the table without performing the entire query * itself. * * @author Simon Fischer * */ public class DatabaseDataReader extends AbstractExampleSource implements ConnectionProvider { /** System property to decide whether meta data should be fetched from DB for database queries. */ public static final String PROPERTY_EVALUATE_MD_FOR_SQL_QUERIES = "rapidminer.gui.evaluate_meta_data_for_sql_queries"; public DatabaseDataReader(OperatorDescription description) { super(description); } private DatabaseHandler databaseHandler; @Override public ExampleSet read() throws OperatorException { try { ExampleSet result = super.read(); return result; } finally { if (databaseHandler != null && databaseHandler.getConnection() != null) { try { databaseHandler.getConnection().close(); } catch (SQLException e) { getLogger().log(Level.WARNING, "Error closing database connection: " + e, e); } } } } protected ResultSet getResultSet() throws OperatorException { try { databaseHandler = DatabaseHandler.getConnectedDatabaseHandler(this); String query = getQuery(databaseHandler.getStatementCreator()); if (query == null) { throw new UserError(this, 202, new Object[] { "query", "query_file", "table_name" }); } return databaseHandler.executeStatement(query, true, this, getLogger()); } catch (SQLException sqle) { throw new UserError(this, sqle, 304, sqle.getMessage()); } } @Override public ExampleSet createExampleSet() throws OperatorException { ResultSet resultSet = getResultSet(); MemoryExampleTable table; try { List<Attribute> attributes = getAttributes(resultSet); table = createExampleTable(resultSet, attributes, getParameterAsInt(ExampleSource.PARAMETER_DATAMANAGEMENT), getLogger()); } catch (SQLException e) { throw new UserError(this, e, 304, e.getMessage()); } finally { try { resultSet.close(); } catch (SQLException e) { getLogger().log(Level.WARNING, "DB error closing result set: " + e, e); } } return table.createExampleSet(); } @Override public MetaData getGeneratedMetaData() throws OperatorException { ExampleSetMetaData metaData = new ExampleSetMetaData(); try { databaseHandler = DatabaseHandler.getConnectedDatabaseHandler(this); switch (getParameterAsInt(DatabaseHandler.PARAMETER_DEFINE_QUERY)) { case DatabaseHandler.QUERY_TABLE: List<ColumnIdentifier> columns = databaseHandler.getAllColumnNames(DatabaseHandler.getSelectedTableName(this), databaseHandler.getConnection().getMetaData()); for (ColumnIdentifier column : columns) { metaData.addAttribute(new AttributeMetaData(column.getColumnName(), DatabaseHandler.getRapidMinerTypeIndex(column.getSqlType()))); } break; case DatabaseHandler.QUERY_QUERY: case DatabaseHandler.QUERY_FILE: default: if (!"false".equals(ParameterService.getParameterValue(PROPERTY_EVALUATE_MD_FOR_SQL_QUERIES))) { String query = getQuery(databaseHandler.getStatementCreator()); PreparedStatement prepared = databaseHandler.getConnection().prepareStatement(query); // query = "SELECT * FROM (" + query + ") dummy WHERE 1=0"; // ResultSet resultSet = databaseHandler.executeStatement(query, true, this, getLogger()); List<Attribute> attributes = getAttributes(prepared.getMetaData()); for (Attribute att : attributes) { metaData.addAttribute(new AttributeMetaData(att)); } prepared.close(); } break; } } catch (SQLException e) { //LogService.getRoot().log(Level.WARNING, "Failed to fetch meta data: " + e, e); LogService.getRoot().log(Level.WARNING, I18N.getMessage(LogService.getRoot().getResourceBundle(), "com.rapidminer.operator.io.DatabaseDataReader.fetching_meta_data_error", e), e); } finally { try { if (databaseHandler != null && databaseHandler.getConnection() != null) { databaseHandler.disconnect(); } } catch (SQLException e) { getLogger().log(Level.WARNING, "DB error closing connection: " + e, e); } } return metaData; } public static MemoryExampleTable createExampleTable(ResultSet resultSet, List<Attribute> attributes, int dataManagementType, Logger logger) throws SQLException, OperatorException { ResultSetMetaData metaData = resultSet.getMetaData(); Attribute[] attributeArray = attributes.toArray(new Attribute[attributes.size()]); MemoryExampleTable table = new MemoryExampleTable(attributes); DataRowFactory factory = new DataRowFactory(dataManagementType, '.'); while (resultSet.next()) { DataRow dataRow = factory.create(attributeArray.length); // double[] data = new double[attributeArray.length]; for (int i = 1; i <= metaData.getColumnCount(); i++) { Attribute attribute = attributeArray[i - 1]; int valueType = attribute.getValueType(); double value; if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(valueType, Ontology.DATE_TIME)) { Timestamp timestamp = resultSet.getTimestamp(i); if (resultSet.wasNull()) { value = Double.NaN; } else { value = timestamp.getTime(); } } else if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(valueType, Ontology.NUMERICAL)) { value = resultSet.getDouble(i); if (resultSet.wasNull()) { value = Double.NaN; } } else { if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(valueType, Ontology.NOMINAL)) { String valueString; if (metaData.getColumnType(i) == Types.CLOB) { Clob clob = resultSet.getClob(i); if (clob != null) { BufferedReader in = null; try { in = new BufferedReader(clob.getCharacterStream()); String line = null; try { StringBuffer buffer = new StringBuffer(); while ((line = in.readLine()) != null) { buffer.append(line + "\n"); } valueString = buffer.toString(); } catch (IOException e) { throw new OperatorException("Database error occurred: " + e, e); } } finally { try { in.close(); } catch (IOException e) {} } } else { valueString = null; } } else { valueString = resultSet.getString(i); } if (resultSet.wasNull() || valueString == null) { value = Double.NaN; } else { value = attribute.getMapping().mapString(valueString); } } else { if (logger != null) { logger.warning("Unknown column type: " + attribute); } value = Double.NaN; } } dataRow.set(attribute, value); // data[i-1] = value; } table.addDataRow(dataRow); // new DoubleArrayDataRow(data)); } return table; } public static List<Attribute> getAttributes(ResultSet resultSet) throws SQLException { ResultSetMetaData metaData = resultSet.getMetaData(); return getAttributes(metaData); } private static List<Attribute> getAttributes(ResultSetMetaData metaData) throws SQLException { List<Attribute> result = new LinkedList<Attribute>(); if (metaData != null) { // A map mapping original column names to a counter specifying how often // they were chosen Map<String, Integer> duplicateNameMap = new HashMap<String, Integer>(); for (int columnIndex = 1; columnIndex <= metaData.getColumnCount(); columnIndex++) { // column name from DB String dbColumnName = metaData.getColumnLabel(columnIndex); // name that will be used in example set String columnName = dbColumnName; // check original name first Integer duplicateCount = duplicateNameMap.get(dbColumnName); boolean isUnique = duplicateCount == null; if (isUnique) { // name is unique duplicateNameMap.put(columnName, new Integer(1)); } else { // name already present, iterate until unique while (!isUnique) { // increment duplicate counter duplicateCount = new Integer(duplicateCount.intValue() + 1); // create new name proposal columnName = dbColumnName + "_" + (duplicateCount - 1); // -1 because of compatibility // check if new name is already taken isUnique = duplicateNameMap.get(columnName) == null; } // save new duplicate count for old db column name duplicateNameMap.put(dbColumnName, duplicateCount); } int attributeType = DatabaseHandler.getRapidMinerTypeIndex(metaData.getColumnType(columnIndex)); final Attribute attribute = AttributeFactory.createAttribute(columnName, attributeType); attribute.getAnnotations().setAnnotation("sql_type", metaData.getColumnTypeName(columnIndex)); result.add(attribute); } } return result; } private String getQuery(StatementCreator sc) throws OperatorException { switch (getParameterAsInt(DatabaseHandler.PARAMETER_DEFINE_QUERY)) { case DatabaseHandler.QUERY_QUERY: { String query = getParameterAsString(DatabaseHandler.PARAMETER_QUERY); if (query != null) { query = query.trim(); } return query; } case DatabaseHandler.QUERY_FILE: { File queryFile = getParameterAsFile(DatabaseHandler.PARAMETER_QUERY_FILE); if (queryFile != null) { String query = null; try { query = Tools.readTextFile(queryFile); } catch (IOException ioe) { throw new UserError(this, ioe, 302, new Object[] { queryFile, ioe.getMessage() }); } if (query == null || query.trim().length() == 0) { throw new UserError(this, 205, queryFile); } return query; } } case DatabaseHandler.QUERY_TABLE: TableName tableName = DatabaseHandler.getSelectedTableName(this); //final String tableName = getParameterAsString(DatabaseHandler.PARAMETER_TABLE_NAME); return "SELECT * FROM " + sc.makeIdentifier(tableName); } return null; } @Override public ConnectionEntry getConnectionEntry() { return DatabaseHandler.getConnectionEntry(this); } @Override protected void addAnnotations(ExampleSet result) { try { if (databaseHandler != null) { result.getAnnotations().setAnnotation(Annotations.KEY_SOURCE, getQuery(databaseHandler.getStatementCreator())); } } catch (OperatorException e) {} } @Override protected boolean isMetaDataCacheable() { return true; } @Override public List<ParameterType> getParameterTypes() { List<ParameterType> list = super.getParameterTypes(); list.addAll(DatabaseHandler.getConnectionParameterTypes(this)); list.addAll(DatabaseHandler.getQueryParameterTypes(this, false)); list.addAll(DatabaseHandler.getStatementPreparationParamterTypes(this)); list.add(new ParameterTypeCategory(ExampleSource.PARAMETER_DATAMANAGEMENT, "Determines, how the data is represented internally.", DataRowFactory.TYPE_NAMES, DataRowFactory.TYPE_DOUBLE_ARRAY, false)); return list; } }