/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.mapreduce.lib.db; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.sql.Connection; import java.sql.DatabaseMetaData; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.ArrayList; import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapreduce.InputFormat; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.conf.Configuration; /** * A RecordReader that reads records from a SQL table, * using data-driven WHERE clause splits. * Emits LongWritables containing the record number as * key and DBWritables as value. */ @InterfaceAudience.Public @InterfaceStability.Evolving public class DataDrivenDBRecordReader<T extends DBWritable> extends DBRecordReader<T> { private static final Log LOG = LogFactory.getLog(DataDrivenDBRecordReader.class); private String dbProductName; // database manufacturer string. /** * @param split The InputSplit to read data for * @throws SQLException */ public DataDrivenDBRecordReader(DBInputFormat.DBInputSplit split, Class<T> inputClass, Configuration conf, Connection conn, DBConfiguration dbConfig, String cond, String [] fields, String table, String dbProduct) throws SQLException { super(split, inputClass, conf, conn, dbConfig, cond, fields, table); this.dbProductName = dbProduct; } /** Returns the query for selecting the records, * subclasses can override this for custom behaviour.*/ @SuppressWarnings("unchecked") protected String getSelectQuery() { StringBuilder query = new StringBuilder(); DataDrivenDBInputFormat.DataDrivenDBInputSplit dataSplit = (DataDrivenDBInputFormat.DataDrivenDBInputSplit) getSplit(); DBConfiguration dbConf = getDBConf(); String [] fieldNames = getFieldNames(); String tableName = getTableName(); String conditions = getConditions(); // Build the WHERE clauses associated with the data split first. // We need them in both branches of this function. StringBuilder conditionClauses = new StringBuilder(); conditionClauses.append("( ").append(dataSplit.getLowerClause()); conditionClauses.append(" ) AND ( ").append(dataSplit.getUpperClause()); conditionClauses.append(" )"); if(dbConf.getInputQuery() == null) { // We need to generate the entire query. query.append("SELECT "); for (int i = 0; i < fieldNames.length; i++) { query.append(fieldNames[i]); if (i != fieldNames.length -1) { query.append(", "); } } query.append(" FROM ").append(tableName); if (!dbProductName.startsWith("ORACLE")) { // Seems to be necessary for hsqldb? Oracle explicitly does *not* // use this clause. query.append(" AS ").append(tableName); } query.append(" WHERE "); if (conditions != null && conditions.length() > 0) { // Put the user's conditions first. query.append("( ").append(conditions).append(" ) AND "); } // Now append the conditions associated with our split. query.append(conditionClauses.toString()); } else { // User provided the query. We replace the special token with our WHERE clause. String inputQuery = dbConf.getInputQuery(); if (inputQuery.indexOf(DataDrivenDBInputFormat.SUBSTITUTE_TOKEN) == -1) { LOG.error("Could not find the clause substitution token " + DataDrivenDBInputFormat.SUBSTITUTE_TOKEN + " in the query: [" + inputQuery + "]. Parallel splits may not work correctly."); } query.append(inputQuery.replace(DataDrivenDBInputFormat.SUBSTITUTE_TOKEN, conditionClauses.toString())); } LOG.debug("Using query: " + query.toString()); return query.toString(); } }