/** * Copyright 2010 Nube Technologies * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR * CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and limitations under the License. */ package co.nubetech.hiho.job; import java.io.BufferedWriter; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; import java.sql.Connection; import java.sql.DatabaseMetaData; import java.sql.DriverManager; import java.sql.PreparedStatement; import java.util.ArrayList; import java.util.Map.Entry; import java.util.NoSuchElementException; import java.util.StringTokenizer; import org.apache.avro.Schema; import org.apache.avro.mapred.AvroJob; import org.apache.avro.mapred.AvroValue; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapred.JobClient; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.util.ReflectionUtils; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.log4j.Logger; import co.nubetech.hiho.mapreduce.lib.db.apache.DBConfiguration; import co.nubetech.hiho.mapreduce.lib.db.apache.DataDrivenDBInputFormat; import co.nubetech.hiho.mapreduce.lib.db.apache.MRJobConfig; import co.nubetech.hiho.avro.DBMapper; import co.nubetech.hiho.common.HIHOConf; import co.nubetech.hiho.common.HIHOException; import co.nubetech.hiho.common.OutputStrategyEnum; import co.nubetech.hiho.hive.HiveUtility; //import co.nubetech.hiho.mapred.avro.GenericRecordAvroOutputFormat; import co.nubetech.hiho.mapreduce.DBInputAvroMapper; import co.nubetech.hiho.mapreduce.DBInputDelimMapper; import co.nubetech.hiho.mapreduce.lib.db.DBQueryInputFormat; import co.nubetech.hiho.mapreduce.lib.db.GenericDBWritable; import co.nubetech.hiho.mapreduce.lib.output.NoKeyOnlyValueOutputFormat; import co.nubetech.hiho.pig.PigUtility; public class DBQueryInputJob extends Configured implements Tool { private final static Logger logger = Logger .getLogger(co.nubetech.hiho.job.DBQueryInputJob.class); private ArrayList params; public void populateConfiguration(String[] args, Configuration conf) { for (int i = 0; i < args.length - 1; i++) { if ("-jdbcDriver".equals(args[i])) { conf.set(DBConfiguration.DRIVER_CLASS_PROPERTY, args[++i]); } else if ("-jdbcUrl".equals(args[i])) { conf.set(DBConfiguration.URL_PROPERTY, args[++i]); } else if ("-jdbcUsername".equals(args[i])) { conf.set(DBConfiguration.USERNAME_PROPERTY, args[++i]); } else if ("-jdbcPassword".equals(args[i])) { conf.set(DBConfiguration.PASSWORD_PROPERTY, args[++i]); } else if ("-inputQuery".equals(args[i])) { conf.set(DBConfiguration.INPUT_QUERY, args[++i]); } else if ("-inputBoundingQuery".equals(args[i])) { conf.set(DBConfiguration.INPUT_BOUNDING_QUERY, args[++i]); } else if ("-outputPath".equals(args[i])) { conf.set(HIHOConf.INPUT_OUTPUT_PATH, args[++i]); } else if ("-outputStrategy".equals(args[i])) { conf.set(HIHOConf.INPUT_OUTPUT_STRATEGY, args[++i]); } else if ("-delimiter".equals(args[i])) { conf.set(HIHOConf.INPUT_OUTPUT_DELIMITER, args[++i]); } else if ("-numberOfMappers".equals(args[i])) { conf.set(HIHOConf.NUMBER_MAPPERS, args[++i]); } else if ("-inputTableName".equals(args[i])) { conf.set(DBConfiguration.INPUT_TABLE_NAME_PROPERTY, args[++i]); } else if ("-inputFieldNames".equals(args[i])) { conf.set(DBConfiguration.INPUT_FIELD_NAMES_PROPERTY, args[++i]); } else if ("-inputOrderBy".equals(args[i])) { conf.set(DBConfiguration.INPUT_ORDER_BY_PROPERTY, args[++i]); } else if ("-inputLoadTo".equals(args[i])) { conf.set(HIHOConf.INPUT_OUTPUT_LOADTO, args[++i]); } else if ("-inputLoadToPath".equals(args[i])) { conf.set(HIHOConf.INPUT_OUTPUT_LOADTO_PATH, args[++i]); } else if ("-hiveDriver".equals(args[i])) { conf.set(HIHOConf.HIVE_DRIVER, args[++i]); } else if ("-hiveUrl".equals(args[i])) { conf.set(HIHOConf.HIVE_URL, args[++i]); } else if ("-hiveUsername".equals(args[i])) { conf.set(HIHOConf.HIVE_USR_NAME, args[++i]); } else if ("-hivePassword".equals(args[i])) { conf.set(HIHOConf.HIVE_PASSWORD, args[++i]); } else if ("-hivePartitionBy".equals(args[i])) { conf.set(HIHOConf.HIVE_PARTITION_BY, args[++i]); } else if ("-hiveIfNotExists".equals(args[i])) { conf.set(HIHOConf.HIVE_TABLE_OVERWRITE, args[++i]); } else if ("-hiveTableName".equals(args[i])) { conf.set(HIHOConf.HIVE_TABLE_NAME, args[++i]); } else if ("-hiveSortedBy".equals(args[i])) { conf.set(HIHOConf.HIVE_SORTED_BY, args[++i]); } else if ("-hiveClusteredBy".equals(args[i])) { conf.set(HIHOConf.HIVE_CLUSTERED_BY, args[++i]); } } } public void checkMandatoryConfs(Configuration conf) throws HIHOException { if (conf.get(DBConfiguration.DRIVER_CLASS_PROPERTY) == null) { throw new HIHOException( "JDBC driver configuration is not specified,please specify JDBC driver class"); } if (conf.get(DBConfiguration.URL_PROPERTY) == null) { throw new HIHOException( "JDBC url path configuration is empty,please specify JDBC url path"); } if (!conf.get(DBConfiguration.DRIVER_CLASS_PROPERTY).contains("hsqldb")) { if (conf.get(DBConfiguration.USERNAME_PROPERTY) == null) { throw new HIHOException( "JDBC user name configuration is empty,please specify JDBC user name"); } if (conf.get(DBConfiguration.PASSWORD_PROPERTY) == null) { throw new HIHOException( "JDBC password configuration is empty,please specify JDBC password"); } } if (conf.get(HIHOConf.INPUT_OUTPUT_PATH) == null) { throw new HIHOException( "Output path is not specified,please specify output path"); } if (conf.get(HIHOConf.INPUT_OUTPUT_STRATEGY) != null && conf.get(HIHOConf.INPUT_OUTPUT_STRATEGY).equals("DELIMITED")) { if (conf.get(HIHOConf.INPUT_OUTPUT_DELIMITER) == null) { throw new HIHOException( "Delimiter is not specified,please specify delimiter"); } } if (conf.get(DBConfiguration.INPUT_TABLE_NAME_PROPERTY) == null && conf.get(DBConfiguration.INPUT_QUERY) == null) { throw new HIHOException( "Input table name and input query both configurations are empty, please specify anyone of them"); } if (conf.get(DBConfiguration.INPUT_QUERY) != null && conf.get(DBConfiguration.INPUT_BOUNDING_QUERY) == null) { throw new HIHOException( "Please specify input bounding query as it is mandatory to be defined with input query "); } if (conf.get(DBConfiguration.INPUT_TABLE_NAME_PROPERTY) != null && conf.get(DBConfiguration.INPUT_FIELD_NAMES_PROPERTY) == null) { conf.set(DBConfiguration.INPUT_FIELD_NAMES_PROPERTY, "*"); } if (conf.get(HIHOConf.INPUT_OUTPUT_LOADTO) != null && conf.get(HIHOConf.INPUT_OUTPUT_LOADTO_PATH) == null) { throw new HIHOException( "Load to path configuration is empty, please specify path to load script in loadTOPath configuration"); } if (conf.get(HIHOConf.INPUT_OUTPUT_LOADTO) != null && conf.get(HIHOConf.INPUT_OUTPUT_LOADTO).equals("hive")) { if (conf.get(HIHOConf.HIVE_URL) == null) { throw new HIHOException( "The Hive url is not defined, please specify hive url"); } if (conf.get(HIHOConf.HIVE_DRIVER) == null) { throw new HIHOException( "The Hive driver is not defined, please specify hive driver"); } if (checkForMultiplePartition(conf.get(HIHOConf.HIVE_PARTITION_BY)) && conf.get(HIHOConf.HIVE_TABLE_NAME) == null) { throw new HIHOException("please specify hive table name"); } } } @Override public int run(String[] args) throws IOException { Configuration conf = getConf(); populateConfiguration(args, conf); boolean isMultiplePartition = false; if (conf.get(HIHOConf.INPUT_OUTPUT_LOADTO) != null) { if (conf.get(HIHOConf.INPUT_OUTPUT_LOADTO).equals("hive")) { conf.set("hadoop.job.history.user.location", "none"); if (conf.get(HIHOConf.HIVE_PARTITION_BY) != null) { try { isMultiplePartition = checkForMultiplePartition(conf .get(HIHOConf.HIVE_PARTITION_BY)); } catch (HIHOException e) { e.printStackTrace(); } } } if (isMultiplePartition && conf.get(HIHOConf.INPUT_OUTPUT_LOADTO).equals("hive")) { populateHiveConfigurationForMultiplePartition(conf); } else { runJobs(conf, 0); } } else { runJobs(conf, 0); } return 0; } private boolean checkForMultiplePartition(String partitionBy) throws HIHOException { // ArrayList<PartitionBy> partitionByValues= new // ArrayList<PartitionBy>(); boolean isMultiplePartition = false; StringTokenizer partitionToken = new StringTokenizer(partitionBy, ";"); StringTokenizer partitionData = new StringTokenizer( partitionToken.nextToken(), ":"); try { partitionData.nextToken(); partitionData.nextToken(); } catch (NoSuchElementException e) { throw new HIHOException( "Data not defined properly in partitionBy configuration"); } if (partitionData.hasMoreTokens()) { int index = partitionData.nextToken().indexOf(","); if (index > -1) { isMultiplePartition = true; if (partitionToken.hasMoreTokens()) { throw new HIHOException( "Data not defined properly in partitionBy configuration"); } } } return isMultiplePartition; } private void generatePigScript(Configuration conf, Job job) throws HIHOException { // /see if import to pig or hive if (conf.get(HIHOConf.INPUT_OUTPUT_LOADTO).equals("pig")) { try { String pigScript = PigUtility.getLoadScript( HIHOConf.INPUT_OUTPUT_PATH, getDBWritable(conf)); // //jobId File file = new File(new File( conf.get(HIHOConf.INPUT_OUTPUT_LOADTO_PATH)), "pigScript" + ".txt"); FileOutputStream fos = new FileOutputStream(file); BufferedWriter w = new BufferedWriter(new OutputStreamWriter( fos)); w.write(pigScript); w.close(); fos.close(); } catch (Exception h) { throw new HIHOException("Unable to generate Pig script", h); } } } private void generateHiveScript(Configuration conf, Job job, int jobCounter) throws HIHOException { // /see if import to pig or hive if (conf.get(HIHOConf.INPUT_OUTPUT_LOADTO).equals("hive")) { try { HiveUtility.createTable(conf, job, getDBWritable(conf), jobCounter); } catch (Exception h) { throw new HIHOException("Unable to generate Hive script", h); } } } public static void main(String[] args) throws Exception { // setUp(); DBQueryInputJob job = new DBQueryInputJob(); // ArrayList params = new ArrayList(); // params.add(false); // job.setParams(params); int res = ToolRunner.run(new Configuration(), job, args); System.exit(res); } public ArrayList getParams() { return params; } public void setParams(ArrayList params) { this.params = params; } /* * this will move to the junit once everything is properly done * * public static void setUp() { // set up the database String db = * "cumulus"; String root = "root"; String pwd = "newpwd"; * * String user = "tester"; String password = "password"; * * Connection conn; String url = "jdbc:mysql://localhost:3306/"; String * driverName = "com.mysql.jdbc.Driver"; * * try { Class.forName(driverName).newInstance(); conn = * DriverManager.getConnection(url, root, pwd); try { Statement st = * conn.createStatement(); * * String dbDrop = "drop database if exists " + db; * st.executeUpdate(dbDrop); logger.debug("Dropped database"); * * String dbCreate = "create database " + db; st.executeUpdate(dbCreate); * logger.debug("Created database"); * * // Register a new user named tester on the // database named cumulus with * a password // password enabling several different // privileges. * st.executeUpdate("GRANT SELECT,INSERT,UPDATE,DELETE," + "CREATE,DROP " + * "ON " + db + ".* TO '" + user + "'@'localhost' " + "IDENTIFIED BY '" + * password + "';"); logger.debug("Created user tester"); st.close(); * * // now connect to the relevent db and create the schema conn = * DriverManager.getConnection(url + db, root, pwd); st = * conn.createStatement(); * * String desTable = * "Create table if not exists designations(id integer, designation varchar(30));\n" * ; st = conn.createStatement(); st.executeUpdate(desTable); * * logger.debug(desTable); * * String desTableData = * "insert into designations(id, designation) values("; desTableData += * "0, 'Manager');\n"; st.executeUpdate(desTableData); desTableData = * "insert into designations(id, designation) values("; desTableData += * "1, 'Accountant');\n"; st.executeUpdate(desTableData); desTableData = * "insert into designations(id, designation) values("; desTableData += * "2, 'Assistant');\n"; st.executeUpdate(desTableData); desTableData = * "insert into designations(id, designation) values("; desTableData += * "3, 'Sr. Manager');\n"; logger.debug(desTableData); * st.executeUpdate(desTableData); * logger.debug("Data inserte4d into designations"); * * String table = * "CREATE TABLE if not exists extractJobEmployee(id integer, name varchar(50), age integer" * ; table += ", isMarried boolean, salary double, designationId integer);"; * st = conn.createStatement(); st.executeUpdate(table); * * logger.debug("Schema creation process successfull!"); * logger.debug("Inserting table data"); * * for (int i = 0; i < 50; ++i) { int designation = i % 4; String tableData * = * "Insert into extractJobEmployee(id, name, age, isMarried, salary, designationId) values(" * ; tableData += i + ", 'Employee" + i; tableData += "', 25, false, 349.9," * + designation + ");\n"; logger.debug(tableData); st = * conn.createStatement(); st.executeUpdate(tableData); } // conn.commit(); * logger.debug("Table data insertion process successfull!"); } catch * (SQLException s) { s.printStackTrace(); } conn.close(); } catch * (Exception e) { e.printStackTrace(); } * * } */ public static GenericDBWritable getDBWritable(Configuration conf) throws HIHOException { try { String driverName = conf.get(DBConfiguration.DRIVER_CLASS_PROPERTY); String url = conf.get(DBConfiguration.URL_PROPERTY); String user = conf.get(DBConfiguration.USERNAME_PROPERTY); String password = conf.get(DBConfiguration.PASSWORD_PROPERTY); Class.forName(driverName).newInstance(); Connection conn = DriverManager.getConnection(url, user, password); DatabaseMetaData dbMeta = conn.getMetaData(); String dbProductName = dbMeta.getDatabaseProductName() .toUpperCase(); String query = getSelectQuery(conf, dbProductName); PreparedStatement st = conn.prepareStatement(query); logger.warn("\n Query for GenericDBWritable is " + query); GenericDBWritable writable = new GenericDBWritable( GenericDBWritable.populateColumnInfo(st.getMetaData()), null); return writable; } catch (Exception e) { e.printStackTrace(); throw new HIHOException("Unable to get metadata for the query", e); } } /** * Returns the query for selecting the records, subclasses can override this * for custom behaviour. A lot is actually a copy from * DataDrivenDBRecordReader */ @SuppressWarnings("unchecked") public static String getSelectQuery(Configuration conf, String dbProductName) throws HIHOException { StringBuilder query = new StringBuilder(); DBConfiguration dbConf = new DBConfiguration(conf); String[] fieldNames = dbConf.getInputFieldNames(); String tableName = dbConf.getInputTableName(); String conditions = dbConf.getInputConditions(); StringBuilder conditionClauses = new StringBuilder(); if (dbConf.getInputQuery() == null) { // We need to generate the entire query. query.append("SELECT "); for (int i = 0; i < fieldNames.length; i++) { query.append(fieldNames[i]); if (i != fieldNames.length - 1) { query.append(", "); } } query.append(" FROM ").append(tableName); if (!dbProductName.startsWith("ORACLE")) { // Seems to be necessary for hsqldb? Oracle explicitly does // *not* // use this clause. query.append(" AS ").append(tableName); } } else { // User provided the query. We replace the special token with our // WHERE clause. String inputQuery = dbConf.getInputQuery(); if (inputQuery.indexOf(DataDrivenDBInputFormat.SUBSTITUTE_TOKEN) == -1) { logger.error("Could not find the clause substitution token " + DataDrivenDBInputFormat.SUBSTITUTE_TOKEN + " in the query: [" + inputQuery + "]. Parallel splits may not work correctly."); } // bad bad hack, but we dont have the split here.. conditionClauses.append("( 1=1 )"); query.append(inputQuery.replace( DataDrivenDBInputFormat.SUBSTITUTE_TOKEN, conditionClauses.toString())); } return query.toString(); } public void populateHiveConfigurationForMultiplePartition(Configuration conf) throws IOException { String columnName = null, columnType = null; ArrayList columnsValues = new ArrayList(); ArrayList query = new ArrayList(); ArrayList table = new ArrayList(); String partitionByData = conf.get(HIHOConf.HIVE_PARTITION_BY); boolean isInputQueryDelimited = false; String queries = null; if (conf.get(DBConfiguration.INPUT_QUERY) != null) { isInputQueryDelimited = true; queries = conf.get(DBConfiguration.INPUT_QUERY); } // / StringTokenizer partitionByTokens = new StringTokenizer( partitionByData, ":"); columnName = partitionByTokens.nextToken(); columnType = partitionByTokens.nextToken(); StringTokenizer partitionByValues = new StringTokenizer( partitionByTokens.nextToken(), ","); int counter = 0; int tokenCounts = partitionByValues.countTokens(); while (partitionByValues.hasMoreTokens()) { columnsValues.add(counter, partitionByValues.nextToken()); counter++; } // / if (isInputQueryDelimited) { StringTokenizer queryTokens = new StringTokenizer(queries, ";"); counter = 0; while (queryTokens.hasMoreTokens()) { query.add(counter, queryTokens.nextToken()); counter++; } } else { StringTokenizer tableTokens = new StringTokenizer( conf.get(DBConfiguration.INPUT_TABLE_NAME_PROPERTY), ";"); counter = 0; while (tableTokens.hasMoreTokens()) { table.add(counter, tableTokens.nextToken()); counter++; } } for (int jobCounter = 0; jobCounter < tokenCounts; jobCounter++) { if (isInputQueryDelimited) { ; conf.set(DBConfiguration.INPUT_QUERY, query.get(jobCounter) .toString()); } else { conf.set(DBConfiguration.INPUT_TABLE_NAME_PROPERTY, table.get(jobCounter).toString()); } conf.set(HIHOConf.INPUT_OUTPUT_PATH, conf.get(HIHOConf.INPUT_OUTPUT_PATH) + jobCounter); // conf.set(HIHOConf.HIVE_TABLE_OVERWRITE, "true"); String partitionBy = columnName + ":" + columnType + ":" + columnsValues.get(jobCounter).toString(); conf.set(HIHOConf.HIVE_PARTITION_BY, partitionBy); runJobs(conf, jobCounter); } } public void runJobs(Configuration conf, int jobCounter) throws IOException { try { checkMandatoryConfs(conf); } catch (HIHOException e1) { e1.printStackTrace(); throw new IOException(e1); } Job job = new Job(conf); for (Entry<String, String> entry : conf) { logger.warn("key, value " + entry.getKey() + "=" + entry.getValue()); } // logger.debug("Number of maps " + // conf.getInt("mapred.map.tasks", 1)); // conf.setInt(JobContext.NUM_MAPS, // conf.getInt("mapreduce.job.maps", 1)); // job.getConfiguration().setInt("mapred.map.tasks", 4); job.getConfiguration().setInt(MRJobConfig.NUM_MAPS, conf.getInt(HIHOConf.NUMBER_MAPPERS, 1)); logger.warn("Number of maps " + conf.getInt(MRJobConfig.NUM_MAPS, 1)); job.setJobName("Import job"); job.setJarByClass(DBQueryInputJob.class); String strategy = conf.get(HIHOConf.INPUT_OUTPUT_STRATEGY); OutputStrategyEnum os = OutputStrategyEnum.value(strategy); if (os == null) { throw new IllegalArgumentException( "Wrong value of output strategy. Please correct"); } if (os != OutputStrategyEnum.AVRO) { switch (os) { case DUMP: { // job.setMapperClass(DBImportMapper.class); break; } /* * case AVRO: { job.setMapperClass(DBInputAvroMapper.class); // * need avro in cp // job.setJarByClass(Schema.class); // need * jackson which is needed by avro - ugly! // * job.setJarByClass(ObjectMapper.class); * job.setMapOutputKeyClass(NullWritable.class); * job.setMapOutputValueClass(AvroValue.class); * job.setOutputKeyClass(NullWritable.class); * job.setOutputValueClass(AvroValue.class); * job.setOutputFormatClass(AvroOutputFormat.class); * * AvroOutputFormat.setOutputPath(job, new * Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH))); break; } */ case DELIMITED: { job.setMapperClass(DBInputDelimMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setOutputFormatClass(NoKeyOnlyValueOutputFormat.class); NoKeyOnlyValueOutputFormat.setOutputPath(job, new Path( getConf().get(HIHOConf.INPUT_OUTPUT_PATH))); } case JSON: { // job.setMapperClass(DBImportJsonMapper.class); // job.setJarByClass(ObjectMapper.class); break; } default: { job.setMapperClass(DBInputDelimMapper.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setOutputFormatClass(NoKeyOnlyValueOutputFormat.class); NoKeyOnlyValueOutputFormat.setOutputPath(job, new Path( getConf().get(HIHOConf.INPUT_OUTPUT_PATH))); break; } } String inputQuery = conf.get(DBConfiguration.INPUT_QUERY); String inputBoundingQuery = conf .get(DBConfiguration.INPUT_BOUNDING_QUERY); logger.debug("About to set the params"); DBQueryInputFormat.setInput(job, inputQuery, inputBoundingQuery, params); logger.debug("Set the params"); job.setNumReduceTasks(0); try { // job.setJarByClass(Class.forName(conf.get( // org.apache.hadoop.mapred.lib.db.DBConfiguration.DRIVER_CLASS_PROPERTY))); logger.debug("OUTPUT format class is " + job.getOutputFormatClass()); /* * org.apache.hadoop.mapreduce.OutputFormat<?, ?> output = * ReflectionUtils.newInstance(job.getOutputFormatClass(), * job.getConfiguration()); output.checkOutputSpecs(job); */ logger.debug("Class is " + ReflectionUtils .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass() .getName()); job.waitForCompletion(false); if (conf.get(HIHOConf.INPUT_OUTPUT_LOADTO) != null) { generateHiveScript(conf, job, jobCounter); generatePigScript(conf, job); } } /* * catch (HIHOException h) { h.printStackTrace(); } */ catch (Exception e) { e.printStackTrace(); } catch (HIHOException e) { e.printStackTrace(); } } // avro to be handled differently, thanks to all the incompatibilities // in the apis. /*else { String inputQuery = conf.get(DBConfiguration.INPUT_QUERY); String inputBoundingQuery = conf .get(DBConfiguration.INPUT_BOUNDING_QUERY); logger.debug("About to set the params"); // co.nubetech.apache.hadoop.mapred.DBQueryInputFormat.setInput(job, // inputQuery, inputBoundingQuery, params); logger.debug("Set the params"); JobConf jobConf = new JobConf(conf); try { GenericDBWritable queryWritable = getDBWritable(jobConf); Schema pair = DBMapper .getPairSchema(queryWritable.getColumns()); AvroJob.setMapOutputSchema(jobConf, pair); GenericRecordAvroOutputFormat.setOutputPath(jobConf, new Path( getConf().get(HIHOConf.INPUT_OUTPUT_PATH))); DBQueryInputFormat.setInput( jobConf, inputQuery, inputBoundingQuery, params); jobConf.setInputFormat(DBQueryInputFormat.class); jobConf.setMapperClass(DBInputAvroMapper.class); jobConf.setMapOutputKeyClass(NullWritable.class); jobConf.setMapOutputValueClass(AvroValue.class); jobConf.setOutputKeyClass(NullWritable.class); jobConf.setOutputValueClass(Text.class); jobConf.setOutputFormat(GenericRecordAvroOutputFormat.class); jobConf.setJarByClass(DBQueryInputJob.class); jobConf.setStrings( "io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization,org.apache.avro.mapred.AvroSerialization"); jobConf.setNumReduceTasks(0); /* * jobConf.setOutputFormat(org.apache.hadoop.mapred. * SequenceFileOutputFormat.class); * org.apache.hadoop.mapred.SequenceFileOutputFormat * .setOutputPath(jobConf, new * Path(getConf().get(HIHOConf.INPUT_OUTPUT_PATH))); JobClient.runJob(jobConf); } catch (Throwable e) { e.printStackTrace(); } }*/ } }