/* Copyright (c) 2005 - 2012 Vertica, an HP company -*- Java -*- */ package com.vertica.hadoop; import java.io.IOException; import java.util.ArrayList; import java.util.Calendar; import java.util.Collection; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.math.BigDecimal; import java.sql.Date; import java.sql.Timestamp; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configured; import org.apache.hadoop.io.DoubleWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import com.vertica.hadoop.VerticaConfiguration; import com.vertica.hadoop.VerticaInputFormat; import com.vertica.hadoop.VerticaOutputFormat; import com.vertica.hadoop.VerticaRecord; public class VerticaExample extends Configured implements Tool { public static class Map extends Mapper<LongWritable, VerticaRecord, Text, DoubleWritable> { public void map(LongWritable key, VerticaRecord value, Context context) throws IOException, InterruptedException { if (value.get(4) != null && value.get(1) != null) { context.write(new Text((String) value.get(4)), new DoubleWritable( (Long) value.get(1))); } } } public static class Reduce extends Reducer<Text, DoubleWritable, Text, VerticaRecord> { VerticaRecord record = null; public void setup(Context context) throws IOException, InterruptedException { super.setup(context); try { record = new VerticaRecord(context.getConfiguration()); } catch (Exception e) { throw new IOException(e); } } public void reduce(Text key, Iterable<DoubleWritable> values, Context context) throws IOException, InterruptedException { if (record == null) { throw new IOException("No output record found"); } record.set("a", 125); record.set("b", true); record.set("c", 'c'); record.set("d", new java.sql.Date(Calendar.getInstance().getTimeInMillis())); record.set("f", 234.526); record.set("t", new java.sql.Timestamp(Calendar.getInstance().getTimeInMillis())); record.set("v", "foobar string"); record.set("z", new byte[10]); context.write(new Text("mrtarget"), record); } } public Job getJob() throws IOException { Configuration conf = getConf(); Job job = new Job(conf); conf = job.getConfiguration(); conf.set("mapreduce.job.tracker", "local"); job.setJobName("vertica test"); job.setOutputKeyClass(Text.class); job.setOutputValueClass(VerticaRecord.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(DoubleWritable.class); job.setInputFormatClass(VerticaInputFormat.class); job.setOutputFormatClass(VerticaOutputFormat.class); job.setJarByClass(VerticaExample.class); job.setMapperClass(Map.class); job.setReducerClass(Reduce.class); VerticaOutputFormat.setOutput(job, "mrtarget", true, "a int", "b boolean", "c char(1)", "d date", "f float", "t timestamp", "v varchar", "z varbinary"); return job; } @SuppressWarnings("serial") @Override public int run(String[] args) throws Exception { Job job = getJob(); VerticaOutputFormat.setOutput(job, "mrtarget", true, "a int", "b boolean", "c char(1)", "d date", "f float", "t timestamp", "v varchar", "z varbinary"); VerticaInputFormat.setInput(job, "select * from allTypes where key = ?", "select distinct key from allTypes"); job.waitForCompletion(true); job = getJob(); Collection<List<Object>> params = new HashSet<List<Object>>() { }; List<Object> param = new ArrayList<Object>(); param.add(new Integer(0)); params.add(param); VerticaInputFormat.setInput(job, "select * from allTypes where key = ?", params); job.waitForCompletion(true); job = getJob(); VerticaInputFormat.setInput(job, "select * from allTypes where key = ?", "0", "1", "2"); job.waitForCompletion(true); return 0; } public static void main(String[] args) throws Exception { int res = ToolRunner.run(new Configuration(), new VerticaExample(), args); System.exit(res); } }