import java.io.IOException;

import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.RegexStringComparator;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
import org.godhuli.rhipe.RHRaw;
// Note: RHResult, RHHBaseRecorder and DataType used below come from the RHIPE/R2Time
// helper libraries and need their corresponding imports as well.

/**
 * Average of data points in OpenTSDB.
 * The averaging logic is currently commented out, so the job only counts data points.
 */
public class MR {

    static class Mapper1 extends TableMapper<IntWritable, FloatWritable> {

        //private int numRecords = 0;
        private static final IntWritable KEY = new IntWritable(1);
        private static final byte[] TS = new byte[4];
        private final FloatWritable VALUE = new FloatWritable(1);

        @Override
        public void map(ImmutableBytesWritable row, Result values, Context context) throws IOException {
            // Copy the 4-byte base timestamp that follows the metric UID (default 3-byte width)
            // in the OpenTSDB row key.
            System.arraycopy(row.get(), 3, TS, 0, 4);
            for (KeyValue kv : values.raw()) {
                //final short delta = (short) ((Bytes.toShort(kv.getQualifier()) & 0xFFFF) >>> 4);
                //int timestamp = Bytes.toInt(TS) + delta;
                //System.out.print("\nBase timestamp from row key => " + Bytes.toInt(TS) + " -- timestamp " + timestamp + "\n");
                //System.out.print("Value " + Bytes.toLong(kv.getValue()) + "\n");

                // Assumes every cell holds an 8-byte integer value.
                VALUE.set(Bytes.toLong(kv.getValue()));
                //KEY.set(timestamp);
                try {
                    // The key is the constant 1, so all values end up in a single reduce group.
                    context.write(KEY, VALUE);
                } catch (InterruptedException e) {
                    throw new IOException(e);
                }
                //numRecords++;
            }
        }
    }

    public static class MyReducer extends TableReducer<IntWritable, FloatWritable, ImmutableBytesWritable> {

        public static final byte[] CF = "cf".getBytes();
        public static final byte[] COUNT = "count".getBytes();

        @Override
        public void reduce(IntWritable key, Iterable<FloatWritable> values, Context context)
                throws IOException, InterruptedException {
            //int sum = 0;
            int count = 0;
            for (FloatWritable value : values) {
                //sum += value.get();
                count++;
            }
            //float avg = sum / count;
            Put put = new Put(Bytes.toBytes(key.get()));
            //System.out.print("Mean value => " + avg + "\n");
            System.out.print("Total data points => " + count + "\n");
            put.add(Bytes.toBytes("number"), Bytes.toBytes(""), Bytes.toBytes(count));
            context.write(null, put);
        }
    }

    @SuppressWarnings("deprecation")
    public static void main(String[] args) throws Exception {
        // Configuration config = new Configuration();
        // config.set("fs.default.name", "hdfs://home/bikash/tmp");
        // config.set("mapred.job.tracker", "localhost:50030/");

        // Example arguments: "1973/01/01-00:00:00" "2014/07/22-10:00:00" "r2time.load.test1" "haisen24.ux.uis.no"
        String sdate = "1973/01/01-00:00:00";
        String edate = "1975/01/01-01:00:00";
        String metric = "r2time.load.test1";
        String zookeeper = "haisen24.ux.uis.no";
        if (args.length >= 4) {
            sdate = args[0];
            edate = args[1];
            metric = args[2];
            zookeeper = args[3];
        } else {
System.err.println("Please enter the start data, end data, metric and zookeeper."); System.exit(1); } Configuration conf = HBaseConfiguration.create(); String zookeeperQuorum = zookeeper; String HBaseMaster = "haisen23.ux.uis.no:60000"; // String zookeeperQuorum = "localhost"; // String HBaseMaster = "localhost:60000"; conf.set("hbase.zookeeper.quorum", zookeeperQuorum); conf.set("hbase.zookeeper.property.clientPort", "2181"); // conf.set("mapred.job.tracker", "haisen22.ux.uis.no:8021"); // conf.set("fs.default.name", "hdfs://haisen20.ux.uis.no:8020"); conf.set("hbase.master", HBaseMaster); Job job = new Job(conf, "MapReduce by Hbase"); job.setJarByClass(MR.class); /***create scan object ***/ DataType dt = new DataType(); dt.setHbaseClient(zookeeperQuorum); String[] tagk = {"1","host"}; String[] tagv = {"1","*"}; String[] val = DataType.getRowkeyFilter(sdate,edate,metric,tagk, tagv); Scan scans = new Scan(); scans.setStartRow(org.apache.commons.codec.binary.Base64.decodeBase64(val[0])); scans.setStopRow(org.apache.commons.codec.binary.Base64.decodeBase64(val[1])); RowFilter rowFilterRegex = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator( org.apache.hadoop.hbase.util.Bytes.toString(org.apache.commons.codec.binary.Base64.decodeBase64(val[2])))); scans.setFilter(rowFilterRegex); scans.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs scans.setCacheBlocks(false); job.setOutputKeyClass(RHRaw.class); job.setOutputValueClass(RHResult.class); job.setInputFormatClass(RHHBaseRecorder.class); String columns = "t:"; // comma seperated scans.addColumns(columns); //job.setMapperClass(Mapper1.class); TableMapReduceUtil.initTableMapperJob("tsdb", scans, Mapper1.class, IntWritable.class,FloatWritable.class, job); job.setReducerClass(MyReducer.class); //job.setNumReduceTasks(1); // at least one, adjust as required //FileOutputFormat.setWorkOutputPath(job, new Path("/home/bikash/tmp/out")); // adjust directories as required //FileOutputFormat.setOutputPath(job, new Path("/home/bikash/tmp/1111223")); TableMapReduceUtil.initTableReducerJob("out", MyReducer.class, job); //FileOutputFormat.setOutputPath(job, new Path("/tmp/mr/mySummaryFile")); // adjust directories as required System.exit(job.waitForCompletion(true) ? 0 : 1); } }