import java.io.IOException;

import org.apache.commons.codec.binary.Base64;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.RegexStringComparator;
import org.apache.hadoop.hbase.filter.RowFilter;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
// import org.godhuli.rhipe.RHRaw; // only needed by the RHIPE leftovers commented out in main()
/**
 * Counts the data points stored for one OpenTSDB metric in the HBase
 * "tsdb" table over a given time range; the pieces needed to compute the
 * average of those points are left in place as comments.
 */
public class MR {

    static class Mapper1 extends TableMapper<IntWritable, FloatWritable> {
        // Every cell is emitted under the same key (1) so that one reducer
        // sees all values and can aggregate them.
        private static final IntWritable KEY = new IntWritable(1);
        // Scratch buffer for the 4-byte base timestamp in the row key.
        private static final byte[] TS = new byte[4];
        private final FloatWritable VALUE = new FloatWritable(1);

        @Override
        public void map(ImmutableBytesWritable row, Result values, Context context)
                throws IOException, InterruptedException {
            // OpenTSDB row key layout: 3-byte metric UID, 4-byte base
            // timestamp, then tag UID pairs; copy out the base timestamp.
            System.arraycopy(row.get(), 3, TS, 0, 4);
            for (KeyValue kv : values.raw()) {
                // The qualifier's top 12 bits hold the offset in seconds from
                // the base timestamp; the low 4 bits are format flags.
                // final short delta = (short) ((Bytes.toShort(kv.getQualifier()) & 0xFFFF) >>> 4);
                // int timestamp = Bytes.toInt(TS) + delta;
                VALUE.set(Bytes.toLong(kv.getValue()));
                // KEY.set(timestamp); // key by timestamp instead of the constant 1
                context.write(KEY, VALUE);
            }
        }
    }
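
    // The commented-out delta logic in Mapper1 can be pulled into a small
    // helper along these lines. A sketch only: it assumes OpenTSDB 1.x
    // second-resolution cells, whose 2-byte qualifier packs a 12-bit offset
    // from the row's base timestamp plus 4 format-flag bits; the helper
    // itself is hypothetical and not part of the original job.
    static int cellTimestamp(byte[] baseTs, byte[] qualifier) {
        final short delta = (short) ((Bytes.toShort(qualifier) & 0xFFFF) >>> 4);
        return Bytes.toInt(baseTs) + delta; // absolute UNIX timestamp, seconds
    }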
    public static class MyReducer extends TableReducer<IntWritable, FloatWritable, ImmutableBytesWritable> {
        public static final byte[] CF = Bytes.toBytes("cf");
        public static final byte[] COUNT = Bytes.toBytes("count");

        @Override
        public void reduce(IntWritable key, Iterable<FloatWritable> values, Context context)
                throws IOException, InterruptedException {
            // float sum = 0;
            int count = 0;
            for (FloatWritable value : values) {
                // sum += value.get();
                count++;
            }
            // float avg = sum / count;
            Put put = new Put(Bytes.toBytes(key.get()));
            // System.out.print("Mean value => " + avg + "\n");
            System.out.print("Total data points => " + count + "\n");
            // Store the count in family "cf", qualifier "count" of the output table.
            put.add(CF, COUNT, Bytes.toBytes(count));
            context.write(null, put);
        }
    }
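
    // To emit the mean rather than only the count, the commented-out sum in
    // MyReducer could be revived roughly as follows (a sketch; the "avg"
    // qualifier is hypothetical, and the "out" table must have the "cf" family):
    //   float sum = 0;
    //   for (FloatWritable value : values) { sum += value.get(); count++; }
    //   put.add(CF, Bytes.toBytes("avg"), Bytes.toBytes(sum / count));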
    @SuppressWarnings("deprecation")
    public static void main(String[] args) throws Exception {
        // Configuration config = new Configuration();
        // config.set("fs.default.name", "hdfs://home/bikash/tmp");
        // config.set("mapred.job.tracker", "localhost:50030/");
        // Example arguments:
        // "1973/01/01-00:00:00" "2014/07/22-10:00:00" "r2time.load.test1" "haisen24.ux.uis.no"
        String sdate = "1973/01/01-00:00:00";
        String edate = "1975/01/01-01:00:00";
        String metric = "r2time.load.test1";
        String zookeeper = "haisen24.ux.uis.no";
        if (args.length == 4) {
            sdate = args[0];
            edate = args[1];
            metric = args[2];
            zookeeper = args[3];
        } else {
            System.err.println("Usage: MR <start date> <end date> <metric> <zookeeper quorum>");
            System.exit(1);
        }
        Configuration conf = HBaseConfiguration.create();
        String zookeeperQuorum = zookeeper;
        String hbaseMaster = "haisen23.ux.uis.no:60000";
        // String zookeeperQuorum = "localhost";
        // String hbaseMaster = "localhost:60000";
        conf.set("hbase.zookeeper.quorum", zookeeperQuorum);
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        // conf.set("mapred.job.tracker", "haisen22.ux.uis.no:8021");
        // conf.set("fs.default.name", "hdfs://haisen20.ux.uis.no:8020");
        conf.set("hbase.master", hbaseMaster);

        Job job = new Job(conf, "MapReduce by HBase");
        job.setJarByClass(MR.class);

        /*** create the Scan object ***/
        DataType dt = new DataType();
        dt.setHbaseClient(zookeeperQuorum);
        String[] tagk = {"1", "host"};
        String[] tagv = {"1", "*"};
        // getRowkeyFilter returns the Base64-encoded start row, stop row, and
        // row-key regex for the requested time range, metric, and tags.
        String[] val = DataType.getRowkeyFilter(sdate, edate, metric, tagk, tagv);
        Scan scans = new Scan();
        scans.setStartRow(Base64.decodeBase64(val[0]));
        scans.setStopRow(Base64.decodeBase64(val[1]));
        RowFilter rowFilterRegex = new RowFilter(CompareFilter.CompareOp.EQUAL,
                new RegexStringComparator(Bytes.toString(Base64.decodeBase64(val[2]))));
        scans.setFilter(rowFilterRegex);
        scans.setCaching(500);       // Scan's default of 1 is bad for MapReduce jobs
        scans.setCacheBlocks(false); // don't pollute the block cache with a full scan
        // Leftover RHIPE settings; TableMapReduceUtil below configures the
        // input/output formats and key/value classes, so these stay disabled:
        // job.setOutputKeyClass(RHRaw.class);
        // job.setOutputValueClass(RHResult.class);
        // job.setInputFormatClass(RHHBaseRecorder.class);
        scans.addFamily(Bytes.toBytes("t")); // OpenTSDB keeps all data points in family "t"
        TableMapReduceUtil.initTableMapperJob("tsdb", scans, Mapper1.class,
                IntWritable.class, FloatWritable.class, job);
        // job.setNumReduceTasks(1); // at least one, adjust as required
        TableMapReduceUtil.initTableReducerJob("out", MyReducer.class, job);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
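
// Example invocation (hypothetical jar name; the four arguments match the
// order read in main()):
//
//   hadoop jar mr-count.jar MR "1973/01/01-00:00:00" "1975/01/01-01:00:00" \
//       "r2time.load.test1" "haisen24.ux.uis.no"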