/************************** * Author:Bikash Agrawal * Email: er.bikash21@gmail.com * Created: 10 May 2013 * Website: www.bikashagrawal.com.np * * Description: This class is used to get timeseries data from tsdb table. */ import java.io.IOException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Calendar; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.filter.CompareFilter; import org.apache.hadoop.hbase.filter.RegexStringComparator; import org.apache.hadoop.hbase.filter.RowFilter; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Pair; public class Fun { private static Log LOG = LogFactory.getLog(Fun.class); public static long getEndTimeAtResolution(long time, int resolution) { Calendar cal = Calendar.getInstance(); cal.setTimeInMillis(time); switch (resolution) { case Calendar.DATE: cal.set(Calendar.HOUR, 23); case Calendar.HOUR: cal.set(Calendar.MINUTE, 59); case Calendar.MINUTE: cal.set(Calendar.SECOND, 59); case Calendar.SECOND: cal.set(Calendar.MILLISECOND, 999); default: break; } return cal.getTimeInMillis(); } public static Scan[] generateHexPrefixScans(Calendar startCal, Calendar endCal, String dateFormat, ArrayList<Pair<String,String>> columns, int caching, boolean cacheBlocks) { ArrayList<Scan> scans = new ArrayList<Scan>(); String[] salts = new String[16]; for (int i=0; i < 16; i++) { salts[i] = Integer.toHexString(i); } SimpleDateFormat rowsdf = new SimpleDateFormat(dateFormat); long endTime = getEndTimeAtResolution(endCal.getTimeInMillis(), Calendar.DATE); while (startCal.getTimeInMillis() < endTime) { int d = Integer.parseInt(rowsdf.format(startCal.getTime())); for (int i=0; i < salts.length; i++) { Scan s = new Scan(); s.setCaching(caching); s.setCacheBlocks(cacheBlocks); // add columns for (Pair<String,String> pair : columns) { s.addColumn(pair.getFirst().getBytes(), pair.getSecond().getBytes()); } //01012310eded859-a6b8-463c-8c8e-721592101231 s.setStartRow(Bytes.toBytes(salts[i] + String.format("%06d", d))); s.setStopRow(Bytes.toBytes(salts[i] + String.format("%06d", d + 1))); if (LOG.isDebugEnabled()) { LOG.info("Adding start-stop range: " + salts[i] + String.format("%06d", d) + " - " + salts[i] + String.format("%06d", d + 1)); } scans.add(s); } startCal.add(Calendar.DATE, 1); } return scans.toArray(new Scan[scans.size()]); } public static Scan[] generateBytePrefixScans(Calendar startCal, Calendar endCal, String dateFormat, ArrayList<Pair<String,String>> columns, int caching, boolean cacheBlocks) { ArrayList<Scan> scans = new ArrayList<Scan>(); SimpleDateFormat rowsdf = new SimpleDateFormat(dateFormat); long endTime = getEndTimeAtResolution(endCal.getTimeInMillis(), Calendar.DATE); byte[] temp = new byte[1]; while (startCal.getTimeInMillis() < endTime) { for (byte b=Byte.MIN_VALUE; b < Byte.MAX_VALUE; b++) { int d = Integer.parseInt(rowsdf.format(startCal.getTime())); Scan s = new Scan(); s.setCaching(caching); s.setCacheBlocks(cacheBlocks); // add columns for (Pair<String,String> pair : columns) { s.addColumn(pair.getFirst().getBytes(), pair.getSecond().getBytes()); } temp[0] = b; s.setStartRow(Bytes.add(temp , Bytes.toBytes(String.format("%06d", d)))); s.setStopRow(Bytes.add(temp , Bytes.toBytes(String.format("%06d", d + 1)))); if (LOG.isDebugEnabled()) { LOG.info("Adding start-stop range: " + temp + String.format("%06d", d) + " - " + temp + String.format("%06d", d + 1)); } scans.add(s); } startCal.add(Calendar.DATE, 1); } return scans.toArray(new Scan[scans.size()]); } public static Scan[] generateScans(String st, String en, ArrayList<Pair<String,String>> columns,int caching, boolean cacheBlocks) { ArrayList<Scan> scans = new ArrayList<Scan>(); Scan s = new Scan(); s.setCaching(caching); s.setCacheBlocks(cacheBlocks); if(columns !=null){ for (Pair<String,String> pair : columns) { String second = pair.getSecond(); if(second == null) s.addFamily(pair.getFirst().getBytes()); else s.addColumn(pair.getFirst().getBytes(), pair.getSecond().getBytes()); } } if(st != null) { byte[] stb1 = org.apache.commons.codec.binary.Base64.decodeBase64(st); s.setStartRow(stb1); } if(en != null) { byte[] enb2 = org.apache.commons.codec.binary.Base64.decodeBase64(en); s.setStopRow(enb2); } scans.add(s); return scans.toArray(new Scan[scans.size()]); } public static Scan[] generateScansRows(String st, String en, int caching, boolean cacheBlocks, String filter, int batch) { //LOG.info(" End row------ " +Bytes.toStringBinary( org.apache.commons.codec.binary.Base64.decodeBase64(en))); LOG.info(" cache----- " + caching); ArrayList<Scan> scans = new ArrayList<Scan>(); Scan s = new Scan(); //s.setCacheBlocks(false); s.setBatch(batch); s.setCaching(caching); // 1 is the default in Scan, which will be bad for // MapReduce jobs s.setCacheBlocks(false); // don't set to true for MR jobs if(st != null) { byte[] stb1 = org.apache.commons.codec.binary.Base64.decodeBase64(st); LOG.info(" Start row------ " +Bytes.toStringBinary(stb1)); s.setStartRow(stb1); } if(en != null) { byte[] enb2 = org.apache.commons.codec.binary.Base64.decodeBase64(en); LOG.info(" End row------ " +Bytes.toStringBinary(enb2)); s.setStopRow(enb2); } //LOG.info(" Filter- ----- " + filter); RowFilter rowFilterRegex = new RowFilter(CompareFilter.CompareOp.EQUAL, new RegexStringComparator( Bytes.toString(org.apache.commons.codec.binary.Base64.decodeBase64(filter)))); LOG.info(" Filter------ " + Bytes.toStringBinary(org.apache.commons.codec.binary.Base64.decodeBase64(filter))); s.setFilter(rowFilterRegex); /*try { getSize(s); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); }*/ scans.add(s); return scans.toArray(new Scan[scans.size()]); } public static void getSize(Scan s) throws IOException { //Scan s = new Scan(); //System.out.print("\n Here \n"); Configuration conf = HBaseConfiguration.create(); HTable table = new HTable(conf, "tsdb"); //HTable tt = new HTable(HBaseConfiguration.create(), "tsdb"); ResultScanner ss = table.getScanner(s); int col = 0; int size = 0; for(Result r:ss){ col = 0; for(KeyValue kv : r.raw()){ col = col+kv.getLength(); // System.out.print("\n Length keyValue " +kv.getLength() + "\n"); } size = size + col/1000; } LOG.info("\n Size of HBase block in KB => " + size); //System.out.print("\n Size in " +size + "\n"); } public static Scan[] generateScansTbl(int caching, boolean cacheBlocks, int batch) { ArrayList<Scan> scans = new ArrayList<Scan>(); Scan s = new Scan(); //s.setCacheBlocks(false); s.setBatch(batch); s.setCaching(caching); // 1 is the default in Scan, which will be bad for // MapReduce jobs s.setCacheBlocks(false); // don't set to true for MR jobs scans.add(s); return scans.toArray(new Scan[scans.size()]); } }