package org.jai.hbase;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.UUID;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MiniHBaseCluster;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.filter.PrefixFilter;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.zookeeper.MiniZooKeeperCluster;
import org.jai.flume.sinks.hbase.serializer.HbaseJsonEventSerializer;
import org.jai.hadoop.HadoopClusterService;
import org.jai.search.model.SearchFacetName;
import org.joda.time.DateTime;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.hadoop.hbase.HbaseTemplate;
import org.springframework.data.hadoop.hbase.RowMapper;
import org.springframework.data.hadoop.hbase.TableCallback;
import org.springframework.stereotype.Service;
import com.google.common.base.Functions;
import com.google.common.collect.Ordering;
/**
 * {@link HbaseService} implementation backed by an embedded HBase mini
 * cluster. Table reads and writes go through the injected
 * {@link HbaseTemplate}; the mini clusters are created by {@link #setup()}.
 */
@Service
public class HbaseServiceImpl implements HbaseService {
private static final Logger LOG = LoggerFactory
.getLogger(HbaseServiceImpl.class);
// Spring-injected template used for all table reads and writes.
@Autowired
private HbaseTemplate hbaseTemplate;
// Spring-injected service; setup() asks it for the HDFS URI used as hbase.rootdir.
@Autowired
private HadoopClusterService hadoopClusterService;
// Embedded HBase cluster; assigned in setup().
private MiniHBaseCluster miniHBaseCluster;
// Embedded ZooKeeper cluster; assigned in setup().
private MiniZooKeeperCluster miniZooKeeperCluster;
/**
 * Starts the embedded ZooKeeper cluster and the HBase mini cluster, then
 * creates the "searchclicks" table.
 *
 * @throws RuntimeException if a cluster fails to start, or if the calling
 *         thread is interrupted while the clusters are starting (the
 *         interrupt flag is restored before the exception is thrown)
 */
@Override
public void setup() {
    final String errMsg = "Error occured starting Mini Hbase cluster";
    try {
        LOG.info("Setting up Hbase mini cluster!");
        File clusterTestDirRoot = new File("target/zookeeper");
        // Best effort only: the result is ignored on purpose because every
        // run works in its own UUID-named sub directory below this root.
        clusterTestDirRoot.delete();
        File clusterTestDir = new File(clusterTestDirRoot, "/dfscluster_"
                + UUID.randomUUID().toString()).getAbsoluteFile();

        LOG.info("Setting up Hbase zookeeper mini cluster!");
        int clientPort = 10235;
        miniZooKeeperCluster = new MiniZooKeeperCluster();
        miniZooKeeperCluster.setDefaultClientPort(clientPort);
        miniZooKeeperCluster.startup(clusterTestDir);
        LOG.info("Setting up Hbase zookeeper mini cluster done!");

        LOG.info("Setting up Hbase mini cluster master!");
        Configuration config = HBaseConfiguration.create();
        config.set("hbase.tmp.dir",
                new File("target/hbasetom").getAbsolutePath());
        config.set("hbase.rootdir", hadoopClusterService.getHDFSUri()
                + "/hbase");
        config.set("hbase.master.port", "44335");
        config.set("hbase.master.info.port", "44345");
        config.set("hbase.regionserver.port", "44435");
        config.set("hbase.regionserver.info.port", "44445");
        config.set("hbase.master.distributed.log.replay", "false");
        config.set("hbase.cluster.distributed", "false");
        config.set("hbase.master.distributed.log.splitting", "false");
        // Not configured here: hbase.zookeeper.peerport,
        // hbase.zookeeper.leaderport.
        // HBase must talk to the ZooKeeper instance started above.
        config.set("hbase.zookeeper.property.clientPort",
                Integer.toString(clientPort));
        config.set("zookeeper.znode.parent", "/hbase");
        miniHBaseCluster = new MiniHBaseCluster(config, 1);
        miniHBaseCluster.startMaster();
        LOG.info("Setting up Hbase mini cluster done!");
    } catch (IOException e) {
        LOG.error(errMsg, e);
        throw new RuntimeException(errMsg, e);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers further up can still
        // observe that this thread was asked to stop.
        Thread.currentThread().interrupt();
        LOG.error(errMsg, e);
        throw new RuntimeException(errMsg, e);
    }
    setupSearchEventsTable();
}
/**
 * Stops the embedded HBase cluster and then the embedded ZooKeeper cluster.
 * <p>
 * Shutdown is best effort: an {@link IOException} is logged, not rethrown.
 * ZooKeeper is stopped even if stopping HBase fails, and clusters that were
 * never started (for example because {@code setup()} failed) are skipped.
 */
@Override
public void shutdown() {
    try {
        try {
            if (miniHBaseCluster != null) {
                // NOTE(review): index 1 is kept from the original code;
                // verify it addresses the intended region server / master
                // for a cluster created with a single region server.
                miniHBaseCluster.stopRegionServer(1);
                miniHBaseCluster.stopMaster(1);
                miniHBaseCluster.waitUntilShutDown();
            }
        } finally {
            // Always release ZooKeeper, even when the HBase part failed.
            if (miniZooKeeperCluster != null) {
                miniZooKeeperCluster.shutdown();
            }
        }
    } catch (IOException e) {
        // Deliberately not rethrown: a failed shutdown must not break the
        // caller, but it should be visible in the logs.
        LOG.warn("Error occurred while shutting down Mini Hbase cluster", e);
    }
}
/**
 * Creates the "searchclicks" table with the client, search and filters
 * column families defined by {@link HbaseJsonEventSerializer}.
 *
 * @throws RuntimeException wrapping the {@link IOException} if the table
 *         cannot be created
 */
@Override
public void setupSearchEventsTable() {
    LOG.debug("Setting up searchclicks table!");
    String tableName = "searchclicks";
    TableName name = TableName.valueOf(tableName);
    HTableDescriptor desc = new HTableDescriptor(name);
    // Each family is registered exactly once; adding the same family twice
    // is at best redundant and is rejected by newer HBase versions.
    desc.addFamily(new HColumnDescriptor(
            HbaseJsonEventSerializer.COLUMFAMILY_CLIENT_BYTES));
    desc.addFamily(new HColumnDescriptor(
            HbaseJsonEventSerializer.COLUMFAMILY_SEARCH_BYTES));
    desc.addFamily(new HColumnDescriptor(
            HbaseJsonEventSerializer.COLUMFAMILY_FILTERS_BYTES));
    // try-with-resources closes the admin connection even when
    // createTable fails.
    try (HBaseAdmin hBaseAdmin = new HBaseAdmin(miniHBaseCluster.getConf())) {
        hBaseAdmin.createTable(desc);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    LOG.debug("Setting up searchclicks table done!");
}
/**
 * Stores the given event payload as a new row of the "searchclicks" table.
 * The row key is a random UUID and the payload is written to the
 * "eventid" qualifier of the client column family.
 *
 * @param body raw event bytes to store
 */
@Override
public void insertEventData(final byte[] body) {
    LOG.debug("Inserting searchclicks table row content event!");
    hbaseTemplate.execute("searchclicks", new TableCallback<Object>() {
        @Override
        public Object doInTable(HTableInterface table) throws Throwable {
            String rowId = UUID.randomUUID().toString();
            Put p = new Put(Bytes.toBytes(rowId));
            LOG.debug("Inserting searchclicks table row id: {}", rowId);
            p.add(HbaseJsonEventSerializer.COLUMFAMILY_CLIENT_BYTES,
                    Bytes.toBytes("eventid"), body);
            table.put(p);
            // The table is not closed here: it is handed in by the
            // template, which manages the table around the callback
            // (same convention as the callback in testHbaseServer()).
            return null;
        }
    });
    LOG.debug("Inserting searchclicks table row content event done!");
}
/**
 * Scans the client, search and filters column families of the
 * "searchclicks" table and renders every row as the string form of its
 * raw cells. Each rendered row is also written to the debug log.
 *
 * @return one string per scanned row
 */
@Override
public List<String> getSearchClicks() {
    LOG.debug("Checking searchclicks table content!");

    // Renders a row as the array-string of all its cells.
    final RowMapper<String> cellDumpMapper = new RowMapper<String>() {
        @Override
        public String mapRow(Result result, int rowNum) throws Exception {
            return Arrays.toString(result.rawCells());
        }
    };

    final Scan allFamilies = new Scan()
            .addFamily(HbaseJsonEventSerializer.COLUMFAMILY_CLIENT_BYTES)
            .addFamily(HbaseJsonEventSerializer.COLUMFAMILY_SEARCH_BYTES)
            .addFamily(HbaseJsonEventSerializer.COLUMFAMILY_FILTERS_BYTES);

    final List<String> dumpedRows = hbaseTemplate.find("searchclicks",
            allFamilies, cellDumpMapper);

    final Iterator<String> it = dumpedRows.iterator();
    while (it.hasNext()) {
        LOG.debug("searchclicks table content, Table returned row: {}",
                it.next());
    }

    LOG.debug("Checking searchclicks table content done!");
    return dumpedRows;
}
/**
 * Returns the row keys of all "searchclicks" rows that have a
 * "querystring" column in the search family whose value differs from
 * "jaiblahblah". Rows without that column are filtered out.
 *
 * @return matching row keys, decoded as UTF-8 strings
 */
@Override
public List<String> getSearchClicksRowKeysWithValidQueryString() {
    LOG.debug("Checking getSearchClicksRowKeys searchclicks table content!");
    Scan scan = new Scan();
    scan.addFamily(HbaseJsonEventSerializer.COLUMFAMILY_SEARCH_BYTES);
    SingleColumnValueFilter filter = new SingleColumnValueFilter(
            HbaseJsonEventSerializer.COLUMFAMILY_SEARCH_BYTES,
            Bytes.toBytes("querystring"), CompareOp.NOT_EQUAL,
            Bytes.toBytes("jaiblahblah"));
    // Without this, rows that lack the column would pass the filter.
    filter.setFilterIfMissing(true);
    scan.setFilter(filter);
    List<String> rows = hbaseTemplate.find("searchclicks", scan,
            new RowMapper<String>() {
                @Override
                public String mapRow(Result result, int rowNum)
                        throws Exception {
                    // Decode with the same charset Bytes.toBytes encodes
                    // with, instead of the platform default.
                    return Bytes.toString(result.getRow());
                }
            });
    for (String row : rows) {
        LOG.debug("searchclicks table content, Table returned row key: {}", row);
    }
    LOG.debug("Checking getSearchClicksRowKeys searchclicks table content done!");
    return rows;
}
/**
 * Counts the rows of the "searchclicks" table that are returned when
 * reading the client column family. The first value of each row is
 * written to the debug log.
 *
 * @return number of rows found
 */
@Override
public int getTotalSearchClicksCount() {
    LOG.debug("Checking searchclicks table count!");
    List<String> rows = hbaseTemplate.find("searchclicks",
            Bytes.toString(HbaseJsonEventSerializer.COLUMFAMILY_CLIENT_BYTES),
            new RowMapper<String>() {
                @Override
                public String mapRow(Result result, int rowNum)
                        throws Exception {
                    // Bytes.toString decodes with the charset used by
                    // Bytes.toBytes and tolerates a missing (null) value.
                    return Bytes.toString(result.value());
                }
            });
    for (String row : rows) {
        LOG.debug("Table count returned row is : {}", row);
    }
    LOG.debug("Checking searchclicks table count done!");
    // Every mapped row counts, including rows mapped to null.
    return rows.size();
}
/**
 * Removes all data from the "searchclicks" table by disabling and
 * deleting the table and then creating it again via
 * {@link #setupSearchEventsTable()}.
 *
 * @throws RuntimeException wrapping the {@link IOException} if the table
 *         cannot be disabled or deleted
 */
@Override
public void removeAll() {
    LOG.debug("Setting up searchclicks table!");
    String tableName = "searchclicks";
    // try-with-resources closes the admin connection even when
    // disabling or deleting the table fails.
    try (HBaseAdmin hBaseAdmin = new HBaseAdmin(miniHBaseCluster.getConf())) {
        // A table has to be disabled before it can be deleted.
        hBaseAdmin.disableTable(tableName);
        hBaseAdmin.deleteTable(tableName);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    setupSearchEventsTable();
    LOG.debug("Setting up searchclicks table done!");
}
/**
 * Smoke test for the HBase server: creates the table "MyTable" with the
 * column family "SomeColumn", writes one row through the template, reads
 * the family back and prints every returned row to standard output.
 *
 * @throws RuntimeException wrapping the {@link IOException} if the table
 *         cannot be created
 */
@Override
public void testHbaseServer() {
    LOG.debug("Testing hbase server!");
    final String tableName = "MyTable";
    final String columnFamilyName = "SomeColumn";
    TableName name = TableName.valueOf(tableName);
    HTableDescriptor desc = new HTableDescriptor(name);
    // Bytes.toBytes keeps the encoding consistent with the Put below
    // (String.getBytes() would use the platform default charset).
    HColumnDescriptor columnFamily = new HColumnDescriptor(
            Bytes.toBytes(columnFamilyName));
    desc.addFamily(columnFamily);
    // try-with-resources closes the admin connection even when
    // createTable fails.
    try (HBaseAdmin hBaseAdmin = new HBaseAdmin(miniHBaseCluster.getConf())) {
        hBaseAdmin.createTable(desc);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    // writing to 'MyTable'
    hbaseTemplate.execute(tableName, new TableCallback<Object>() {
        @Override
        public Object doInTable(HTableInterface table) throws Throwable {
            Put p = new Put(Bytes.toBytes("SomeRow"));
            p.add(Bytes.toBytes(columnFamilyName),
                    Bytes.toBytes("SomeQualifier"), Bytes.toBytes("AValue"));
            table.put(p);
            return null;
        }
    });
    // read each row from 'MyTable'
    List<String> rows = hbaseTemplate.find(tableName, columnFamilyName,
            new RowMapper<String>() {
                @Override
                public String mapRow(Result result, int rowNum)
                        throws Exception {
                    return result.toString();
                }
            });
    for (String row : rows) {
        System.out.println("Printing row:" + row);
    }
    LOG.debug("Hbase server testing done!");
}
/**
 * Counts the "searchclicks" rows that carry a "customerid" column in the
 * client column family. Rows without that column are skipped.
 *
 * @return number of rows that have a customer id
 */
@Override
public int findTotalRecordsForValidCustomers() {
    int totalCount = 0;
    List<String> rows = hbaseTemplate.find("searchclicks",
            Bytes.toString(HbaseJsonEventSerializer.COLUMFAMILY_CLIENT_BYTES),
            new RowMapper<String>() {
                @Override
                public String mapRow(Result result, int rowNum)
                        throws Exception {
                    // Bytes.toString maps a missing column (null) to null
                    // instead of throwing, so the null check below can
                    // actually skip rows without a customer id.
                    return Bytes.toString(result.getValue(
                            HbaseJsonEventSerializer.COLUMFAMILY_CLIENT_BYTES,
                            Bytes.toBytes("customerid")));
                }
            });
    for (String row : rows) {
        LOG.debug(
                "Table count findTotalRecordsForValidCustomers returned row is : {}",
                row);
        if (row != null) {
            totalCount++;
        }
    }
    LOG.debug(
            "findTotalRecordsForValidCustomers searchclicks table count is: {}",
            totalCount);
    return totalCount;
}
/**
 * Finds the most frequently used search query strings among the
 * "searchclicks" rows whose "createdtimestampinmillis" column lies within
 * the last hour.
 * <p>
 * Rows without a timestamp or without a query string are ignored.
 *
 * @return up to ten query strings, most frequent first; the order of
 *         query strings with equal counts is unspecified
 */
@Override
public List<String> findTopTenSearchQueryStringForLastAnHour() {
    Scan scan = new Scan();
    scan.addColumn(HbaseJsonEventSerializer.COLUMFAMILY_CLIENT_BYTES,
            Bytes.toBytes("createdtimestampinmillis"));
    scan.addColumn(HbaseJsonEventSerializer.COLUMFAMILY_SEARCH_BYTES,
            Bytes.toBytes("querystring"));
    // One cut-off for the whole scan, so all rows are judged against the
    // same instant.
    final long oneHourAgoMillis = new DateTime().minusHours(1).getMillis();

    List<String> rows = hbaseTemplate.find("searchclicks", scan,
            new RowMapper<String>() {
                @Override
                public String mapRow(Result result, int rowNum)
                        throws Exception {
                    // Bytes.toString yields null for a missing column
                    // instead of throwing.
                    String createdtimestampinmillis = Bytes.toString(
                            result.getValue(
                                    HbaseJsonEventSerializer.COLUMFAMILY_CLIENT_BYTES,
                                    Bytes.toBytes("createdtimestampinmillis")));
                    String querystring = Bytes.toString(
                            result.getValue(
                                    HbaseJsonEventSerializer.COLUMFAMILY_SEARCH_BYTES,
                                    Bytes.toBytes("querystring")));
                    LOG.debug(
                            "findTopTenSearchQueryStringForLastAnHour returned row is, time: {} , querystring: {}",
                            createdtimestampinmillis, querystring);
                    if (createdtimestampinmillis == null
                            || querystring == null) {
                        return null;
                    }
                    boolean withinLastHour = Long
                            .parseLong(createdtimestampinmillis) > oneHourAgoMillis;
                    return withinLastHour ? querystring : null;
                }
            });

    // Count how often each query string occurred.
    Map<String, Integer> counts = new HashMap<>();
    for (String row : rows) {
        if (row != null) {
            Integer current = counts.get(row);
            counts.put(row, current == null ? 1 : current + 1);
            LOG.debug(
                    "findTopTenSearchQueryStringForLastAnHour valid query string value is : {}",
                    row);
        }
    }

    // Ascending by count; the most frequent entries are at the end.
    List<String> sortedKeys = Ordering.natural()
            .onResultOf(Functions.forMap(counts))
            .immutableSortedCopy(counts.keySet());
    List<String> topQueries = new ArrayList<>();
    // Inclusive bound: with ten or fewer distinct queries all of them
    // are returned.
    int limit = Math.min(10, sortedKeys.size());
    for (int j = 1; j <= limit; j++) {
        String queryString = sortedKeys.get(sortedKeys.size() - j);
        LOG.debug("Top queries are sortedKeys, query: {}", queryString);
        topQueries.add(queryString);
    }
    LOG.debug("Checking findTopTenSearchQueryStringForLastAnHour done!");
    return topQueries;
}
/**
 * Finds the most frequently used facet filter values among the
 * "searchclicks" rows whose "createdtimestampinmillis" column lies within
 * the last hour. Only the facet fields listed in
 * {@code SearchFacetName.categoryFacetFields} are considered, and values
 * are counted per (facet field, facet value) pair.
 * <p>
 * Rows without a timestamp are ignored.
 *
 * @return up to ten facet values, most frequent first; the order of
 *         entries with equal counts is unspecified
 */
@Override
public List<String> findTopTenSearchFiltersForLastAnHour() {
    Scan scan = new Scan();
    scan.addColumn(HbaseJsonEventSerializer.COLUMFAMILY_CLIENT_BYTES,
            Bytes.toBytes("createdtimestampinmillis"));
    for (String facetField : SearchFacetName.categoryFacetFields) {
        scan.addColumn(HbaseJsonEventSerializer.COLUMFAMILY_FILTERS_BYTES,
                Bytes.toBytes(facetField));
    }
    // One cut-off for the whole scan, so all rows are judged against the
    // same instant.
    final long oneHourAgoMillis = new DateTime().minusHours(1).getMillis();

    // facet field -> every value seen for it within the last hour
    final Map<String, List<String>> columnData = new HashMap<>();
    hbaseTemplate.find("searchclicks", scan, new RowMapper<String>() {
        @Override
        public String mapRow(Result result, int rowNum) throws Exception {
            // Bytes.toString yields null for a missing column instead of
            // throwing.
            String createdtimestampinmillis = Bytes.toString(result.getValue(
                    HbaseJsonEventSerializer.COLUMFAMILY_CLIENT_BYTES,
                    Bytes.toBytes("createdtimestampinmillis")));
            if (createdtimestampinmillis == null) {
                return null;
            }
            for (String facetField : SearchFacetName.categoryFacetFields) {
                byte[] value = result.getValue(
                        HbaseJsonEventSerializer.COLUMFAMILY_FILTERS_BYTES,
                        Bytes.toBytes(facetField));
                if (value != null
                        && Long.parseLong(createdtimestampinmillis) > oneHourAgoMillis) {
                    String facetValue = Bytes.toString(value);
                    LOG.debug("Facet field: {} and Facet Value: {}",
                            facetField, facetValue);
                    List<String> list = columnData.get(facetField);
                    if (list == null) {
                        list = new ArrayList<>();
                        columnData.put(facetField, list);
                    }
                    list.add(facetValue);
                }
            }
            // Results are collected in columnData; the mapped value is unused.
            return null;
        }
    });

    // Count occurrences per "<facet field><separator><facet value>" key.
    final Map<String, Integer> counts = new HashMap<>();
    String separatorToken = "_jaijai_";
    for (Entry<String, List<String>> entry : columnData.entrySet()) {
        for (String facetFilterValue : entry.getValue()) {
            String key = entry.getKey() + separatorToken + facetFilterValue;
            Integer current = counts.get(key);
            counts.put(key, current == null ? 1 : current + 1);
        }
    }

    // Ascending by count; the most frequent entries are at the end.
    List<String> sortedKeys = Ordering.natural()
            .onResultOf(Functions.forMap(counts))
            .immutableSortedCopy(counts.keySet());
    List<String> topFacetFilters = new ArrayList<>();
    // Inclusive bound: with ten or fewer distinct filters all of them
    // are returned.
    int limit = Math.min(10, sortedKeys.size());
    for (int j = 1; j <= limit; j++) {
        String queryString = sortedKeys.get(sortedKeys.size() - j);
        // Limit 2 keeps an empty facet value and any separator that
        // occurs inside the value itself.
        String[] split = queryString.split(separatorToken, 2);
        LOG.debug(
                "Top 10 filters are sortedKeys, FacetCode: {}, FacetValue:{}, Count:{}",
                new Object[] { split[0], split[1], counts.get(queryString) });
        topFacetFilters.add(split[1]);
    }
    LOG.debug("Checking findTopTenSearchFiltersForLastAnHour done!");
    return topFacetFilters;
}
/**
 * Finds the most frequently used facet filter values in the
 * "searchclicks" table, restricting the scan to a time range that covers
 * the last hour. Only the facet fields listed in
 * {@code SearchFacetName.categoryFacetFields} are considered, and values
 * are counted per (facet field, facet value) pair.
 *
 * @return up to ten facet values, most frequent first; the order of
 *         entries with equal counts is unspecified
 * @throws RuntimeException wrapping the {@link IOException} if the time
 *         range cannot be set on the scan
 */
@Override
public List<String> findTopTenSearchFiltersForLastAnHourUsingRangeScan() {
    Scan scan = new Scan();
    for (String facetField : SearchFacetName.categoryFacetFields) {
        scan.addColumn(HbaseJsonEventSerializer.COLUMFAMILY_FILTERS_BYTES,
                Bytes.toBytes(facetField));
    }
    DateTime dateTime = new DateTime();
    try {
        scan.setTimeRange(dateTime.minusHours(1).getMillis(),
                dateTime.getMillis());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }

    // facet field -> every value returned for it by the scan
    final Map<String, List<String>> columnData = new HashMap<>();
    hbaseTemplate.find("searchclicks", scan, new RowMapper<String>() {
        @Override
        public String mapRow(Result result, int rowNum) throws Exception {
            for (String facetField : SearchFacetName.categoryFacetFields) {
                byte[] value = result.getValue(
                        HbaseJsonEventSerializer.COLUMFAMILY_FILTERS_BYTES,
                        Bytes.toBytes(facetField));
                if (value != null) {
                    // Decode with the charset Bytes.toBytes encodes with,
                    // not the platform default.
                    String facetValue = Bytes.toString(value);
                    LOG.debug("Facet field: {} and Facet Value: {}",
                            facetField, facetValue);
                    List<String> list = columnData.get(facetField);
                    if (list == null) {
                        list = new ArrayList<>();
                        columnData.put(facetField, list);
                    }
                    list.add(facetValue);
                }
            }
            // Results are collected in columnData; the mapped value is unused.
            return null;
        }
    });

    // Count occurrences per "<facet field><separator><facet value>" key.
    final Map<String, Integer> counts = new HashMap<>();
    String separatorToken = "_jaijai_";
    for (Entry<String, List<String>> entry : columnData.entrySet()) {
        for (String facetFilterValue : entry.getValue()) {
            String key = entry.getKey() + separatorToken + facetFilterValue;
            Integer current = counts.get(key);
            counts.put(key, current == null ? 1 : current + 1);
        }
    }

    // Ascending by count; the most frequent entries are at the end.
    List<String> sortedKeys = Ordering.natural()
            .onResultOf(Functions.forMap(counts))
            .immutableSortedCopy(counts.keySet());
    List<String> topFacetFilters = new ArrayList<>();
    // Inclusive bound: with ten or fewer distinct filters all of them
    // are returned.
    int limit = Math.min(10, sortedKeys.size());
    for (int j = 1; j <= limit; j++) {
        String queryString = sortedKeys.get(sortedKeys.size() - j);
        // Limit 2 keeps an empty facet value and any separator that
        // occurs inside the value itself.
        String[] split = queryString.split(separatorToken, 2);
        LOG.debug(
                "Top 10 filters are sortedKeys, FacetCode: {}, FacetValue:{}, Count:{}",
                new Object[] { split[0], split[1], counts.get(queryString) });
        topFacetFilters.add(split[1]);
    }
    LOG.debug("Checking findTopTenSearchFiltersForLastAnHourUsingRangeScan done!");
    return topFacetFilters;
}
/**
 * Counts the "searchclicks" rows returned by a scan over the given
 * filters-family column that is restricted to cells equal to
 * {@code columnValue} and to a time range covering the last hour.
 *
 * @param columnName  qualifier of the facet column in the filters family
 * @param columnValue facet value the column has to equal
 * @return number of rows returned by the scan
 * @throws RuntimeException wrapping the {@link IOException} if the time
 *         range cannot be set on the scan
 */
@Override
public int numberOfTimesAFacetFilterClickedInLastAnHour(final String columnName, final String columnValue) {
    Scan scan = new Scan();
    scan.addColumn(HbaseJsonEventSerializer.COLUMFAMILY_FILTERS_BYTES,
            Bytes.toBytes(columnName));
    Filter filter = new SingleColumnValueFilter(
            HbaseJsonEventSerializer.COLUMFAMILY_FILTERS_BYTES,
            Bytes.toBytes(columnName), CompareOp.EQUAL,
            Bytes.toBytes(columnValue));
    scan.setFilter(filter);
    DateTime dateTime = new DateTime();
    try {
        scan.setTimeRange(dateTime.minusHours(1).getMillis(),
                dateTime.getMillis());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    // The mapper only logs; the count is the number of mapped rows.
    int count = hbaseTemplate.find("searchclicks", scan,
            new RowMapper<String>() {
                @Override
                public String mapRow(Result result, int rowNum)
                        throws Exception {
                    byte[] value = result.getValue(
                            HbaseJsonEventSerializer.COLUMFAMILY_FILTERS_BYTES,
                            Bytes.toBytes(columnName));
                    if (value != null) {
                        // Decode with the charset Bytes.toBytes encodes
                        // with, not the platform default.
                        LOG.debug("Facet field: {} and Facet Value: {}",
                                columnName, Bytes.toString(value));
                    }
                    return null;
                }
            }).size();
    LOG.debug("Checking numberOfTimesAFacetFilterClickedInLastAnHour done with count:{}", count);
    return count;
}
/**
 * Collects the search query strings of all "searchclicks" rows whose row
 * key starts with {@code customerId + "-"}, restricted to a scan time
 * range covering the last 30 days. Rows without a "querystring" column
 * are skipped.
 *
 * @param customerId customer whose row-key prefix is scanned
 * @return the query strings found, never {@code null}
 * @throws RuntimeException wrapping the {@link IOException} if the time
 *         range cannot be set on the scan
 */
@Override
public List<String> getAllSearchQueryStringsByCustomerInLastOneMonth(final Long customerId) {
    LOG.debug("Calling getAllSearchQueryStringsByCustomerInLastOneMonth for customerid: {}", customerId);
    Scan scan = new Scan();
    scan.addColumn(HbaseJsonEventSerializer.COLUMFAMILY_SEARCH_BYTES,
            Bytes.toBytes("querystring"));
    Filter filter = new PrefixFilter(Bytes.toBytes(customerId + "-"));
    scan.setFilter(filter);
    DateTime dateTime = new DateTime();
    try {
        scan.setTimeRange(dateTime.minusDays(30).getMillis(),
                dateTime.getMillis());
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    List<String> rows = hbaseTemplate.find("searchclicks", scan,
            new RowMapper<String>() {
                @Override
                public String mapRow(Result result, int rowNum)
                        throws Exception {
                    // Bytes.toString decodes with the charset used by
                    // Bytes.toBytes and maps a missing value to null.
                    LOG.debug("Row is: {}", Bytes.toString(result.getRow()));
                    String queryString = Bytes.toString(result.getValue(
                            HbaseJsonEventSerializer.COLUMFAMILY_SEARCH_BYTES,
                            Bytes.toBytes("querystring")));
                    if (queryString != null) {
                        LOG.debug("Query String: {}", queryString);
                    }
                    return queryString;
                }
            });
    // Drop the rows that had no query string.
    List<String> list = new ArrayList<>();
    for (String string : rows) {
        if (string != null) {
            list.add(string);
        }
    }
    LOG.debug("Checking getAllSearchQueryStringsByCustomerInLastOneMonth done with list:{}", list);
    return list;
}
}