package com.hadooparchitecturebook.frauddetection;
import com.hadooparchitecturebook.frauddetection.Utils.HBaseUtils;
import com.hadooparchitecturebook.frauddetection.Utils.UserProfileUtils;
import com.hadooparchitecturebook.frauddetection.model.*;
import com.hadooparchitecturebook.frauddetection.model.Action;
import com.google.common.cache.*;
import org.apache.flume.Event;
import org.apache.flume.api.NettyAvroRpcClient;
import org.apache.flume.api.RpcClientConfigurationConstants;
import org.apache.flume.api.RpcClientFactory;
import org.apache.flume.event.SimpleEvent;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.*;
/**
 * Singleton coordinator: reviews incoming user events against locally cached
 * user profiles and validation rules, persists profile updates to HBase, and
 * forwards the resulting actions to Flume via background flusher threads.
 */
public class EventProcessor {
// Max entries drained from a pending queue per flush pass.
public static final int MAX_BATCH_PUT_SIZE = 1000;
// Milliseconds the flusher threads sleep between polling passes.
public static final int HBASE_PULL_FLUSH_WAIT_TIME = 5;
static Logger LOG = Logger.getLogger(EventProcessor.class);
//Models
// Local cache of user profiles; misses are loaded from HBase by the CacheLoader.
static LoadingCache<String, UserProfile> profileLocalCache;
// Most recently loaded rules; refreshed periodically by ValidationRuleFetcher.
static ValidationRules validationRules;
// Shared HBase connection, created in initAndStartEventProcess().
static HConnection hConnection;
// Lazily created singleton instance.
static EventProcessor eventProcessor;
// Profile updates waiting to be flushed to HBase by HBaseFlusher.
static LinkedBlockingQueue<Map.Entry<UserProfile, UserEvent>> pendingUserProfileUpdates = new LinkedBlockingQueue<Map.Entry<UserProfile, UserEvent>>();
// Actions waiting to be forwarded to Flume by FlumeFlusher.
static LinkedBlockingQueue<Action> pendingFlumeSubmits = new LinkedBlockingQueue<Action>();
// Drives the periodic validation-rule refresh.
static ScheduledThreadPoolExecutor scheduledThreadPoolExecutor;
// Runs the per-event HBase/Flume hand-off tasks submitted by reviewUserEvent().
static ThreadPoolExecutor executorService;
// Created only when doCheckPutOnUserProfiles is enabled.
static ThreadPoolExecutor checkPutThreadPoolExecutor;
// Polled by the flusher threads; cleared by stopAllEventProcessing().
static boolean isRunning;
// "host:port" Flume agents, used round-robin by FlumeFlusher.getClient().
static List<String> flumeHostPortList = new ArrayList<String>();
// When true, HBaseFlusher uses checkAndPut instead of plain puts.
static boolean doCheckPutOnUserProfiles;
// Private: instances are obtained through initAndStartEventProcess().
private EventProcessor() {
}
/**
 * Lazily creates and starts the singleton EventProcessor: builds the profile
 * cache, opens the HBase connection, loads validation rules (and schedules a
 * refresh every five minutes), and starts the HBase and Flume flusher threads.
 *
 * @param hbaseConfig              HBase client configuration
 * @param flumeList                "host:port" Flume agents used round-robin
 * @param doCheckPutOnUserProfiles when true, profile writes use checkAndPut
 * @return the shared EventProcessor instance
 * @throws IOException if the HBase connection cannot be created
 */
public static synchronized EventProcessor initAndStartEventProcess(Configuration hbaseConfig, List<String> flumeList, boolean doCheckPutOnUserProfiles) throws IOException {
  if (eventProcessor == null) {
    eventProcessor = new EventProcessor();
    LOG.info("Init caching object");
    // The cache loads missing profiles from HBase and logs every eviction.
    // (Static field assigned directly; accessing it through the instance
    // reference obscured that it is shared state.)
    profileLocalCache =
        CacheBuilder.newBuilder().maximumSize(10000).initialCapacity(1000).removalListener(new RemovalListener<String, UserProfile>() {
          @Override
          public void onRemoval(RemovalNotification<String, UserProfile> notification) {
            LOG.info("LoadingCache Removing: key " + notification.getKey());
          }
        }).build(new CacheLoader<String, UserProfile>() {
          public UserProfile load(String key) { // no checked exception
            LOG.info("LoadingCache load: key " + key);
            return eventProcessor.loadProfileFromHBase(key);
          }
        });
    LOG.info("Init HBase Connection");
    hConnection = HConnectionManager.createConnection(hbaseConfig);
    scheduledThreadPoolExecutor = new ScheduledThreadPoolExecutor(5);
    // Load rules once synchronously so the first events have rules to apply,
    // then refresh periodically in the background.
    ValidationRuleFetcher.updateValidationRules();
    scheduledThreadPoolExecutor.scheduleAtFixedRate(new ValidationRuleFetcher(), 5l, 5l, TimeUnit.MINUTES);
    executorService = new ThreadPoolExecutor(20, 20, 10, TimeUnit.MINUTES, new LinkedBlockingQueue<Runnable>());
    EventProcessor.doCheckPutOnUserProfiles = doCheckPutOnUserProfiles;
    if (doCheckPutOnUserProfiles) {
      checkPutThreadPoolExecutor = new ThreadPoolExecutor(20, 20, 10, TimeUnit.MINUTES, new LinkedBlockingQueue<Runnable>());
    }
    flumeHostPortList.addAll(flumeList);
    isRunning = true;
    // Background flushers drain the pending queues until stopAllEventProcessing().
    // Named threads so they are identifiable in thread dumps.
    Thread hbaseFlusher = new Thread(new HBaseFlusher(), "HBaseFlusher");
    hbaseFlusher.start();
    Thread flumeFlusher = new Thread(new FlumeFlusher(), "FlumeFlusher");
    flumeFlusher.start();
  }
  return eventProcessor;
}
/**
 * Signals the flusher threads to exit and shuts down all thread pools.
 * Without shutting down the worker pools, their non-daemon threads keep the
 * JVM alive after processing stops.
 */
public static void stopAllEventProcessing() {
  isRunning = false;
  scheduledThreadPoolExecutor.shutdown();
  executorService.shutdown();
  // The checkAndPut pool exists only when doCheckPutOnUserProfiles was set.
  if (checkPutThreadPoolExecutor != null) {
    checkPutThreadPoolExecutor.shutdown();
  }
}
/**
 * Reviews a single user event against the cached profile and the current
 * validation rules, then blocks until the resulting profile update has been
 * handed to HBase and the action has been handed to Flume by the flushers.
 *
 * @param userEvent the incoming event to evaluate
 * @return the Action produced by the validation rules
 * @throws ExecutionException   if either asynchronous hand-off task fails
 * @throws InterruptedException if interrupted while waiting on the futures
 */
public Action reviewUserEvent(final UserEvent userEvent) throws ExecutionException, InterruptedException {
// A cache miss triggers a synchronous HBase load via the CacheLoader.
final UserProfile userProfile = profileLocalCache.get(userEvent.userId);
final Action action = UserProfileUtils.reviewUserEvent(userEvent, userProfile, validationRules);
if (action.accept) {
// Only accepted events mutate the in-memory profile.
userProfile.updateWithUserEvent(userEvent);
}
// Hand the profile update to HBaseFlusher, then wait on the profile monitor
// until the flusher notify()s after its batch attempt.
// NOTE(review): if the flusher drains this entry and calls notify() before
// this task reaches wait(), the wakeup is lost and the task blocks forever
// -- a per-entry CountDownLatch would be race-free; confirm under load.
Future<Boolean> futureHBase = executorService.submit(new Callable<Boolean>() {
@Override
public Boolean call() throws Exception {
pendingUserProfileUpdates.put(new AbstractMap.SimpleEntry<UserProfile, UserEvent>(userProfile, userEvent));
LOG.info("EventProcessor: pushed to HBase buffer:" + action.alert);
synchronized (userProfile) {
userProfile.wait();
}
LOG.info("EventProcessor: pushed to HBase:" + action.alert);
return true;
}
});
// Same hand-off pattern for the Flume path, waiting on the Action monitor.
Future<Boolean> futureFlume = executorService.submit(new Callable<Boolean>() {
@Override
public Boolean call() throws Exception {
LOG.info("EventProcessor: push to flume:" + action.alert);
pendingFlumeSubmits.put(action);
LOG.info("EventProcessor: pushed to flume buffer:" + action.alert);
synchronized (action) {
action.wait();
}
LOG.info("EventProcessor: pushed to flume:" + action.alert);
return true;
}
});
// Block until both flushers have signalled completion.
futureFlume.get();
futureHBase.get();
return action;
}
/**
 * Loads a user profile from the HBase profile-cache table, creating and
 * persisting a fresh profile when none exists yet.
 *
 * @param key the user id
 * @return the existing or newly created profile; never null
 * @throws RuntimeException wrapping any HBase failure
 */
public UserProfile loadProfileFromHBase(String key) {
  LOG.info("Getting " + key + " from HBase with rowkey{" + key + "}");
  try {
    byte[] rowKey = HBaseUtils.convertKeyToRowKey(HBaseTableMetaModel.profileCacheTableName, key);
    LOG.info("Getting " + key + " from HBase with rowkey{" + Bytes.toString(rowKey) + "}");
    Get get = new Get(rowKey);
    HTableInterface table = hConnection.getTable(HBaseTableMetaModel.profileCacheTableName);
    try {
      Result result = table.get(get);
      // HBase Get never returns null: an absent row comes back as an empty
      // Result, so the null check always succeeded and the create-profile
      // branch below was unreachable. Test isEmpty() instead.
      if (!result.isEmpty()) {
        NavigableMap<byte[], byte[]> familyMap = result
            .getFamilyMap(HBaseTableMetaModel.profileCacheColumnFamily);
        return UserProfileUtils.createUserProfile(familyMap);
      } else {
        // The UserProfile didn't exist: create one and write it immediately.
        // Since it never existed, no need for checkAndPut.
        UserProfile userProfile = new UserProfile();
        userProfile.userId = key;
        Put put = new Put(rowKey);
        put.add(HBaseTableMetaModel.profileCacheColumnFamily, HBaseTableMetaModel.profileCacheJsonColumn, Bytes.toBytes(userProfile.getJSONObject().toString()));
        // Timestamp stored as raw long bytes (read back with Bytes.toLong).
        put.add(HBaseTableMetaModel.profileCacheColumnFamily, HBaseTableMetaModel.profileCacheTsColumn, Bytes.toBytes(System.currentTimeMillis()));
        table.put(put);
        return userProfile;
      }
    } finally {
      table.close();
    }
  } catch (Exception e) {
    throw new RuntimeException("Unable to get record from HBase:" + key, e);
  }
}
public static class ValidationRuleFetcher implements Runnable {
@Override
public void run() {
updateValidationRules();
}
public static void updateValidationRules() {
LOG.info("Updating validationRules");
try {
Get get = new Get(HBaseTableMetaModel.validationRulesRowKey);
HTableInterface table = hConnection.getTable(HBaseTableMetaModel.validationRulesTableName);
NavigableMap<byte[], byte[]> familyMap = table.get(get)
.getFamilyMap(HBaseTableMetaModel.validationRulesColumnFamily);
table.close();
LOG.info("loading validations");
validationRules = ValidationRules.Builder.buildValidationRules(familyMap);
} catch (Exception e) {
LOG.error(e);
throw new RuntimeException("Unable to create validation rules: ", e);
}
}
}
public static class HBaseFlusher implements Runnable {
@Override
public void run() {
while (isRunning) {
List<Map.Entry<UserProfile, UserEvent>> userProfileList = new ArrayList<Map.Entry<UserProfile, UserEvent>>();
// We read a batch of profiles from an event queue
try {
for (int i = 0; i < MAX_BATCH_PUT_SIZE; i++) {
Map.Entry<UserProfile, UserEvent> entry = pendingUserProfileUpdates.poll();
if (entry == null) {
break;
}
userProfileList.add(entry);
}
if (userProfileList.size() > 0) {
final HTableInterface table = hConnection.getTable(HBaseTableMetaModel.profileCacheTableName);
if (doCheckPutOnUserProfiles) {
List< Future<Map.Entry<UserProfile, UserEvent>>> futureList = new ArrayList< Future<Map.Entry<UserProfile, UserEvent>>>();
while(!userProfileList.isEmpty()) {
for (final Map.Entry<UserProfile, UserEvent> entry: userProfileList) {
futureList.add(checkPutThreadPoolExecutor.submit(new Callable<Map.Entry<UserProfile, UserEvent>>() {
@Override
public Map.Entry<UserProfile, UserEvent> call() throws Exception {
try {
byte[] rowKey = HBaseUtils.convertKeyToRowKey(HBaseTableMetaModel.profileCacheTableName, entry.getKey().userId);
Put put = new Put(rowKey);
put.add(HBaseTableMetaModel.profileCacheColumnFamily,
HBaseTableMetaModel.profileCacheJsonColumn,
Bytes.toBytes(entry.getKey().getJSONObject().toString()));
put.add(HBaseTableMetaModel.profileCacheColumnFamily,
HBaseTableMetaModel.profileCacheTsColumn,
Bytes.toBytes(Long.toString(System.currentTimeMillis())));
long timeStamp = entry.getKey().lastUpdatedTimeStamp;
while (!table.checkAndPut(rowKey,
HBaseTableMetaModel.profileCacheColumnFamily,
HBaseTableMetaModel.profileCacheTsColumn,
Bytes.toBytes(Long.toString(timeStamp)),
put)) {
//We reached here because someone else modified out userProfile
Get get = new Get(rowKey);
Result result = table.get(get);
NavigableMap<byte[], byte[]> familyMap = result.getFamilyMap(HBaseTableMetaModel.profileCacheColumnFamily);
timeStamp = Bytes.toLong(familyMap.get(HBaseTableMetaModel.profileCacheTsColumn));
UserProfile userProfile = new UserProfile(
Bytes.toString(familyMap.get(HBaseTableMetaModel.profileCacheJsonColumn)), timeStamp);
userProfile.updateWithUserEvent(entry.getValue());
put = new Put(rowKey);
put.add(HBaseTableMetaModel.profileCacheColumnFamily,
HBaseTableMetaModel.profileCacheJsonColumn,
Bytes.toBytes(userProfile.getJSONObject().toString()));
put.add(HBaseTableMetaModel.profileCacheColumnFamily,
HBaseTableMetaModel.profileCacheTsColumn,
Bytes.toBytes(Long.toString(System.currentTimeMillis())));
}
} catch (IOException e) {
return entry;
}
return null;
}
}));
}
userProfileList.clear();
for ( Future<Map.Entry<UserProfile, UserEvent>> future : futureList) {
Map.Entry<UserProfile, UserEvent> entry = future.get();
if (entry != null) {
userProfileList.add(entry);
}
}
}
} else {
List<Put> putList = new ArrayList<Put>();
for (Map.Entry<UserProfile, UserEvent> entry: userProfileList) {
Put put = new Put(HBaseUtils.convertKeyToRowKey(HBaseTableMetaModel.profileCacheTableName, entry.getKey().userId));
put.add(HBaseTableMetaModel.profileCacheColumnFamily,
HBaseTableMetaModel.profileCacheJsonColumn,
Bytes.toBytes(entry.getKey().getJSONObject().toString()));
put.add(HBaseTableMetaModel.profileCacheColumnFamily,
HBaseTableMetaModel.profileCacheTsColumn,
Bytes.toBytes(Long.toString(System.currentTimeMillis())));
putList.add(put);
}
table.put(putList);
}
table.close();
}
} catch (Throwable t) {
try {
LOG.error("Problem in HBaseFlusher", t);
pendingUserProfileUpdates.addAll(userProfileList);
} catch (Throwable t2) {
LOG.error("Problem in HBaseFlusher when trying to return puts to queue", t2);
}
} finally {
for (Map.Entry<UserProfile, UserEvent> entry: userProfileList) {
UserProfile userProfile = entry.getKey();
synchronized (userProfile) {
userProfile.notify();
}
}
}
}
try {
Thread.sleep(HBASE_PULL_FLUSH_WAIT_TIME);
} catch (InterruptedException e) {
LOG.error("Problem in HBaseFlusher", e);
}
}
}
public static class FlumeFlusher implements Runnable {
int flumeHost = 0;
@Override
public void run() {
NettyAvroRpcClient client = null;
while (isRunning) {
if (client == null) {
client = getClient();
}
List<Event> eventActionList = new ArrayList<Event>();
List<Action> actionList = new ArrayList<Action>();
try {
for (int i = 0; i < MAX_BATCH_PUT_SIZE; i++) {
Action action = pendingFlumeSubmits.poll();
if (action == null) {
break;
}
Event event = new SimpleEvent();
event.setBody(Bytes.toBytes(action.getJSONObject().toString()));
eventActionList.add(event);
actionList.add(action);
}
if (eventActionList.size() > 0) {
client.appendBatch(eventActionList);
}
} catch (Throwable t) {
try {
LOG.error("Problem in HBaseFlusher", t);
pendingFlumeSubmits.addAll(actionList);
actionList.clear();
client = null;
} catch (Throwable t2) {
LOG.error("Problem in HBaseFlusher when trying to return puts to queue", t2);
}
} finally {
for (Action action: actionList) {
synchronized (action) {
action.notify();
}
}
}
}
try {
Thread.sleep(HBASE_PULL_FLUSH_WAIT_TIME);
} catch (InterruptedException e) {
LOG.error("Problem in HBaseFlusher", e);
}
}
private NettyAvroRpcClient getClient() {
Properties starterProp = new Properties();
starterProp.setProperty(RpcClientConfigurationConstants.CONFIG_HOSTS, "h1");
String hostPort = flumeHostPortList.get(flumeHost);
starterProp.setProperty(RpcClientConfigurationConstants.CONFIG_HOSTS_PREFIX + "h1", hostPort);
flumeHost++;
if (flumeHost == flumeHostPortList.size()) { flumeHost = 0; }
LOG.info("EventProcessor: Trying to connect to " + hostPort);
NettyAvroRpcClient client = (NettyAvroRpcClient) RpcClientFactory.getInstance(starterProp);
LOG.info("EventProcessor: Connected to " + hostPort);
return client;
}
}
}