package qa.qcri.aidr.predict.common; import java.io.IOException; import java.util.ArrayList; import java.util.LinkedList; import java.util.List; import org.apache.log4j.Logger; import qa.qcri.aidr.predict.DataStore; import qa.qcri.aidr.predict.data.Document; import redis.clients.jedis.Jedis; /** * Abstract class for simplifying handling of pipeline processes that consume * Documents from an Redis queue (input), perform some processing, and push the * Document to another queue (output). It has basic functionality for estimating * its own processing capacity based on recent execution times. * * @author jrogstadius */ public abstract class PipelineProcess implements Runnable { private static Logger logger = Logger.getLogger(PipelineProcess.class); static class ExecutionTime { public double dT; public long T; public ExecutionTime(long T, double dT) { this.T = T; this.dT = dT; } } public byte[] inputQueueName; public byte[] outputQueueName; LinkedList<ExecutionTime> executionTimes = new LinkedList<ExecutionTime>(); public long inputCount = 0; public long outputCount = 0; public void run() { if (inputQueueName == null) { logger.error("No input queue set"); throw new RuntimeException("No input queue set"); } while (true) { if (Thread.interrupted()) return; Jedis jedis = DataStore.getJedisConnection(); try { // Get an item from the input queue (wait for 60 seconds, then // start over) List<byte[]> byteDoc = jedis.blpop(60, inputQueueName); if (byteDoc == null) { idle(); continue; } long startTime = System.nanoTime(); inputCount++; // Deserialize item Document item; try { item = Serializer.deserialize(byteDoc.get(1)); } catch (ClassNotFoundException | IOException e) { logger.error("Error when deserializing input document."); continue; } // Process the item logger.debug("Going to process item from crisis: " + item.getCrisisCode() + ", having docType: " + item.getDoctype() + ", with id: " + item.getDocumentID()); logger.debug("Raw json from REDIS: " + byteDoc.get(1)); processItem(item); // Push to output queue if (outputQueueName != null) { try { jedis.rpush(outputQueueName, Serializer.serialize(item)); outputCount++; } catch (IOException e) { logger.error("Error when serializing output document."); } } long stopTime = System.nanoTime(); double latency = (stopTime - startTime) / 1000000.0; pushExecutionTime(System.currentTimeMillis(), latency); } finally { DataStore.close(jedis); } } } synchronized void pushExecutionTime(long T, double dT) { executionTimes.add(new ExecutionTime(T, dT)); if (executionTimes.size() > 100) executionTimes.remove(); } public synchronized double getMaxItemsPerSecond() { if (executionTimes.size() < 1) return 1; ExecutionTime[] times = executionTimes .toArray(new ExecutionTime[executionTimes.size()]); double sum = 0; for (int i = 0; i < times.length; i++) sum += times[i].dT; double meanSecondsPerItem = 0.001 * sum / (double) times.length; return 1.0 / meanSecondsPerItem; } public synchronized double getCurrentItemsPerSecond() { long now = System.currentTimeMillis(); ArrayList<ExecutionTime> past5seconds = new ArrayList<ExecutionTime>(); for (ExecutionTime t : executionTimes) { if (now - t.T < 5000) past5seconds.add(t); } if (past5seconds.size() < 2) return 0; return 1000 * (double) past5seconds.size() / (past5seconds.get(past5seconds.size() - 1).T - past5seconds .get(0).T); } public double getLoad() { return getCurrentItemsPerSecond() / getMaxItemsPerSecond(); } protected abstract void processItem(Document item); protected void idle() { } }