package apex.benchmark;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.apex.malhar.lib.dimensions.DimensionsEvent.Aggregate;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.datatorrent.api.Context.OperatorContext;
import com.datatorrent.contrib.dimensions.AppDataSingleSchemaDimensionStoreHDHT;
/**
* This class is used for measure the benchmark for dimension computation.
*
* The latency measured by dimension computation only count the time difference between generator and aggregation
* But it doesn't count the time of aggregation unifier and store.
*
* This class logged the max time of endWindow (all computation and store should be done before endWindow)
* and measure the latency as max(time of end window of same application window) - application window begin time - application window length.
*
*/
public class ProcessTimeAwareStore extends AppDataSingleSchemaDimensionStoreHDHT
{
private static final transient Logger logger = LoggerFactory.getLogger(ProcessTimeAwareStore.class);
private static final long serialVersionUID = 3133508545508966286L;
protected Map<Long, Map<String, Long>> bucketToKeyToUpdateTime = Maps.newHashMap();
protected transient List<Aggregate> aggregateList = Lists.newArrayList();
protected int logUpdateWindows = 1400; // 10 minutes + 100 seconds, ignore 20 window
protected int windowCountForLog = 0;
protected int loggedTimes = 0;
protected int ignoreWindows = 20;
@Override
public void processEvent(Aggregate gae)
{
super.processEvent(gae);
aggregateList.add(gae);
}
@Override
public void setup(OperatorContext context)
{
super.setup(context);
lastEndWindowTime = System.currentTimeMillis();
logger.info("Store setup time: {}", System.currentTimeMillis());
}
private long lastEndWindowTime;
@Override
public void endWindow()
{
long endWindowStartTime = System.currentTimeMillis();
super.endWindow();
//update bucket update time;
for(Aggregate aggregate : aggregateList) {
updateUpdateTime(getKey(aggregate), getBucket(aggregate), System.currentTimeMillis());
}
aggregateList.clear();
if(++windowCountForLog == logUpdateWindows){
//logUpdateTime();
logger.info("Logging Times: {}", loggedTimes);
logFinalLatencies();
logger.info("Logging Times: {}", loggedTimes);
windowCountForLog = 0;
++loggedTimes;
}
//window set to 200
long windowPeriod = System.currentTimeMillis() - lastEndWindowTime;
if(windowPeriod > 400) {
logger.warn("Unexpected long window period: {}, endWindow executed time: {}", windowPeriod, System.currentTimeMillis() - endWindowStartTime);
}
lastEndWindowTime = System.currentTimeMillis();
}
protected String getKey(Aggregate aggregate)
{
return aggregate.getEventKey().getKey().getFieldsString()[0];
}
//all the buckets are same range, so just use begin or end time of the bucket to identify
protected long getBucket(Aggregate aggregate)
{
return aggregate.getEventKey().getKey().getFieldsLong()[0];
}
protected void updateUpdateTime(String key, long bucket, long processTime)
{
Map<String, Long> keyToUpdateTime = bucketToKeyToUpdateTime.get(bucket);
if(keyToUpdateTime == null) {
keyToUpdateTime = Maps.newHashMap();
bucketToKeyToUpdateTime.put(bucket, keyToUpdateTime);
}
keyToUpdateTime.put(key, processTime);
}
/**
* for log, ignore first and last bucket as it maybe not complete
*/
protected void logFinalLatencies()
{
List<Long> latencies = Lists.newArrayList();
Set<Long> buckets = bucketToKeyToUpdateTime.keySet();
List<Long> bucketList = Lists.newArrayList();
bucketList.addAll(buckets);
Collections.sort(bucketList);
//remove beginning 10 windows
if (bucketList.size() <= ignoreWindows) {
return;
}
for(int i = 0; i<ignoreWindows; ++i) {
bucketList.remove(0);
}
bucketList.remove(bucketList.size() - 1);
for(Long bucket : bucketList) {
for(long updateTime : bucketToKeyToUpdateTime.get(bucket).values()) {
//10000 is 10 seconds, the length of the window
latencies.add(updateTime - bucket - 10000);
}
}
Collections.sort(latencies);
outputLatencyDetail(latencies);
outputGroupByCount(latencies);
}
protected static void outputLatencyDetail(List<Long> latencies)
{
StringBuilder sb = new StringBuilder();
for (long latency : latencies) {
sb.append(latency).append("\n");
}
logger.info("latencies: \n {}", sb.toString());
}
/**
* Each group have same count
* @param latencies
*/
protected static void outputGroupByCount(List<Long> latencies)
{
final int groups = 10;
int totalCount = latencies.size();
int step = totalCount / groups;
int i=0;
StringBuilder sb = new StringBuilder();
for(; i<groups-1; ++i) {
sb.append("" + i * 100/groups + " - " + (i+1) *100/groups + ": " + latencies.get(step * (i+1))).append("\n");
}
//last one
sb.append("" + i * 100/groups + " - 100: " + latencies.get(latencies.size() - 1)).append("\n");
logger.info("latency group by count: \n {}", sb.toString());
}
}