package org.commoncrawl.service.statscollector;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Date;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.Vector;
import java.util.Map.Entry;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Semaphore;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.DataOutputBuffer;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.record.Buffer;
import org.codehaus.jackson.JsonEncoding;
import org.codehaus.jackson.JsonFactory;
import org.codehaus.jackson.JsonGenerator;
import org.commoncrawl.async.CallbackWithResult;
import org.commoncrawl.async.EventLoop;
import org.commoncrawl.async.Timer;
import org.commoncrawl.async.ConcurrentTask.CompletionCallback;
import org.commoncrawl.rpc.base.internal.AsyncRequest.Callback;
import org.commoncrawl.rpc.base.shared.BinaryProtocol;
import org.commoncrawl.rpc.base.shared.RPCStruct;
import org.commoncrawl.service.statscollector.TestRecord;
import org.commoncrawl.util.AsyncAppender;
import org.commoncrawl.util.CCStringUtils;
import org.commoncrawl.util.TimeSeriesDataFile;
import org.commoncrawl.util.TimeSeriesDataFile.KeyValueTuple;
import org.commoncrawl.util.time.Day;
import org.commoncrawl.util.time.Hour;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.ImmutableMultiset;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ImmutableSortedMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.ListMultimap;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multimaps;
import com.google.common.collect.Sets;
import com.google.common.collect.SortedSetMultimap;
import com.google.common.collect.TreeMultimap;
import com.google.gson.stream.JsonWriter;
/** abstract class representing a collection of hourly & daily stats
*
* @author rana
*
*/
public abstract class StatsCollection<ValueType extends Comparable> {
// Supplies the backing data files, the disk IO request queue and the event loop used by this collection.
private StatsLogManager _logManager;
// Group key / unique key pair; together they select the data files and appear in log messages.
protected String _groupKey;
protected String _uniqueKey;
// In-memory hourly values keyed by hour. addValue and loadLastStateFromDisk fold a new value into the
// first value already stored for an hour rather than adding a second entry.
private TreeMultimap<Hour,ValueType> _hourlyValues = TreeMultimap.create();
// Bounds of the cached hours; both are reset to null by truncateEvents when nothing remains cached.
private Hour _lowestEventTime = null;
private Hour _highestEventTime = null;
// Day of the last daily record: read from the daily file at load time, or set when a day's flush is queued.
private Day _lastDailyValue = null;
// Log of individual events; appended to by addValue and read back at construction time.
TimeSeriesDataFile<BytesWritable> _sequentialEventsFile = null;
// Log of per-day aggregated values; appended to when a previous day is flushed.
TimeSeriesDataFile<BytesWritable> _dailyEventsFile = null;
private static final Log LOG = LogFactory.getLog(StatsCollection.class);
// File-type names passed to StatsLogManager.getFileGivenName for the two logs above.
private static final String PERIODIC_FILE_TYPE = "events";
private static final String DAILY_FILE_TYPE = "daily";
/**
 * Binds this collection to its event and daily log files and restores the
 * in-memory state from whatever those files already contain.
 *
 * @param logFileManager manager that resolves the data files and queues disk IO
 * @param groupKey       group this collection belongs to
 * @param uniqueKey      key identifying this collection within the group
 * @throws IOException if the existing state cannot be read back
 */
public StatsCollection(StatsLogManager logFileManager,String groupKey,String uniqueKey) throws IOException {
    this._logManager = logFileManager;
    this._groupKey = groupKey;
    this._uniqueKey = uniqueKey;
    // both files are resolved from the same (group, unique) key pair
    this._sequentialEventsFile = logFileManager.getFileGivenName(groupKey, uniqueKey, PERIODIC_FILE_TYPE);
    this._dailyEventsFile = logFileManager.getFileGivenName(groupKey, uniqueKey, DAILY_FILE_TYPE);
    // NOTE: this invokes abstract methods (bufferToValueType, combineHourlyValues,
    // createDailyValue, valueTypeToBuffer) before the subclass constructor body runs
    loadLastStateFromDisk();
}
/**
 * Records one value: folds it into the in-memory hourly bucket, flushes the
 * previous day when the event crosses a day boundary, trims the cache to
 * roughly the last 24 hours and queues the raw value for the event log.
 *
 * @param timestamp event time in milliseconds since the epoch
 * @param value     value to record
 * @throws IOException if the value cannot be serialized for the logs
 */
public final void addValue(long timestamp,ValueType value)throws IOException {
    final Hour eventHour = new Hour(new Date(timestamp));

    // keep a single value per hour: merge into the existing one if present
    final SortedSet<ValueType> bucket = _hourlyValues.get(eventHour);
    if (bucket.isEmpty()) {
        bucket.add(value);
    }
    else {
        combineHourlyValues(bucket.first(),value);
    }

    // day rollover: the previously seen event belongs to a different day
    if (_highestEventTime != null) {
        final boolean dayChanged = _highestEventTime.getDay().compareTo(eventHour.getDay()) != 0;
        if (dayChanged) {
            final boolean alreadyFlushed =
                _lastDailyValue != null && _highestEventTime.getDay().compareTo(_lastDailyValue) == 0;
            if (!alreadyFlushed) {
                // NOTE(review): the day flushed is the one before the NEW event's day, not the
                // day of the previously seen event; these differ if events skip a day - confirm intent
                flushPreviosuDaysEvents((Day)eventHour.getDay().previous());
            }
        }
    }

    // drop cached hours once the cache spans more than 24 hours
    if (_lowestEventTime != null) {
        final long spannedHours = eventHour.getSerialIndex() - _lowestEventTime.getSerialIndex();
        if (spannedHours > 24) {
            truncateEvents(eventHour);
        }
    }

    updateEventTimes(eventHour);
    writeToSequentialEventLog(timestamp,value);
}
/**
 * Exposes the cached hourly values, ordered by hour.
 *
 * @return the live multimap backing this collection (not a copy)
 */
public TreeMultimap<Hour,ValueType> getHourlyValues() {
    return this._hourlyValues;
}
/**
 * Asynchronously reads the most recent daily values from the daily log.
 * <p>
 * The read is queued on the log manager's disk IO queue. The outcome is
 * delivered through a zero-delay timer set on {@code eventLoop}:
 * {@code taskComplete} with a map ordered by day, or {@code taskFailed}
 * with the {@link IOException} that aborted the read.
 * <p>
 * NOTE(review): {@code ImmutableSortedMap.Builder.build()} rejects duplicate
 * keys, so two daily records for the same day would make the completion
 * timer throw instead of invoking the callback - confirm the daily log can
 * never contain duplicates.
 *
 * @param eventLoop loop on which the callback timer is set
 * @param maxDays   maximum number of records to read from the tail of the daily log
 * @param callback  receives the result or the failure
 */
public void getDailyValues(final EventLoop eventLoop,final int maxDays,final CompletionCallback<ImmutableSortedMap<Day,ValueType>> callback) {
    // schedule a disk read
    _logManager.queueDiskIORequest(new Runnable() {
        @Override
        public void run() {
            try {
                // get the result set ...
                ArrayList<KeyValueTuple<Long, BytesWritable>> tuples = _dailyEventsFile.readFromTail(maxDays, -1);
                // walk items adding to builder
                final ImmutableSortedMap.Builder<Day,ValueType> builder = new ImmutableSortedMap.Builder<Day, ValueType>(new Comparator<Day>() {
                    @Override
                    public int compare(Day o1, Day o2) {
                        return o1.compareTo(o2);
                    }
                });
                for (KeyValueTuple<Long,BytesWritable> tuple : tuples) {
                    BytesWritable payload = tuple.value;
                    // getBytes() exposes the whole backing array, which can be larger than the
                    // record; only the first getLength() bytes are valid data
                    ValueType value = bufferToValueType(new Buffer(payload.getBytes(), 0, payload.getLength()));
                    builder.put(new Day(new Date(tuple.key)), value);
                }
                eventLoop.setTimer(new Timer(0,false,new Timer.Callback() {
                    @Override
                    public void timerFired(Timer timer) {
                        callback.taskComplete(builder.build());
                    }
                }));
            } catch (final IOException e) {
                LOG.error(CCStringUtils.stringifyException(e));
                eventLoop.setTimer(new Timer(0,false,new Timer.Callback() {
                    @Override
                    public void timerFired(Timer timer) {
                        callback.taskFailed(e);
                    }
                }));
            }
        }
    });
}
/**
 * Drops every cached hour that lies more than 24 hours before the given
 * hour, then recomputes the lowest / highest cached hour from what is left.
 *
 * @param newEventTime reference hour the 24 hour window is measured from
 */
private final void truncateEvents(final Hour newEventTime) {
    final long referenceIndex = newEventTime.getSerialIndex();

    // collect first, remove afterwards, so the key set is not modified while it is iterated
    final ArrayList<Hour> staleHours = new ArrayList<Hour>();
    for (Hour cachedHour : _hourlyValues.keySet()) {
        if (referenceIndex - cachedHour.getSerialIndex() > 24) {
            staleHours.add(cachedHour);
        }
    }
    for (Hour staleHour : staleHours) {
        _hourlyValues.removeAll(staleHour);
    }

    // re-derive the bounds from the surviving keys
    final SortedSet<Hour> remainingHours = _hourlyValues.keySet();
    if (remainingHours.isEmpty()) {
        _lowestEventTime = null;
        _highestEventTime = null;
    }
    else {
        _lowestEventTime = remainingHours.first();
        _highestEventTime = remainingHours.last();
    }
}
/**
 * Aggregates the cached hourly values that fall on the given day into one
 * daily value and queues it for the daily log. Does nothing when no cached
 * hour belongs to that day.
 *
 * @param dayToFlush day whose hourly values should be consolidated
 * @throws IOException if the daily value cannot be serialized
 */
private final void flushPreviosuDaysEvents(final Day dayToFlush) throws IOException {
    final Predicate<Map.Entry<Hour,ValueType>> fallsOnDay = new Predicate<Map.Entry<Hour,ValueType>>() {
        @Override
        public boolean apply(Entry<Hour, ValueType> entry) {
            return dayToFlush.equals(entry.getKey().getDay());
        }
    };
    // live filtered view over the hourly entries
    final Set<Map.Entry<Hour,ValueType>> entriesForDay = Sets.filter(_hourlyValues.entries(), fallsOnDay);

    if (entriesForDay.isEmpty()) {
        return;
    }

    // consolidate the day's hourly entries into a single value and queue it for disk
    final ValueType dailyValue = createDailyValue(entriesForDay);
    writeToDailyEventLog(dayToFlush,dailyValue);
    // remember which day was flushed last
    _lastDailyValue = dayToFlush;
}
/**
 * Records the hour of the event just added: it always becomes the highest
 * event time, and also the lowest one when no event time was tracked yet.
 *
 * @param newestEventTime hour of the event just added
 */
private final void updateEventTimes(Hour newestEventTime) {
    final boolean nothingTrackedYet = (_highestEventTime == null);
    _highestEventTime = newestEventTime;
    if (nothingTrackedYet) {
        _lowestEventTime = newestEventTime;
    }
}
/**
 * Rebuilds the in-memory state from the log files.
 * <ol>
 * <li>reads the key of the last daily record, if any, into {@code _lastDailyValue};</li>
 * <li>reads up to 1000 events from the tail of the event log and re-adds
 *     those dated yesterday or later to the hourly cache;</li>
 * <li>flushes yesterday's aggregate unless the daily log already ends with it;</li>
 * <li>trims the cache to the 24 hours preceding the current hour.</li>
 * </ol>
 *
 * @throws IOException if either log cannot be read or a value cannot be (de)serialized
 */
private void loadLastStateFromDisk() throws IOException {
    // ok, first see if we can extract last timestamp out of the daily events file
    long lastDailyTimestamp = _dailyEventsFile.getLastRecordKey();
    if (lastDailyTimestamp != -1) {
        // convert to Day if valid ...
        _lastDailyValue = new Day(new Date(lastDailyTimestamp));
    }
    // ok now read up to 1000 events from events file
    ArrayList<KeyValueTuple<Long,BytesWritable>> events = _sequentialEventsFile.readFromTail(1000, -1);

    // take a single clock reading so "today" and "this hour" cannot straddle a boundary
    final long nowMillis = System.currentTimeMillis();
    Day today = new Day(new Date(nowMillis));
    Hour thisHour = new Hour(new Date(nowMillis));
    Day yesterday = (Day) today.previous();

    // walk events in reverse order, stopping at the first one older than yesterday
    for (KeyValueTuple<Long, BytesWritable> event : Lists.reverse(events)) {
        Day eventDay = new Day(new Date(event.key));
        // test the sign only: compareTo is not required to return exactly -1
        if (eventDay.compareTo(yesterday) < 0) {
            break;
        }
        Hour eventHour = new Hour(new Date(event.key));
        // only the first getLength() bytes of the backing array are valid record data
        BytesWritable payload = event.value;
        ValueType value = bufferToValueType(new Buffer(payload.getBytes(), 0, payload.getLength()));
        // fold into the hourly cache, one value per hour
        SortedSet<ValueType> values = _hourlyValues.get(eventHour);
        if (values.isEmpty()) {
            values.add(value);
        }
        else {
            combineHourlyValues(values.first(),value);
        }
    }

    // flush yesterday's events unless the daily log already ends with yesterday;
    // a missing daily record (null) also means yesterday has not been flushed,
    // matching the null handling in addValue
    if (_lastDailyValue == null || _lastDailyValue.compareTo(yesterday) != 0) {
        flushPreviosuDaysEvents(yesterday);
    }
    // truncate to 24 hours period
    truncateEvents(thisHour);
}
/**
 * Serializes the value and queues an append to the sequential event log.
 * Failures of the queued append are logged, not propagated.
 *
 * @param timestamp record key, in milliseconds since the epoch
 * @param value     value to persist
 * @throws IOException if the value cannot be serialized
 */
private final void writeToSequentialEventLog(final long timestamp,ValueType value) throws IOException {
    final Buffer buffer = valueTypeToBuffer(value);
    // wrap the serialized bytes and trim to the valid length, so that spare
    // capacity in the buffer's backing array is not written out as data
    final BytesWritable dataOut = new BytesWritable(buffer.get());
    dataOut.setSize(buffer.getCount());
    _logManager.queueDiskIORequest(new Runnable() {
        @Override
        public void run() {
            try {
                _sequentialEventsFile.appendRecordToLogFile(timestamp, dataOut);
            } catch (IOException e) {
                LOG.error(CCStringUtils.stringifyException(e));
            }
        }
    });
}
/**
 * Serializes the aggregated daily value and queues an append to the daily
 * log, keyed by the first millisecond of the day. Failures of the queued
 * append are logged, not propagated.
 *
 * @param day        day the value belongs to
 * @param finalValue aggregated value for that day
 * @throws IOException if the value cannot be serialized
 */
private final void writeToDailyEventLog(final Day day,final ValueType finalValue) throws IOException {
    final Buffer buffer = valueTypeToBuffer(finalValue);
    // wrap the serialized bytes and trim to the valid length, so that spare
    // capacity in the buffer's backing array is not written out as data
    final BytesWritable dataOut = new BytesWritable(buffer.get());
    dataOut.setSize(buffer.getCount());
    _logManager.queueDiskIORequest(new Runnable() {
        @Override
        public void run() {
            try {
                _dailyEventsFile.appendRecordToLogFile(day.getFirstMillisecond(), dataOut);
            } catch (IOException e) {
                LOG.error(CCStringUtils.stringifyException(e));
            }
        }
    });
}
/**
* Combines two values recorded for the same hour into one.
* <p>
* Callers in this class pass the value already stored for the hour as
* {@code sourceValue} and do not store {@code otherValue}, so the merged
* result has to be written into {@code sourceValue} in place.
*
* @param sourceValue the value to collapse into (modified in place)
* @param otherValue the other value
*/
public abstract void combineHourlyValues(ValueType sourceValue,ValueType otherValue);
/**
* Aggregates hourly values to create a daily value.
*
* @param hourlyValueSet the (hour, value) entries to aggregate; when called
*        from this class it is a non-empty, filtered view of the hourly
*        cache restricted to the day being flushed
* @return new ValueType daily value instance
*/
public abstract ValueType createDailyValue(Set<Map.Entry<Hour,ValueType>> hourlyValueSet);
/**
* Deserializes a value that was read back from one of the log files.
*
* @param incomingBuffer incoming serialized data buffer
* @return deserialized value type
* @throws IOException if the buffer cannot be deserialized
*/
public abstract ValueType bufferToValueType(Buffer incomingBuffer) throws IOException;
/**
* Serializes a value so it can be appended to one of the log files.
*
* @param value value type instance
* @return buffer containing the serialized value type
* @throws IOException if the value cannot be serialized
*/
public abstract Buffer valueTypeToBuffer(ValueType value) throws IOException;
/**
* Hook invoked on each value right before collectHourlyStats /
* collectDailyStats put it into the caller-supplied multimap.
* Presumably implementations stamp this collection's unique key onto the
* value - confirm against the concrete subclasses.
*
* @param value the value about to be handed out
*/
public abstract void setUniqueKeyInValue(ValueType value);
/**
* Allocates an empty hourly value; collectHourlyStats uses it as the
* placeholder for hours that have no recorded value.
*
* @return a new, empty value instance
*/
public abstract ValueType allocateValueType();
/**
 * Minimal concrete collection over {@link TestRecord}; used only by the
 * test driver in {@code main}.
 */
private static class TestStatsCollection extends StatsCollection<TestRecord> {

    public TestStatsCollection(StatsLogManager logFileManager, String groupKey,String uniqueKey) throws IOException {
        super(logFileManager, groupKey, uniqueKey);
    }

    /** @return a fresh, empty test record */
    @Override
    public TestRecord allocateValueType() {
        return new TestRecord();
    }

    /** Intentionally a no-op for test records. */
    @Override
    public void setUniqueKeyInValue(TestRecord value) {
    }

    /** Serializes the record with the binary protocol. */
    @Override
    public Buffer valueTypeToBuffer(TestRecord value) throws IOException {
        final DataOutputBuffer sink = new DataOutputBuffer();
        value.serialize(sink, new BinaryProtocol());
        // hand over only the bytes actually written
        return new Buffer(sink.getData(),0,sink.getLength());
    }

    /** Deserializes a record with the binary protocol. */
    @Override
    public TestRecord bufferToValueType(Buffer incomingBuffer) throws IOException {
        final DataInputBuffer source = new DataInputBuffer();
        source.reset(incomingBuffer.get(),0, incomingBuffer.getCount());
        final TestRecord decoded = new TestRecord();
        decoded.deserialize(source,new BinaryProtocol());
        return decoded;
    }

    /** Merges {@code otherValue} into {@code sourceValue}: averages are averaged, cumulative values are summed. */
    @Override
    public void combineHourlyValues(TestRecord sourceValue, TestRecord otherValue) {
        sourceValue.setAverageValue(
            (sourceValue.getAverageValue() + otherValue.getAverageValue()) / 2.0f );
        sourceValue.setCumilativeValue(
            sourceValue.getCumilativeValue() + otherValue.getCumilativeValue());
    }

    /** Builds a daily record: mean of the hourly averages, sum of the hourly cumulative values. */
    @Override
    public TestRecord createDailyValue(Set<Entry<Hour, TestRecord>> hourlyValueSet) {
        final TestRecord daily = new TestRecord();
        float averageSum = 0.0f;
        for (Entry<Hour, TestRecord> hourly : hourlyValueSet) {
            final TestRecord sample = hourly.getValue();
            daily.setCumilativeValue(daily.getCumilativeValue() + sample.getCumilativeValue());
            averageSum += sample.getAverageValue();
        }
        daily.setAverageValue(averageSum / hourlyValueSet.size());
        return daily;
    }
}
/**
 * Writes the cached hourly values, except the one for the current hour,
 * to the stream as a JSON array of their {@code toString()} forms.
 * The stream is closed before this method returns, also on failure.
 *
 * @param stream destination; closed by this method
 * @throws IOException if serialization fails or the stream reported a write error
 */
public void dumpHourlyToJSON(OutputStream stream)throws IOException {
    PrintWriter pw = new PrintWriter(stream);
    try {
        JsonWriter jsonWriter = new JsonWriter(pw);
        jsonWriter.beginArray();
        Hour now = new Hour(new Date(System.currentTimeMillis()));
        for (Entry<Hour,ValueType> item : _hourlyValues.entries()) {
            // skip latest hour
            if (item.getKey().compareTo(now) != 0) {
                jsonWriter.value(item.getValue().toString());
            }
        }
        jsonWriter.endArray();
    }
    finally {
        // close() also flushes; do it even when serialization failed
        pw.close();
    }
    // PrintWriter never throws on write; it only records that an error happened
    if (pw.checkError()) {
        throw new IOException("Failed to write hourly stats for:" + _groupKey + "-" + _uniqueKey);
    }
}
/**
 * Adds one value per hour to {@code multiMap}, walking backwards from the
 * hour before the current one to the same hour-of-day on the previous day
 * (both ends inclusive). Hours without a cached value contribute a freshly
 * allocated empty value. Each value is passed through
 * {@link #setUniqueKeyInValue} and keyed by the first millisecond of its hour.
 *
 * @param multiMap destination the values are added to
 * @throws IOException declared for callers; not thrown by the code in this method
 */
public void collectHourlyStats(Multimap<Date,ValueType> multiMap) throws IOException {
    final Hour newestHour = (Hour) new Hour(new Date(System.currentTimeMillis())).previous();
    final Hour oldestHour = new Hour(newestHour.getHour(),(Day)newestHour.getDay().previous());

    for (Hour cursor = newestHour;
         cursor.getSerialIndex() >= oldestHour.getSerialIndex();
         cursor = (Hour) cursor.previous()) {

        final SortedSet<ValueType> bucket = _hourlyValues.get(cursor);
        // fall back to an empty placeholder when nothing was recorded for this hour
        final ValueType value = bucket.isEmpty() ? allocateValueType() : bucket.first();

        setUniqueKeyInValue(value);
        multiMap.put(new Date(cursor.getFirstMillisecond()),value);
    }
}
/**
 * Asynchronously loads up to 200 daily values and writes them to the
 * stream as a JSON array of their {@code toString()} forms. The stream is
 * flushed but not closed.
 *
 * @param stream             destination for the JSON output
 * @param completionCallback executed with {@code true} when the array was
 *                           written without error, {@code false} when the
 *                           load or the write failed
 * @throws IOException declared for callers; failures are reported through the callback
 */
public void dumpDailyToJSON(final OutputStream stream,final CallbackWithResult<Boolean> completionCallback) throws IOException {
    getDailyValues(_logManager.getEventLoop(), 200, new CompletionCallback<ImmutableSortedMap<Day,ValueType>>() {
        @Override
        public void taskComplete(ImmutableSortedMap<Day, ValueType> loadResult) {
            try {
                PrintWriter writer = new PrintWriter(stream);
                JsonWriter jsonWriter = new JsonWriter(writer);
                jsonWriter.beginArray();
                for (Entry<Day,ValueType> entry : loadResult.entrySet()) {
                    jsonWriter.value(entry.getValue().toString());
                }
                jsonWriter.endArray();
                // checkError() flushes and reports write failures that PrintWriter
                // would otherwise swallow silently
                boolean writeFailed = writer.checkError();
                if (writeFailed) {
                    LOG.error("Failed to write daily stats for:" + _groupKey + "-" + _uniqueKey);
                }
                completionCallback.execute(Boolean.valueOf(!writeFailed));
            }
            catch (IOException e){
                LOG.error(CCStringUtils.stringifyException(e));
                completionCallback.execute(Boolean.FALSE);
            }
        }
        @Override
        public void taskFailed(Exception e) {
            LOG.error(CCStringUtils.stringifyException(e));
            completionCallback.execute(Boolean.FALSE);
        }
    });
}
/**
 * Asynchronously loads up to 200 daily values and adds them to
 * {@code multiMap}, keyed by the first millisecond of their day. Each value
 * is passed through {@link #setUniqueKeyInValue} first. The additions are
 * made while holding the monitor of {@code multiMap}.
 *
 * @param multiMap           destination the values are added to
 * @param completionCallback executed with {@code true} after the values were
 *                           added, {@code false} when the load failed
 * @throws IOException declared for callers; failures are reported through the callback
 */
public void collectDailyStats(final Multimap<Date,ValueType> multiMap,final CallbackWithResult<Boolean> completionCallback) throws IOException {
    LOG.info("CollectDailyStats called for:" + _groupKey+"-" + _uniqueKey);
    getDailyValues(_logManager.getEventLoop(), 200, new CompletionCallback<ImmutableSortedMap<Day,ValueType>>() {
        @Override
        public void taskComplete(ImmutableSortedMap<Day, ValueType> loadResult) {
            LOG.info("Daily Value Load completed for:" + _groupKey+"-" + _uniqueKey + " resultCount:" + loadResult.size());
            synchronized (multiMap) {
                for (Entry<Day,ValueType> entry : loadResult.entrySet()) {
                    // one Date per entry, shared by the log line and the map key
                    Date dayStart = new Date(entry.getKey().getFirstMillisecond());
                    LOG.info("Adding Daily Entry for:" + _groupKey+"-" + _uniqueKey + " Date:" + dayStart);
                    // set record affinity
                    setUniqueKeyInValue(entry.getValue());
                    multiMap.put(dayStart, entry.getValue());
                }
            }
            completionCallback.execute(Boolean.TRUE);
        }
        @Override
        public void taskFailed(Exception e) {
            LOG.error(CCStringUtils.stringifyException(e));
            completionCallback.execute(Boolean.FALSE);
        }
    });
}
/******* TEST CODE ********/
/**
 * Manual smoke test. Writes sample events for the two days before today and
 * for today into a collection backed by files under /tmp, re-opening the
 * collection between phases so state is reloaded from disk, then prints the
 * cached hourly values and the stored daily values.
 * <p>
 * The event loop and the log manager are shut down even when the test fails.
 */
public static void main(String[] args) {
    EventLoop eventLoop = new EventLoop();
    eventLoop.start();
    StatsLogManager logManager = null;
    try {
        try {
            logManager = new StatsLogManager(null,new File("/tmp"));
            TestStatsCollection statsCollection = new TestStatsCollection(logManager, "test", "001");
            Day today = new Day(new Date(System.currentTimeMillis()));
            Day yesterday = (Day) today.previous();
            Day dayBeforeYesterday = (Day) yesterday.previous();
            TestRecord recordTest = new TestRecord();

            // phase 1: 48 consecutive hours starting at the day before yesterday
            for (int i=0;i<=47;++i) {
                addTestSamples(statsCollection, new Hour(i,dayBeforeYesterday), recordTest);
            }
            // give the queued disk writes a moment before re-opening
            Thread.sleep(100);

            // phase 2: reload, then add every hour of today up to now
            statsCollection = new TestStatsCollection(logManager, "test", "001");
            Hour now = new Hour(new Date(System.currentTimeMillis()));
            for (int i=0;i<=now.getHour();++i) {
                addTestSamples(statsCollection, new Hour(i,today), recordTest);
            }
            Thread.sleep(100);

            // phase 3: reload once more and print what came back
            statsCollection = new TestStatsCollection(logManager, "test", "001");
            TreeMultimap<Hour,TestRecord> hourlyValues = statsCollection.getHourlyValues();
            for (Map.Entry<Hour,TestRecord> entry : hourlyValues.entries()) {
                System.out.println(entry.getKey().toString() + ":" + entry.getValue().getAverageValue() + "," + entry.getValue().getCumilativeValue());
            }
            final Semaphore blockingSemaphore = new Semaphore(0);
            statsCollection.getDailyValues(eventLoop, 30, new CompletionCallback<ImmutableSortedMap<Day,TestRecord>>() {
                @Override
                public void taskComplete(ImmutableSortedMap<Day, TestRecord> loadResult) {
                    for (Entry<Day,TestRecord> entry : loadResult.entrySet()) {
                        System.out.println("Daily Record. Day:" + entry.getKey().toString() + ":" + entry.getValue().getAverageValue() + "," + entry.getValue().getCumilativeValue());
                    }
                    blockingSemaphore.release();
                }
                @Override
                public void taskFailed(Exception e) {
                    LOG.error(CCStringUtils.stringifyException(e));
                    blockingSemaphore.release();
                }
            });
            // wait for the asynchronous daily read to report back
            blockingSemaphore.acquireUninterruptibly();
        }
        finally {
            // always release the loop and the manager, also on failure
            eventLoop.stop();
            if (logManager != null) {
                logManager.shutdown();
            }
        }
    }
    catch (IOException e) {
        LOG.error(CCStringUtils.stringifyException(e));
    }
    catch (InterruptedException e) {
        // restore the interrupt flag for whoever runs this thread
        Thread.currentThread().interrupt();
        LOG.error(CCStringUtils.stringifyException(e));
    }
}

/** Adds three copies of the template record (cumulative 1, average 10) at the start of the given hour. */
private static void addTestSamples(TestStatsCollection collection, Hour hour, TestRecord template) throws IOException {
    template.setCumilativeValue(1);
    template.setAverageValue(10.0f);
    try {
        final long hourStart = hour.getFirstMillisecond();
        for (int offset = 0; offset <= 2; ++offset) {
            collection.addValue(hourStart + offset,(TestRecord) template.clone());
        }
    }
    catch (CloneNotSupportedException e ) {
        LOG.error(CCStringUtils.stringifyException(e));
    }
}
}