/*
* StreamCruncher: Copyright (c) 2006-2008, Ashwin Jayaprakash. All Rights Reserved.
* Contact: ashwin {dot} jayaprakash {at} gmail {dot} com
* Web: http://www.StreamCruncher.com
*
* This file is part of StreamCruncher.
*
* StreamCruncher is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* StreamCruncher is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with StreamCruncher. If not, see <http://www.gnu.org/licenses/>.
*/
package streamcruncher.innards.core.partition.function;
import java.sql.Timestamp;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.SortedMap;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;
import streamcruncher.api.artifact.RowSpec;
import streamcruncher.innards.core.QueryContext;
import streamcruncher.innards.core.partition.CalculateTSFunctionPair;
import streamcruncher.util.AppendOnlyPrimitiveLongList;
import streamcruncher.util.AtomicX;
import streamcruncher.util.PerpetualResultSet;
/*
* Author: Ashwin Jayaprakash Date: Feb 19, 2006 Time: 11:08:45 AM
*/
/**
 * Window function that keeps Row Ids keyed by their Timestamp and ousts them
 * once they are at least {@link #getWindowSizeMilliseconds()} older than the
 * current time reported by the {@link QueryContext}, or when the Window has
 * reached {@link #getMaxWindowSize()} and newer Rows are available to slide the
 * oldest ones out.
 * <p>
 * Ids that were added during the current cycle are tracked in
 * {@link #idsAddedInCycle} and are never ousted until {@link #onCycleEnd} has
 * cleared that set, i.e. every Row stays for at least one cycle.
 * </p>
 */
public class TimeWindowFunction extends WindowFunction {
    protected final long windowSizeMilliseconds;

    protected final int maxWindowSize;

    /** Position of the Timestamp column, as reported by the new RowSpec. */
    protected final int timeColumnBufferPos;

    /**
     * Timestamp (milliseconds) to Id(s). The Value can either be a single Id
     * (a {@link Long}) or a {@link List} of Ids sharing the same Timestamp.
     */
    protected final SortedMap<Long, Object> timeStampsAndIds;

    /** Ids added since the last {@link #onCycleEnd}; these must not be ousted yet. */
    protected final SortedSet<Long> idsAddedInCycle;

    /** Number of Ids currently held in {@link #timeStampsAndIds}. */
    protected int totalRowIdsInWindow;

    /**
     * Number of Ids that were in the Window when the cycle started and that may
     * still be slid out by incoming Rows during this cycle.
     */
    protected int watchAndFree;

    /**
     * @param selectedRowSpec
     * @param newRowSpec
     *            Also supplies the Timestamp column position.
     * @param rowIdGenerator
     * @param sourceLocationForTargetCols
     * @param windowSizeMilliseconds
     *            Age (in milliseconds) at which a Row expires.
     * @param maxWindowSize
     *            Maximum number of Rows held in the Window.
     */
    public TimeWindowFunction(RowSpec selectedRowSpec, RowSpec newRowSpec, AtomicX rowIdGenerator,
            int[] sourceLocationForTargetCols, long windowSizeMilliseconds, int maxWindowSize) {
        // NOTE(review): the last super argument is capped at FRAGMENT_SIZE;
        // presumably a buffer/fragment size hint - confirm against WindowFunction.
        super(selectedRowSpec, newRowSpec, rowIdGenerator, sourceLocationForTargetCols, Math.min(
                maxWindowSize, AppendOnlyPrimitiveLongList.FRAGMENT_SIZE));

        this.windowSizeMilliseconds = windowSizeMilliseconds;
        this.maxWindowSize = maxWindowSize;
        this.timeColumnBufferPos = newRowSpec.getTimestampColumnPosition();

        this.timeStampsAndIds = new TreeMap<Long, Object>();
        this.idsAddedInCycle = new TreeSet<Long>();
    }

    /**
     * @return Returns the maxWindowSize.
     */
    public int getMaxWindowSize() {
        return maxWindowSize;
    }

    /**
     * @return Returns the windowSizeMilliseconds.
     */
    public long getWindowSizeMilliseconds() {
        return windowSizeMilliseconds;
    }

    // -----------------------

    /**
     * Ousts every Row whose Timestamp is at least
     * {@link #windowSizeMilliseconds} older than the context's current time and
     * then resets the per-cycle counters.
     * <p>
     * Unlike other "calculate" methods, this is <b>not</b> Idempotent.
     * </p>
     *
     * @param context
     */
    @Override
    public void onCalculate(QueryContext context) {
        long currTime = context.getCurrentTime();

        /*
         * We are freeing the Rows that have expired, here itself. Therefore,
         * WindowFunction#allowFreeingWhenRSIsNull() is not over-ridden.
         *
         * Keys are visited in ascending order, so the scan can stop at the
         * first Timestamp that has not expired. Only Ids that are actually
         * removable are counted, so that the removal below can never reach
         * into Timestamps that are still valid.
         */
        int expiredIds = 0;
        for (Long ts : timeStampsAndIds.keySet()) {
            if ((currTime - ts) < windowSizeMilliseconds) {
                break;
            }

            expiredIds += countRemovableIds(timeStampsAndIds.get(ts));
        }

        if (expiredIds > 0) {
            removeFirstXIds(expiredIds);
        }

        /*
         * Only the Rows that participated in the previous cycle are allowed
         * to be slid out. Not the ones that come in the current cycle.
         */
        watchAndFree = totalRowIdsInWindow;

        free = 0;
        // If Rows are available, then the ones in the Window can be slid out.
        maxRowsThatCanBeConsumed = maxWindowSize;
    }

    /**
     * Slides out old Rows from a full Window before delegating to the parent
     * implementation.
     *
     * @param context
     * @param currRow
     *            The current Row; may be <code>null</code>.
     * @throws Exception
     */
    @Override
    protected void process(QueryContext context, PerpetualResultSet currRow) throws Exception {
        /*
         * Whatever is remaining in the Window now, can be freed if there are
         * Rows that can slide them out. But, slide out only those Events that
         * have been processed and stayed for at least one cycle, and only when
         * the Window is full.
         */
        boolean windowIsFull = (totalRowIdsInWindow == maxWindowSize);

        if (watchAndFree > 0 && windowIsFull
                && (currRow != null || allowFreeingWhenRSIsNull())) {
            int maxReplacements = Math.max(1 /* when currRow != null */, unprocessedRowBuffer
                    .getRows().size());

            int clean = Math.min(watchAndFree, maxReplacements);
            removeFirstXIds(clean);
            watchAndFree = watchAndFree - clean;
        }

        super.process(context, currRow);
    }

    // todo Use this same method of List/Id in Map for Aggregate Functions.
    /**
     * Records the Id under the Timestamp. The first Id for a Timestamp is stored
     * directly; when a second one arrives the Value is converted into a List.
     * The Id is also marked as added in the current cycle.
     *
     * @param timestampKey
     * @param id
     */
    protected void addIdAndTimestamp(Long timestampKey, Long id) {
        Object ids = timeStampsAndIds.get(timestampKey);

        if (ids == null) {
            timeStampsAndIds.put(timestampKey, id);
        }
        else if (ids instanceof List) {
            asIdList(ids).add(id);
        }
        else {
            // Second Id for this Timestamp: upgrade the single Id to a List.
            List<Long> list = new LinkedList<Long>();
            list.add((Long) ids);
            list.add(id);
            timeStampsAndIds.put(timestampKey, list);
        }

        idsAddedInCycle.add(id);
        totalRowIdsInWindow++;
    }

    /**
     * Ousts up to <code>howMany</code> of the oldest Ids, oldest Timestamp
     * first. Ids that were added in the current cycle are skipped, because they
     * have to stay for at least 1 cycle.
     * <p>
     * The method stops when the quota has been met or when no more removable
     * Ids are left, so it terminates even if the oldest entries are all
     * protected or the Window runs empty.
     * </p>
     *
     * @param howMany
     *            Maximum number of Ids to oust; values &lt;= 0 do nothing.
     */
    @Override
    protected void removeFirstXIds(int howMany) {
        int remaining = howMany;

        // Removal goes through the key iterator, so walking and removing is safe.
        Iterator<Long> keys = timeStampsAndIds.keySet().iterator();
        while (remaining > 0 && keys.hasNext()) {
            Long timestampKey = keys.next();
            Object ids = timeStampsAndIds.get(timestampKey);

            if (ids instanceof List) {
                List<Long> list = asIdList(ids);

                for (Iterator<Long> iter = list.iterator(); remaining > 0 && iter.hasNext();) {
                    Long id = iter.next();

                    /*
                     * This Event arrived late and cannot be removed
                     * immediately. It has to stay for at least 1 cycle.
                     */
                    if (idsAddedInCycle.contains(id)) {
                        continue;
                    }

                    oustedRowIds.add(id);
                    iter.remove();
                    totalRowIdsInWindow--;
                    remaining--;
                }

                if (list.isEmpty()) {
                    keys.remove();
                }
            }
            else {
                Long id = (Long) ids;

                /*
                 * Remove only if not added in the current Cycle.
                 */
                if (idsAddedInCycle.contains(id) == false) {
                    oustedRowIds.add(id);
                    keys.remove();
                    totalRowIdsInWindow--;
                    remaining--;
                }
            }
        }
    }

    /**
     * Reads the Id and the Timestamp from the processed Row and registers them
     * in the Window.
     *
     * @param processedRow
     */
    @Override
    protected void afterRowProcess(Object[] processedRow) {
        Long rowId = ((Number) processedRow[idColumnBufferPos]).longValue();
        Timestamp ts = (Timestamp) processedRow[timeColumnBufferPos];

        addIdAndTimestamp(ts.getTime(), rowId);
    }

    /**
     * @return Number of Ids currently in the Window.
     */
    @Override
    protected int getNumOfIds() {
        return totalRowIdsInWindow;
    }

    /**
     * {@inheritDoc}
     * <p>
     * Additionally, if the Window is not empty, the expiry time of the oldest
     * Timestamp is added to the context and to the list of
     * {@link CalculateTSFunctionPair}s. The set of Ids added in this cycle is
     * cleared.
     * </p>
     */
    @Override
    protected boolean onCycleEnd(QueryContext context) throws Exception {
        boolean val = super.onCycleEnd(context);

        if (timeStampsAndIds.isEmpty() == false) {
            Long nextTS = timeStampsAndIds.firstKey();
            // Expires after the configured time.
            Long expiryAt = nextTS + windowSizeMilliseconds;

            context.addEventExpirationTime(expiryAt);
            calculateTSFunctionPairs.add(new CalculateTSFunctionPair(expiryAt, getHomeFunction()));
        }

        idsAddedInCycle.clear();

        return val;
    }

    /**
     * Counts the Ids in a Map Value that are not protected by
     * {@link #idsAddedInCycle}.
     *
     * @param ids
     *            Either a single Id or a List of Ids.
     * @return Number of Ids that may be ousted right now.
     */
    private int countRemovableIds(Object ids) {
        if (ids instanceof List) {
            int removable = 0;
            for (Long id : asIdList(ids)) {
                if (idsAddedInCycle.contains(id) == false) {
                    removable++;
                }
            }
            return removable;
        }

        return idsAddedInCycle.contains((Long) ids) ? 0 : 1;
    }

    /**
     * Single place for the unchecked cast of a Map Value to a List of Ids.
     * Safe because only {@link #addIdAndTimestamp(Long, Long)} stores Lists in
     * the Map and it only ever stores <code>List&lt;Long&gt;</code>.
     */
    @SuppressWarnings("unchecked")
    private static List<Long> asIdList(Object ids) {
        return (List<Long>) ids;
    }
}