/*
* StreamCruncher: Copyright (c) 2006-2008, Ashwin Jayaprakash. All Rights Reserved.
* Contact: ashwin {dot} jayaprakash {at} gmail {dot} com
* Web: http://www.StreamCruncher.com
*
* This file is part of StreamCruncher.
*
* StreamCruncher is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* StreamCruncher is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with StreamCruncher. If not, see <http://www.gnu.org/licenses/>.
*/
package streamcruncher.innards.core.partition;
import java.lang.ref.SoftReference;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.PriorityQueue;
import java.util.Set;
import streamcruncher.api.artifact.RowSpec;
import streamcruncher.innards.core.FilterInfo;
import streamcruncher.innards.core.QueryContext;
import streamcruncher.innards.core.filter.TableFilter;
import streamcruncher.innards.core.partition.function.Function;
import streamcruncher.util.AppendOnlyPrimitiveLongList;
/*
* Author: Ashwin Jayaprakash Date: Mar 29, 2006 Time: 11:32:49 PM
*/
public abstract class Partitioner<F extends FilterInfo> implements TableFilter<F> {
protected F filterInfo;
protected FirstPartitionLevel firstLevel;
// -------------
protected PartitionDescender partitionDescender;
// -------------
protected PartitionOutputStore storage;
/**
* PQ instead of a TreeSet, because TreeSet does not allow duplicate
* entries. Add the {@link Function#getHomeFunction()}, but remove the
* function directly.
*/
protected PriorityQueue<CalculateTSFunctionPair> calculateTSFunctionPairs;
/**
* Add the {@link Function#getHomeFunction()}, but remove the function
* directly.
*/
protected Set<Function> dirtyFunctions;
/**
* Add the {@link Function#getHomeFunction()}, but remove the function
* directly.
*/
protected Set<Function> unprocessedDataFunctions;
// -------------
protected int freedSinceCleanup;
protected int consumedSinceCleanup;
protected SoftReference gcRequiredIndicator;
// -------------
/**
* @param queryName
* @param filterInfo
*/
public void init(String queryName, F filterInfo) throws Exception {
this.filterInfo = filterInfo;
// -------------
dirtyFunctions = new HashSet<Function>();
unprocessedDataFunctions = new HashSet<Function>();
calculateTSFunctionPairs = new PriorityQueue<CalculateTSFunctionPair>();
gcRequiredIndicator = new SoftReference(new byte[32]);
PartitionSpec spec = (PartitionSpec) filterInfo.getFilterSpec();
buildLevels(spec);
partitionDescender = new PartitionDescender(firstLevel, dirtyFunctions,
unprocessedDataFunctions, calculateTSFunctionPairs);
}
protected void initStorage(PartitionOutputStore store) {
storage = store;
}
// ----------------
protected void buildLevels(PartitionSpec spec) {
PartitionLevel prevLevel = null;
String[] columnNames = spec.getPartitionColumnNames();
for (int i = columnNames.length - 1; i >= 0; i--) {
if (i == 0) {
if (prevLevel == null) {
firstLevel = new FirstPartitionLevel(columnNames[i], spec.getFunctionBuilder());
}
else {
firstLevel = new FirstPartitionLevel(columnNames[i], prevLevel);
}
}
else {
if (prevLevel == null) {
prevLevel = new PartitionLevel(columnNames[i], spec.getFunctionBuilder());
}
else {
prevLevel = new PartitionLevel(columnNames[i], prevLevel);
}
}
}
if (firstLevel == null) {
firstLevel = new FirstPartitionLevel(spec.getFunctionBuilder());
}
}
// -------------
public void filter(QueryContext context) throws Exception {
AppendOnlyPrimitiveLongList allOustedIds = new AppendOnlyPrimitiveLongList(20);
LinkedList<Row> newRows = new LinkedList<Row>();
RowSpec rowSpec = null;
// -------------
while (true) {
CalculateTSFunctionPair calculateTSFunctionPair = calculateTSFunctionPairs.peek();
if (calculateTSFunctionPair == null) {
break;
}
long ts = calculateTSFunctionPair.getTimestamp();
if (context.getCurrentTime() >= ts) {
// Remove it.
calculateTSFunctionPairs.poll();
Function function = calculateTSFunctionPair.getFunction();
// Add the Function explicitly to complete its cycle.
dirtyFunctions.add(function);
function.cycleStart(context);
}
else {
break;
}
}
for (Iterator<Function> iter = unprocessedDataFunctions.iterator(); iter.hasNext();) {
Function function = iter.next();
function.cycleStart(context);
iter.remove();
/**
* Add the Function explicitly, because the unproc-buffer gets
* consumed only in {@link Function#cycleEnd(Context)}, if it did
* not receive any fresh rows.
*/
dirtyFunctions.add(function);
}
// -------------
int rowsCopied = 0;
int rowsInserted = 0;
int rowsOusted = 0;
rowsCopied = copyAndDescend(context);
// -------------
for (Iterator<Function> iter = dirtyFunctions.iterator(); iter.hasNext();) {
Function function = iter.next();
iter.remove();
boolean canDiscard = function.cycleEnd(context);
AppendOnlyPrimitiveLongList oustedIds = function.getOustedRowIds();
for (int i = oustedIds.getSize() - 1; i >= 0; i--) {
allOustedIds.add(oustedIds.remove());
}
RowBuffer rowBuffer = function.getProcessedRowBuffer();
if (rowSpec == null) {
rowSpec = function.getFinalTableRowSpec();
}
List<Row> rows = rowBuffer.getRows();
newRows.addAll(rows);
rows.clear();
if (canDiscard) {
// Release the Strong-Reference.
firstLevel.removeFunction(function);
}
}
// -------------
boolean storeSuccess = false;
storage.startBatch(context);
try {
deleteMarkedRows(context);
rowsOusted = allOustedIds.getSize();
if (rowsOusted > 0) {
markRowsForDeletion(context, allOustedIds);
}
rowsInserted = newRows.size();
if (rowsInserted > 0) {
insertRows(context, newRows);
}
storeSuccess = true;
}
catch (Exception e) {
throw e;
}
finally {
storage.endBatch(context, storeSuccess);
}
// -------------
freedSinceCleanup = freedSinceCleanup + rowsOusted;
consumedSinceCleanup = consumedSinceCleanup + rowsInserted;
// -------------
postProcess(context, rowsCopied, rowsOusted, rowsInserted);
boolean cleanup = (gcRequiredIndicator.get() == null);
if (cleanup) {
gcRequiredIndicator = new SoftReference(new byte[32]);
}
cleanup = cleanup || (freedSinceCleanup > (0.75 * consumedSinceCleanup));
if (cleanup) {
partitionDescender.attemptCleanup();
freedSinceCleanup = 0;
consumedSinceCleanup = 0;
}
}
protected void postProcess(QueryContext context, int rowsCopied, int rowsOusted,
int rowsInserted) {
}
/**
* Mark Rows so that they can be deleted in the next Cycle.
*
* @param context
* @param allOustedIds
*/
protected void markRowsForDeletion(QueryContext context,
AppendOnlyPrimitiveLongList allOustedIds) throws Exception {
long markValue = -1 * context.getRunCount();
storage.markRowsAsDead(context, markValue, allOustedIds);
}
/**
* Delete the Rows that were marked for deletion in the <b>previous</b>
* cycle. Then,
* {@linkplain #markRowsForDeletion(QueryContext, AppendOnlyPrimitiveLongList) mark}
* the Rows from the <b>current</b> cycle.
*
* @param context
*/
protected void deleteMarkedRows(QueryContext context) throws Exception {
storage.deleteDeadRows(context);
}
protected void insertRows(QueryContext context, List<Row> newRows) throws Exception {
storage.insertNewRow(context, newRows);
}
/**
* @param context
* @return Rows copied.
* @throws Exception
*/
protected abstract int copyAndDescend(QueryContext context) throws Exception;
// -------------
public void discard() {
firstLevel = null;
filterInfo = null;
partitionDescender = null;
storage.discard();
dirtyFunctions.clear();
dirtyFunctions = null;
unprocessedDataFunctions.clear();
unprocessedDataFunctions = null;
}
}