/*
 * StreamCruncher: Copyright (c) 2006-2008, Ashwin Jayaprakash. All Rights Reserved.
 * Contact: ashwin {dot} jayaprakash {at} gmail {dot} com
 * Web: http://www.StreamCruncher.com
 *
 * This file is part of StreamCruncher.
 *
 * StreamCruncher is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * StreamCruncher is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with StreamCruncher. If not, see <http://www.gnu.org/licenses/>.
 */
package streamcruncher.innards.core.partition;

import java.lang.ref.SoftReference;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.PriorityQueue;
import java.util.Set;

import streamcruncher.api.artifact.RowSpec;
import streamcruncher.innards.core.FilterInfo;
import streamcruncher.innards.core.QueryContext;
import streamcruncher.innards.core.filter.TableFilter;
import streamcruncher.innards.core.partition.function.Function;
import streamcruncher.util.AppendOnlyPrimitiveLongList;

/*
 * Author: Ashwin Jayaprakash Date: Mar 29, 2006 Time: 11:32:49 PM
 */
/**
 * Base class for partition-driven {@link TableFilter}s. Each cycle
 * ({@link #filter(QueryContext)}) starts/ends {@link Function} cycles, copies
 * fresh rows down the partition tree, and persists inserted/ousted rows
 * through a {@link PartitionOutputStore}.
 */
public abstract class Partitioner<F extends FilterInfo> implements TableFilter<F> {
    /**
     * Size (bytes) of the softly-referenced canary array. If the GC clears
     * the reference, the JVM was under memory pressure and a cleanup is
     * attempted.
     */
    private static final int GC_INDICATOR_BYTES = 32;

    /**
     * Cleanup is also triggered when the rows freed since the last cleanup
     * exceed this fraction of the rows consumed since the last cleanup.
     */
    private static final double CLEANUP_FREED_FRACTION = 0.75;

    protected F filterInfo;

    protected FirstPartitionLevel firstLevel;

    // -------------

    protected PartitionDescender partitionDescender;

    // -------------

    protected PartitionOutputStore storage;

    /**
     * PQ instead of a TreeSet, because TreeSet does not allow duplicate
     * entries. Add the {@link Function#getHomeFunction()}, but remove the
     * function directly.
     */
    protected PriorityQueue<CalculateTSFunctionPair> calculateTSFunctionPairs;

    /**
     * Add the {@link Function#getHomeFunction()}, but remove the function
     * directly.
     */
    protected Set<Function> dirtyFunctions;

    /**
     * Add the {@link Function#getHomeFunction()}, but remove the function
     * directly.
     */
    protected Set<Function> unprocessedDataFunctions;

    // -------------

    // Counters since the last partition cleanup; drive the cleanup heuristic.
    protected int freedSinceCleanup;

    protected int consumedSinceCleanup;

    // Canary: cleared by the GC only under memory pressure. Was a raw type.
    protected SoftReference<byte[]> gcRequiredIndicator;

    // -------------

    /**
     * Initializes the Partitioner for a Query.
     *
     * @param queryName
     *            name of the owning Query (kept for the interface; not used
     *            here).
     * @param filterInfo
     *            carries the {@link PartitionSpec} used to build the levels.
     * @throws Exception
     *             if the Partition levels cannot be built.
     */
    public void init(String queryName, F filterInfo) throws Exception {
        this.filterInfo = filterInfo;

        // -------------

        dirtyFunctions = new HashSet<Function>();
        unprocessedDataFunctions = new HashSet<Function>();
        calculateTSFunctionPairs = new PriorityQueue<CalculateTSFunctionPair>();
        gcRequiredIndicator = new SoftReference<byte[]>(new byte[GC_INDICATOR_BYTES]);

        PartitionSpec spec = (PartitionSpec) filterInfo.getFilterSpec();
        buildLevels(spec);

        partitionDescender = new PartitionDescender(firstLevel, dirtyFunctions,
                unprocessedDataFunctions, calculateTSFunctionPairs);
    }

    /**
     * Supplies the store used to persist each cycle's results. Must be called
     * by the subclass before {@link #filter(QueryContext)}.
     */
    protected void initStorage(PartitionOutputStore store) {
        storage = store;
    }

    // ----------------

    /**
     * Builds the chain of {@link PartitionLevel}s from the Spec's partition
     * columns, last column first, and anchors it at {@link #firstLevel}. With
     * no partition columns at all, a single column-less first level is
     * created.
     */
    protected void buildLevels(PartitionSpec spec) {
        PartitionLevel prevLevel = null;

        String[] columnNames = spec.getPartitionColumnNames();

        // Walk backwards so each level links to the deeper one built before it.
        for (int i = columnNames.length - 1; i >= 0; i--) {
            if (i == 0) {
                // The shallowest level carries either the Function-builder
                // (single column) or the rest of the chain.
                if (prevLevel == null) {
                    firstLevel = new FirstPartitionLevel(columnNames[i], spec.getFunctionBuilder());
                }
                else {
                    firstLevel = new FirstPartitionLevel(columnNames[i], prevLevel);
                }
            }
            else {
                if (prevLevel == null) {
                    prevLevel = new PartitionLevel(columnNames[i], spec.getFunctionBuilder());
                }
                else {
                    prevLevel = new PartitionLevel(columnNames[i], prevLevel);
                }
            }
        }

        if (firstLevel == null) {
            firstLevel = new FirstPartitionLevel(spec.getFunctionBuilder());
        }
    }

    // -------------

    /**
     * Runs one full filter cycle: starts the cycles of due and unprocessed
     * Functions, copies and descends fresh rows, ends dirty Functions'
     * cycles, persists the resulting inserts/deletes in one batch and finally
     * attempts a cleanup if memory pressure or churn warrants it.
     *
     * @throws Exception
     *             if descending or storage operations fail; the batch is
     *             always ended, with the success flag reflecting the outcome.
     */
    public void filter(QueryContext context) throws Exception {
        AppendOnlyPrimitiveLongList allOustedIds = new AppendOnlyPrimitiveLongList(20);
        LinkedList<Row> newRows = new LinkedList<Row>();

        // -------------

        startCyclesDueByTime(context);
        startCyclesForUnprocessedData(context);

        // -------------

        int rowsCopied = copyAndDescend(context);

        // -------------

        int rowsOusted = 0;
        int rowsInserted = 0;

        endDirtyCyclesAndCollect(context, allOustedIds, newRows);

        // -------------

        boolean storeSuccess = false;

        storage.startBatch(context);
        try {
            deleteMarkedRows(context);

            rowsOusted = allOustedIds.getSize();
            if (rowsOusted > 0) {
                markRowsForDeletion(context, allOustedIds);
            }

            rowsInserted = newRows.size();
            if (rowsInserted > 0) {
                insertRows(context, newRows);
            }

            storeSuccess = true;
        }
        finally {
            // Always end the batch; storeSuccess tells the store whether to
            // commit or abandon the work.
            storage.endBatch(context, storeSuccess);
        }

        // -------------

        freedSinceCleanup = freedSinceCleanup + rowsOusted;
        consumedSinceCleanup = consumedSinceCleanup + rowsInserted;

        // -------------

        postProcess(context, rowsCopied, rowsOusted, rowsInserted);

        attemptCleanupIfRequired();
    }

    /**
     * Starts the cycle of every Function whose scheduled calculation
     * timestamp has arrived, and marks it dirty so its cycle gets completed
     * later in this pass.
     */
    private void startCyclesDueByTime(QueryContext context) throws Exception {
        for (;;) {
            CalculateTSFunctionPair calculateTSFunctionPair = calculateTSFunctionPairs.peek();
            if (calculateTSFunctionPair == null) {
                break;
            }

            long ts = calculateTSFunctionPair.getTimestamp();
            if (context.getCurrentTime() < ts) {
                // Head of the PQ is still in the future; so is everything else.
                break;
            }

            // Remove it.
            calculateTSFunctionPairs.poll();

            Function function = calculateTSFunctionPair.getFunction();

            // Add the Function explicitly to complete its cycle.
            dirtyFunctions.add(function);
            function.cycleStart(context);
        }
    }

    /**
     * Starts the cycle of every Function that still holds unprocessed data
     * from a previous pass and marks it dirty.
     */
    private void startCyclesForUnprocessedData(QueryContext context) throws Exception {
        for (Iterator<Function> iter = unprocessedDataFunctions.iterator(); iter.hasNext();) {
            Function function = iter.next();
            function.cycleStart(context);
            iter.remove();

            /*
             * Add the Function explicitly, because the unproc-buffer gets
             * consumed only in Function#cycleEnd(Context), if it did not
             * receive any fresh rows.
             */
            dirtyFunctions.add(function);
        }
    }

    /**
     * Ends the cycle of every dirty Function, draining its ousted Row-Ids
     * into {@code allOustedIds} and its processed Rows into {@code newRows}.
     * Functions that report they can be discarded are unhooked from the
     * Partition tree.
     */
    private void endDirtyCyclesAndCollect(QueryContext context,
            AppendOnlyPrimitiveLongList allOustedIds, List<Row> newRows) throws Exception {
        for (Iterator<Function> iter = dirtyFunctions.iterator(); iter.hasNext();) {
            Function function = iter.next();
            iter.remove();

            boolean canDiscard = function.cycleEnd(context);

            AppendOnlyPrimitiveLongList oustedIds = function.getOustedRowIds();
            for (int i = oustedIds.getSize() - 1; i >= 0; i--) {
                allOustedIds.add(oustedIds.remove());
            }

            RowBuffer rowBuffer = function.getProcessedRowBuffer();
            List<Row> rows = rowBuffer.getRows();
            newRows.addAll(rows);
            rows.clear();

            if (canDiscard) {
                // Release the Strong-Reference.
                firstLevel.removeFunction(function);
            }
        }
    }

    /**
     * Attempts a Partition cleanup when either the soft-referenced canary was
     * collected (memory pressure) or the freed-to-consumed ratio crossed
     * {@link #CLEANUP_FREED_FRACTION}.
     */
    private void attemptCleanupIfRequired() {
        boolean cleanup = (gcRequiredIndicator.get() == null);
        if (cleanup) {
            // Re-arm the canary for the next cycles.
            gcRequiredIndicator = new SoftReference<byte[]>(new byte[GC_INDICATOR_BYTES]);
        }

        cleanup = cleanup || (freedSinceCleanup > (CLEANUP_FREED_FRACTION * consumedSinceCleanup));
        if (cleanup) {
            partitionDescender.attemptCleanup();

            freedSinceCleanup = 0;
            consumedSinceCleanup = 0;
        }
    }

    /**
     * Hook for subclasses; invoked at the end of every
     * {@link #filter(QueryContext)} cycle with that cycle's row counts.
     * Default implementation does nothing.
     */
    protected void postProcess(QueryContext context, int rowsCopied, int rowsOusted,
            int rowsInserted) {
    }

    /**
     * Mark Rows so that they can be deleted in the next Cycle.
     *
     * @param context
     * @param allOustedIds
     */
    protected void markRowsForDeletion(QueryContext context,
            AppendOnlyPrimitiveLongList allOustedIds) throws Exception {
        // Negative run-count distinguishes "dead" marks from live version ids.
        long markValue = -1 * context.getRunCount();
        storage.markRowsAsDead(context, markValue, allOustedIds);
    }

    /**
     * Delete the Rows that were marked for deletion in the <b>previous</b>
     * cycle. Then,
     * {@linkplain #markRowsForDeletion(QueryContext, AppendOnlyPrimitiveLongList) mark}
     * the Rows from the <b>current</b> cycle.
     *
     * @param context
     */
    protected void deleteMarkedRows(QueryContext context) throws Exception {
        storage.deleteDeadRows(context);
    }

    /**
     * Persists this cycle's freshly produced Rows through the store.
     */
    protected void insertRows(QueryContext context, List<Row> newRows) throws Exception {
        storage.insertNewRow(context, newRows);
    }

    /**
     * Copies fresh rows into the Partition tree and descends the levels.
     *
     * @param context
     * @return Rows copied.
     * @throws Exception
     */
    protected abstract int copyAndDescend(QueryContext context) throws Exception;

    // -------------

    /**
     * Releases all references held by this Partitioner. Safe to call more
     * than once, and safe even if {@link #initStorage(PartitionOutputStore)}
     * was never invoked.
     */
    public void discard() {
        firstLevel = null;
        filterInfo = null;
        partitionDescender = null;

        if (storage != null) {
            storage.discard();
            storage = null;
        }

        if (dirtyFunctions != null) {
            dirtyFunctions.clear();
            dirtyFunctions = null;
        }

        if (unprocessedDataFunctions != null) {
            unprocessedDataFunctions.clear();
            unprocessedDataFunctions = null;
        }

        if (calculateTSFunctionPairs != null) {
            calculateTSFunctionPairs.clear();
            calculateTSFunctionPairs = null;
        }
    }
}