SortedWindowFunction.java example

Explorer
StreamCruncher-master
- demo_src
  - streamcruncher
    - test
- src
  - streamcruncher
/*
 * StreamCruncher:  Copyright (c) 2006-2008, Ashwin Jayaprakash. All Rights Reserved.
 * Contact:         ashwin {dot} jayaprakash {at} gmail {dot} com
 * Web:             http://www.StreamCruncher.com
 * 
 * This file is part of StreamCruncher.
 * 
 *     StreamCruncher is free software: you can redistribute it and/or modify
 *     it under the terms of the GNU Lesser General Public License as published by
 *     the Free Software Foundation, either version 3 of the License, or
 *     (at your option) any later version.
 * 
 *     StreamCruncher is distributed in the hope that it will be useful,
 *     but WITHOUT ANY WARRANTY; without even the implied warranty of
 *     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *     GNU Lesser General Public License for more details.
 * 
 *     You should have received a copy of the GNU Lesser General Public License
 *     along with StreamCruncher. If not, see <http://www.gnu.org/licenses/>.
 */
package streamcruncher.innards.core.partition.function;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.TreeMap;

import streamcruncher.api.artifact.RowSpec;
import streamcruncher.innards.core.QueryContext;
import streamcruncher.innards.core.partition.Row;
import streamcruncher.util.AtomicX;
import streamcruncher.util.PerpetualResultSet;

/*
 * Author: Ashwin Jayaprakash Date: Feb 19, 2006 Time: 4:30:16 PM
 */

/**
 * Window function that retains at most {@code windowSize} rows, ranked by the
 * value of a single column.
 * <p>
 * Rows consumed during a cycle are staged in the unprocessed buffer by
 * {@link #process(QueryContext, PerpetualResultSet)}. At the end of the cycle
 * {@link #onCycleEnd(QueryContext)} ranks them together with the rows already
 * in the window, evicts the surplus from the low end ({@link Type#HIGHEST}) or
 * the high end ({@link Type#LOWEST}) of the ordering, and adds the new rows
 * that survived to the processed buffer.
 * <p>
 * When "same group" columns are configured, a new row whose group hash matches
 * a row that was emitted in an earlier cycle replaces that row.
 */
public class SortedWindowFunction extends WindowFunction {
    /** Maximum number of ids kept in {@link #itemsInWindow}. */
    protected final int windowSize;

    /** Index, within a buffered row's column array, of the column to rank by. */
    protected final int columnPosition;

    /** Decides from which end of {@link #sortedData} surplus rows are evicted. */
    protected final Type type;

    /** Sort value -> ids holding that value, in the order they were added. */
    protected final TreeMap<Comparable, LinkedHashSet<Long>> sortedData;

    /**
     * Id -> (sort value, group hash) for every row in the window. The value is
     * <code>null</code> while a row has been ranked in the current cycle but
     * not yet emitted.
     */
    protected final HashMap<Long, GroupHashComparablePair> itemsInWindow;

    /** Group hash -> (sort value, id) of the emitted row representing that group. */
    protected final HashMap</* Group Hash */Long, IdComparablePair> columnGroupHashesInWindow;

    /** Column indexes that together identify a group; empty disables grouping. */
    protected final int[] sameGroupColumnsPositions;

    /**
     * @param selectedRowSpec
     *            Row spec whose column names are searched for
     *            <code>columnName</code> and <code>sameGroupColumns</code>;
     *            also handed to the superclass.
     * @param newRowSpec
     *            Handed to the superclass.
     * @param rowIdGenerator
     *            Handed to the superclass.
     * @param sourceLocationForTargetCols
     *            Handed to the superclass.
     * @param windowSize
     *            Maximum number of rows retained in the window.
     * @param columnName
     *            Name (case-insensitive) of the column to rank by.
     * @param sameGroupColumns
     *            If empty array is sent, then there is no check on repeating
     *            groups.
     * @param type
     *            Whether the highest or the lowest values are retained.
     * @throws IllegalArgumentException
     *             if <code>columnName</code> or one of the
     *             <code>sameGroupColumns</code> is not a column of
     *             <code>selectedRowSpec</code>.
     */
    public SortedWindowFunction(RowSpec selectedRowSpec, RowSpec newRowSpec,
            AtomicX rowIdGenerator, int[] sourceLocationForTargetCols, int windowSize,
            String columnName, String[] sameGroupColumns, Type type) {
        super(selectedRowSpec, newRowSpec, rowIdGenerator, sourceLocationForTargetCols, windowSize);

        this.windowSize = windowSize;

        this.columnPosition = findColumnPosition(selectedRowSpec, columnName);

        this.type = type;
        this.sortedData = new TreeMap<Comparable, LinkedHashSet<Long>>();
        this.itemsInWindow = new HashMap<Long, GroupHashComparablePair>();

        this.columnGroupHashesInWindow = new HashMap<Long, IdComparablePair>();
        this.sameGroupColumnsPositions = new int[sameGroupColumns.length];
        for (int i = 0; i < sameGroupColumns.length; i++) {
            sameGroupColumnsPositions[i] = findColumnPosition(selectedRowSpec, sameGroupColumns[i]);
        }
    }

    /**
     * Finds the zero-based position of a column, ignoring case.
     * 
     * @param selectedRowSpec
     *            Row spec whose column names are searched.
     * @param columnName
     *            Column to look for.
     * @return Position of the first column whose name matches.
     * @throws IllegalArgumentException
     *             if no column matches. Failing here avoids handing back an
     *             out-of-range index that would only surface later, while rows
     *             are being ranked.
     */
    private int findColumnPosition(RowSpec selectedRowSpec, String columnName) {
        int pos = 0;
        for (String column : selectedRowSpec.getColumnNames()) {
            if (columnName.equalsIgnoreCase(column)) {
                return pos;
            }

            pos++;
        }

        throw new IllegalArgumentException("Column '" + columnName
                + "' was not found in the selected row spec");
    }

    /**
     * @return Returns the windowSize.
     */
    public int getWindowSize() {
        return windowSize;
    }

    // -------------------

    /**
     * Resets <code>free</code> to 0 and allows up to <code>windowSize</code>
     * rows to be consumed in the coming cycle.
     * 
     * @param context
     */
    @Override
    public void onCalculate(QueryContext context) {
        free = 0;
        maxRowsThatCanBeConsumed = windowSize;
    }

    /**
     * Always returns <code>false</code>.
     * <p>
     * NOTE(review): the comment previously attached to this method said it
     * returns <code>true</code> ("allow freeing even if there are no Rows to
     * be consumed"), which contradicted the code. The behaviour has been left
     * unchanged; confirm which of the two was intended.
     * 
     * @return <code>false</code>.
     */
    @Override
    protected boolean allowFreeingWhenRSIsNull() {
        return false;
    }

    /**
     * Copies the current row, column by column (looked up by name), into a new
     * row of the unprocessed buffer. Ranking is deferred to
     * {@link #onCycleEnd(QueryContext)}.
     * 
     * @param context
     * @param currRow
     *            Row to stage; nothing happens if it is <code>null</code>.
     */
    @Override
    protected void process(QueryContext context, PerpetualResultSet currRow) throws Exception {
        if (currRow == null) {
            return;
        }

        Row toBuffer = unprocessedRowBuffer.addNewRow();
        String[] unprocColumnNames = realTableRowSpec.getColumnNames();
        Object[] columns = toBuffer.getColumns();

        for (int i = 0; i < unprocColumnNames.length; i++) {
            columns[i] = currRow.getColumnValue(unprocColumnNames[i]);
        }
    }

    /**
     * Ranks the rows staged during this cycle, trims the window back to
     * <code>windowSize</code> and emits the new rows that remain in it.
     * 
     * @return The value returned by the superclass implementation.
     */
    @Override
    protected boolean onCycleEnd(QueryContext context) throws Exception {
        boolean val = super.onCycleEnd(context);

        // -----------------

        HashMap</* Id */Long, /* Hash */Long> cachedGroupHashes = new HashMap<Long, Long>();

        /*
         * Rows ranked in this cycle, in arrival order. A list is used instead
         * of a map keyed by the sort value: rows frequently share the very
         * same value instance (cached boxed numbers, interned strings) and
         * would otherwise overwrite each other and never be emitted.
         */
        List<Object[]> newValues = new ArrayList<Object[]>();

        List<Row> rows = unprocessedRowBuffer.getRows();
        for (Row row : rows) {
            Object[] columns = row.getColumns();
            Object obj = columns[columnPosition];

            // Rows whose sort value is null or not Comparable are ignored.
            if (!(obj instanceof Comparable)) {
                continue;
            }

            Comparable currentVal = (Comparable) obj;
            Long id = ((Number) columns[idColumnBufferPos]).longValue();

            // Group update columns provided.
            if (sameGroupColumnsPositions.length > 0) {
                long groupHash = computeGroupHash(columns);

                cachedGroupHashes.put(id, groupHash);

                flushPreviousGroupMember(groupHash);
            }

            // Place the new values.
            LinkedHashSet<Long> innerLevel = sortedData.get(currentVal);
            if (innerLevel == null) {
                innerLevel = new LinkedHashSet<Long>();
                sortedData.put(currentVal, innerLevel);
            }
            innerLevel.add(id);

            // null marks the row as "ranked in this cycle, not yet emitted".
            itemsInWindow.put(id, null);

            newValues.add(columns);
        }

        rows.clear();

        // -----------------

        trimToWindowSize();

        // -----------------

        if (oustedRowIds.getSize() > 0) {
            // Clear old values.
            int size = getNumOfIds();
            discardFirstXIds(size);
        }

        emitNewWindowMembers(context, newValues, cachedGroupHashes);

        return val;
    }

    /**
     * Combines the hash codes of the "same group" columns of a row into a
     * single hash; a <code>null</code> column contributes 0.
     */
    private long computeGroupHash(Object[] columns) {
        long groupHash = 0;
        for (int i = 0; i < sameGroupColumnsPositions.length; i++) {
            Object grpMember = columns[sameGroupColumnsPositions[i]];

            int hash = (grpMember != null) ? grpMember.hashCode() : 0;
            groupHash = (37 * groupHash) + hash;
        }

        return groupHash;
    }

    /**
     * Removes the previously emitted row of the given group, if any, from all
     * bookkeeping structures and records its id as ousted.
     * <p>
     * Only rows emitted in earlier cycles are registered in
     * {@link #columnGroupHashesInWindow}, so two rows of the same group that
     * arrive within one cycle do not displace each other here.
     */
    private void flushPreviousGroupMember(long groupHash) throws Exception {
        IdComparablePair oldPair = columnGroupHashesInWindow.get(groupHash);
        if (oldPair == null) {
            return;
        }

        /*
         * Old value is now being updated. So, old one must be flushed.
         */
        oustedRowIds.add(oldPair.getId());

        LinkedHashSet<Long> innerLevel = sortedData.get(oldPair.getComparable());
        innerLevel.remove(oldPair.getId());
        if (innerLevel.size() == 0) {
            sortedData.remove(oldPair.getComparable());
        }

        itemsInWindow.remove(oldPair.getId());

        columnGroupHashesInWindow.remove(groupHash);
    }

    /**
     * Evicts ids until the window holds at most <code>windowSize</code> of
     * them. For {@link Type#HIGHEST} the smallest sort value goes first, for
     * {@link Type#LOWEST} the largest; within one sort value the earliest
     * added id goes first.
     */
    private void trimToWindowSize() throws Exception {
        while (itemsInWindow.size() > windowSize) {
            Comparable c = (type == Type.HIGHEST) ? sortedData.firstKey() : sortedData.lastKey();

            LinkedHashSet<Long> innerLevel = sortedData.get(c);
            Iterator<Long> iter = innerLevel.iterator();
            while ((itemsInWindow.size() > windowSize) && iter.hasNext()) {
                Long expelledId = iter.next();

                iter.remove();

                GroupHashComparablePair hashAndCPair = itemsInWindow.remove(expelledId);
                if (hashAndCPair != null) {
                    // Id made it into the top/bottom x in some previous cycle.
                    oustedRowIds.add(expelledId);
                    columnGroupHashesInWindow.remove(hashAndCPair.getGroupHash());
                }
            }
            if (innerLevel.size() == 0) {
                sortedData.remove(c);
            }
        }
    }

    /**
     * Adds every row ranked in this cycle that is still in the window to the
     * processed buffer and completes its bookkeeping entries.
     * 
     * @param context
     *            Supplies the run count stamped on the emitted rows.
     * @param newValues
     *            Column arrays of the rows ranked in this cycle.
     * @param cachedGroupHashes
     *            Id -> group hash for those rows; empty when grouping is off.
     */
    private void emitNewWindowMembers(QueryContext context, List<Object[]> newValues,
            HashMap<Long, Long> cachedGroupHashes) throws Exception {
        for (Object[] originalColumns : newValues) {
            Long id = ((Number) originalColumns[idColumnBufferPos]).longValue();

            // Only new values that made it into the top/bottom x are emitted.
            if (!itemsInWindow.containsKey(id)) {
                continue;
            }

            Comparable c = (Comparable) originalColumns[columnPosition];

            Row newRow = processedRowBuffer.addNewRowWithAutoValues(context.getRunCount());
            Object[] columns = newRow.getColumns();

            for (int i = 0; i < columns.length; i++) {
                // Negative positions have no source column and are left as is.
                int position = sourceLocationForTargetCols[i];
                if (position >= 0) {
                    columns[i] = originalColumns[position];
                }
            }

            addId(id);

            Long hash = cachedGroupHashes.get(id);
            if (hash != null) {
                columnGroupHashesInWindow.put(hash, new IdComparablePair(c, id));
            }

            /*
             * Put the Id against the key. So that it can be used to track
             * removed values.
             */
            itemsInWindow.put(id, new GroupHashComparablePair(c, hash));
        }
    }

    // -----------------

    /** Immutable pair of a sort value and the id of the row holding it. */
    protected static class IdComparablePair {
        protected final Comparable comparable;

        protected final Long id;

        public IdComparablePair(Comparable comparable, Long id) {
            this.comparable = comparable;
            this.id = id;
        }

        public Comparable getComparable() {
            return comparable;
        }

        public Long getId() {
            return id;
        }
    }

    /**
     * Immutable pair of a sort value and the group hash of the row holding it.
     * The group hash is <code>null</code> when no group hash was computed for
     * the row.
     */
    protected static class GroupHashComparablePair {
        protected final Comparable comparable;

        protected final Long groupHash;

        public GroupHashComparablePair(Comparable comparable, Long groupHash) {
            this.comparable = comparable;
            this.groupHash = groupHash;
        }

        public Comparable getComparable() {
            return comparable;
        }

        public Long getGroupHash() {
            return groupHash;
        }
    }

    /** Which end of the ordering the window retains. */
    public static enum Type {
        HIGHEST, LOWEST;
    }
}