/*
 * StreamCruncher: Copyright (c) 2006-2008, Ashwin Jayaprakash. All Rights Reserved.
 * Contact: ashwin {dot} jayaprakash {at} gmail {dot} com
 * Web: http://www.StreamCruncher.com
 *
 * This file is part of StreamCruncher.
 *
 * StreamCruncher is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * StreamCruncher is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with StreamCruncher. If not, see <http://www.gnu.org/licenses/>.
 */
package streamcruncher.innards.core.partition.function;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.TreeMap;

import streamcruncher.api.artifact.RowSpec;
import streamcruncher.innards.core.QueryContext;
import streamcruncher.innards.core.partition.Row;
import streamcruncher.util.AtomicX;
import streamcruncher.util.PerpetualResultSet;

/*
 * Author: Ashwin Jayaprakash Date: Feb 19, 2006 Time: 4:30:16 PM
 */

/**
 * Window function that keeps at most <code>windowSize</code> Rows, chosen by
 * the value of one sort column. Depending on {@link Type}, either the Rows
 * with the highest or with the lowest sort values are retained; the others
 * are evicted at the end of each cycle.
 * <p>
 * Optionally, a set of "same group" columns can be supplied. A new Row whose
 * group hash matches a Row that is already in the window replaces that Row.
 */
public class SortedWindowFunction extends WindowFunction {
    /** Maximum number of Row Ids kept in the window. */
    protected final int windowSize;

    /** Index of the sort column in a Row's column array. */
    protected final int columnPosition;

    /** Whether the highest or the lowest sort values are retained. */
    protected final Type type;

    /**
     * Sort value to the Ids of the Rows carrying that value. The inner set
     * keeps the Ids in insertion order.
     */
    protected final TreeMap<Comparable, LinkedHashSet<Long>> sortedData;

    /**
     * Row Id to its group-hash/sort-value pair. The value is
     * <code>null</code> for Ids that were added in the current cycle and have
     * not yet been confirmed at the end of {@link #onCycleEnd(QueryContext)}.
     */
    protected final HashMap<Long, GroupHashComparablePair> itemsInWindow;

    /** Group hash to the Id/sort-value pair of the Row recorded for that group. */
    protected final HashMap</* Group Hash */Long, IdComparablePair> columnGroupHashesInWindow;

    /** Column-array indexes of the "same group" columns; may be empty. */
    protected final int[] sameGroupColumnsPositions;

    /**
     * @param selectedRowSpec
     *            Passed to the superclass; its column names are also used to
     *            resolve <code>columnName</code> and
     *            <code>sameGroupColumns</code> to positions.
     * @param newRowSpec
     *            Passed to the superclass.
     * @param rowIdGenerator
     *            Passed to the superclass.
     * @param sourceLocationForTargetCols
     *            Passed to the superclass.
     * @param windowSize
     *            Maximum number of Rows retained in the window.
     * @param columnName
     *            Name of the column whose values are sorted on (matched
     *            ignoring case).
     * @param sameGroupColumns
     *            If empty array is sent, then there is no check on repeating
     *            groups.
     * @param type
     *            Whether the highest or the lowest values are retained.
     */
    public SortedWindowFunction(RowSpec selectedRowSpec, RowSpec newRowSpec,
            AtomicX rowIdGenerator, int[] sourceLocationForTargetCols, int windowSize,
            String columnName, String[] sameGroupColumns, Type type) {
        super(selectedRowSpec, newRowSpec, rowIdGenerator, sourceLocationForTargetCols,
                windowSize);

        this.windowSize = windowSize;
        this.columnPosition = findColumnPosition(selectedRowSpec, columnName);
        this.type = type;

        this.sortedData = new TreeMap<Comparable, LinkedHashSet<Long>>();
        this.itemsInWindow = new HashMap<Long, GroupHashComparablePair>();
        this.columnGroupHashesInWindow = new HashMap<Long, IdComparablePair>();

        this.sameGroupColumnsPositions = new int[sameGroupColumns.length];
        for (int i = 0; i < sameGroupColumns.length; i++) {
            sameGroupColumnsPositions[i] = findColumnPosition(selectedRowSpec,
                    sameGroupColumns[i]);
        }
    }

    /**
     * Finds the index of the first column in the Row spec whose name matches
     * <code>columnName</code>, ignoring case.
     * <p>
     * NOTE(review): when no column matches, the number of columns is returned
     * (one past the last valid name index) and no error is raised here.
     *
     * @param selectedRowSpec
     *            Row spec whose column names are searched.
     * @param columnName
     *            Name to look for.
     * @return Index of the matching column, or the column count if not found.
     */
    private int findColumnPosition(RowSpec selectedRowSpec, String columnName) {
        int pos = 0;
        for (String column : selectedRowSpec.getColumnNames()) {
            if (columnName.equalsIgnoreCase(column)) {
                break;
            }
            pos++;
        }

        return pos;
    }

    /**
     * @return Returns the windowSize.
     */
    public int getWindowSize() {
        return windowSize;
    }

    // -------------------

    /**
     * Resets <code>free</code> to 0 and sets
     * <code>maxRowsThatCanBeConsumed</code> to the window size.
     *
     * @param context
     */
    @Override
    public void onCalculate(QueryContext context) {
        free = 0;
        maxRowsThatCanBeConsumed = windowSize;
    }

    /**
     * Always returns <code>false</code>.
     * <p>
     * NOTE(review): the comment previously attached to this method said
     * "return <code>true</code>. Allow freeing even if there are no Rows to
     * be consumed", which contradicts the returned value. The code is left
     * unchanged; confirm which behaviour is intended.
     *
     * @return <code>false</code>
     */
    @Override
    protected boolean allowFreeingWhenRSIsNull() {
        return false;
    }

    /**
     * Copies the current result-set Row into a new Row of the unprocessed
     * buffer, column by column, using the column names of
     * <code>realTableRowSpec</code>. Does nothing when <code>currRow</code>
     * is <code>null</code>.
     */
    @Override
    protected void process(QueryContext context, PerpetualResultSet currRow) throws Exception {
        if (currRow == null) {
            return;
        }

        Row toBuffer = unprocessedRowBuffer.addNewRow();
        String[] unprocColumnNames = realTableRowSpec.getColumnNames();
        Object[] columns = toBuffer.getColumns();
        for (int i = 0; i < unprocColumnNames.length; i++) {
            columns[i] = currRow.getColumnValue(unprocColumnNames[i]);
        }
    }

    /**
     * {@inheritDoc}
     * <p>
     * After delegating to the superclass, this method:
     * <ol>
     * <li>adds every buffered Row whose sort column holds a
     * {@link Comparable} to the window, first flushing the Row previously
     * recorded for the same group hash (if group columns are configured);</li>
     * <li>evicts Ids from the lowest end (for {@link Type#HIGHEST}) or the
     * highest end (for {@link Type#LOWEST}) until at most
     * <code>windowSize</code> Ids remain;</li>
     * <li>emits a processed Row for each Row of this cycle that is still in
     * the window.</li>
     * </ol>
     * Rows whose sort column is <code>null</code> or not a
     * {@link Comparable} are dropped.
     */
    @Override
    protected boolean onCycleEnd(QueryContext context) throws Exception {
        boolean val = super.onCycleEnd(context);

        // -----------------

        HashMap</* Id */Long, /* Hash */Long> cachedGroupHashes = new HashMap<Long, Long>();

        /*
         * Column arrays of the Rows accepted in this cycle, in arrival order.
         * This used to be an IdentityHashMap keyed by the sort value object,
         * which silently dropped a Row whenever two Rows shared the very same
         * value instance (cached boxed numbers, shared Strings etc). The
         * dropped Row kept occupying a slot in the window but was never
         * emitted.
         */
        List<Object[]> newValues = new ArrayList<Object[]>();

        List<Row> rows = unprocessedRowBuffer.getRows();
        for (Row row : rows) {
            Object[] columns = row.getColumns();

            Object obj = columns[columnPosition];
            // instanceof is false for null, so null sort values are skipped too.
            if (obj instanceof Comparable) {
                Comparable currentVal = (Comparable) obj;
                Long id = ((Number) columns[idColumnBufferPos]).longValue();

                // Group update columns provided.
                if (sameGroupColumnsPositions.length > 0) {
                    long groupHash = 0;
                    for (int i = 0; i < sameGroupColumnsPositions.length; i++) {
                        Object grpMember = columns[sameGroupColumnsPositions[i]];
                        int hash = (grpMember != null) ? grpMember.hashCode() : 0;

                        groupHash = (37 * groupHash) + hash;
                    }

                    cachedGroupHashes.put(id, groupHash);

                    // --------------

                    IdComparablePair oldPair = columnGroupHashesInWindow.get(groupHash);
                    /*
                     * Old value is now being updated. So, old one must be
                     * flushed.
                     */
                    if (oldPair != null) {
                        oustedRowIds.add(oldPair.getId());

                        LinkedHashSet<Long> innerLevel = sortedData.get(oldPair.getComparable());
                        innerLevel.remove(oldPair.getId());
                        if (innerLevel.isEmpty()) {
                            sortedData.remove(oldPair.getComparable());
                        }

                        itemsInWindow.remove(oldPair.getId());
                        columnGroupHashesInWindow.remove(groupHash);
                    }
                }

                // --------------

                // Place the new values.
                LinkedHashSet<Long> innerLevel = sortedData.get(currentVal);
                if (innerLevel == null) {
                    innerLevel = new LinkedHashSet<Long>();
                    sortedData.put(currentVal, innerLevel);
                }
                innerLevel.add(id);

                // null marks the Id as "new in this cycle, not yet confirmed".
                itemsInWindow.put(id, null);
                newValues.add(columns);
            }
        }
        rows.clear();

        // -----------------

        // Size maintenance.
        while (itemsInWindow.size() > windowSize) {
            // Keeping the highest values means evicting from the lowest key.
            Comparable c = (type == Type.HIGHEST) ? sortedData.firstKey() : sortedData.lastKey();

            /*
             * Start clearing from the earliest entry of the smallest/highest
             * set.
             */
            LinkedHashSet<Long> innerLevel = sortedData.get(c);
            for (Iterator<Long> iter = innerLevel.iterator(); (itemsInWindow.size() > windowSize)
                    && iter.hasNext();) {
                Long expelledId = iter.next();
                iter.remove();

                GroupHashComparablePair hashAndCPair = itemsInWindow.remove(expelledId);
                if (hashAndCPair != null) {
                    // Id made it into the top/bottom x in some previous cycle.
                    oustedRowIds.add(expelledId);

                    columnGroupHashesInWindow.remove(hashAndCPair.getGroupHash());
                }
            }

            if (innerLevel.isEmpty()) {
                sortedData.remove(c);
            }
        }

        // -----------------

        if (oustedRowIds.getSize() > 0) {
            // Clear old values.
            int size = getNumOfIds();
            discardFirstXIds(size);
        }

        for (Object[] originalColumns : newValues) {
            Long id = ((Number) originalColumns[idColumnBufferPos]).longValue();

            // New value that made it into the top/bottom x.
            if (itemsInWindow.containsKey(id)) {
                // Same object that was used as the key in sortedData above.
                Comparable c = (Comparable) originalColumns[columnPosition];

                Row newRow = processedRowBuffer.addNewRowWithAutoValues(context.getRunCount());
                Object[] columns = newRow.getColumns();
                for (int i = 0; i < columns.length; i++) {
                    int position = sourceLocationForTargetCols[i];
                    // Negative positions have no source column to copy from.
                    if (position >= 0) {
                        columns[i] = originalColumns[position];
                    }
                }

                addId(id);

                // null when no group columns are configured.
                Long hash = cachedGroupHashes.get(id);
                if (hash != null) {
                    columnGroupHashesInWindow.put(hash, new IdComparablePair(c, id));
                }

                /*
                 * Put the Id against the key. So that it can be used to track
                 * removed values.
                 */
                itemsInWindow.put(id, new GroupHashComparablePair(c, hash));
            }
        }

        return val;
    }

    // -----------------

    /** Immutable pair of a Row Id and the sort value of that Row. */
    protected static class IdComparablePair {
        protected final Comparable comparable;

        protected final Long id;

        public IdComparablePair(Comparable comparable, Long id) {
            this.comparable = comparable;
            this.id = id;
        }

        public Comparable getComparable() {
            return comparable;
        }

        public Long getId() {
            return id;
        }
    }

    /**
     * Immutable pair of a Row's sort value and its group hash. The group hash
     * is <code>null</code> when no group columns are configured.
     */
    protected static class GroupHashComparablePair {
        protected final Comparable comparable;

        protected final Long groupHash;

        public GroupHashComparablePair(Comparable comparable, Long groupHash) {
            this.comparable = comparable;
            this.groupHash = groupHash;
        }

        public Comparable getComparable() {
            return comparable;
        }

        public Long getGroupHash() {
            return groupHash;
        }
    }

    /**
     * Selects which end of the sorted values is retained: for
     * <code>HIGHEST</code> eviction starts at the smallest key, for
     * <code>LOWEST</code> at the largest key.
     */
    public static enum Type {
        HIGHEST, LOWEST;
    }
}