/*********************************************************************************************************************** * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu) * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the * specific language governing permissions and limitations under the License. **********************************************************************************************************************/ package eu.stratosphere.pact.runtime.sort; import java.io.EOFException; import java.io.IOException; import java.util.ArrayList; import java.util.List; import eu.stratosphere.api.common.typeutils.TypeComparator; import eu.stratosphere.api.common.typeutils.TypeSerializer; import eu.stratosphere.core.memory.MemorySegment; import eu.stratosphere.nephele.services.iomanager.ChannelWriterOutputView; import eu.stratosphere.nephele.services.memorymanager.ListMemorySegmentSource; import eu.stratosphere.pact.runtime.io.RandomAccessInputView; import eu.stratosphere.pact.runtime.io.SimpleCollectingOutputView; import eu.stratosphere.util.MutableObjectIterator; /** * */ public final class NormalizedKeySorter<T> implements InMemorySorter<T> { private static final int OFFSET_LEN = 8; private static final int DEFAULT_MAX_NORMALIZED_KEY_LEN = 8; private static final int MIN_REQUIRED_BUFFERS = 3; // ------------------------------------------------------------------------ // Members // ------------------------------------------------------------------------ private final byte[] swapBuffer; private final TypeSerializer<T> serializer; private final TypeComparator<T> comparator; private final SimpleCollectingOutputView recordCollector; private final RandomAccessInputView recordBuffer; private final RandomAccessInputView recordBufferForComparison; private MemorySegment currentSortIndexSegment; private final ArrayList<MemorySegment> freeMemory; private final ArrayList<MemorySegment> sortIndex; private final ArrayList<MemorySegment> recordBufferSegments; private long currentDataBufferOffset; private long sortIndexBytes; private int currentSortIndexOffset; private int numRecords; private final int numKeyBytes; private final int indexEntrySize; private final int indexEntriesPerSegment; private final int lastIndexEntryOffset; private final int segmentSize; private final int totalNumBuffers; private final boolean normalizedKeyFullyDetermines; private final boolean useNormKeyUninverted; // ------------------------------------------------------------------------- // Constructors / Destructors // ------------------------------------------------------------------------- public NormalizedKeySorter(TypeSerializer<T> serializer, TypeComparator<T> comparator, List<MemorySegment> memory) { this(serializer, comparator, memory, DEFAULT_MAX_NORMALIZED_KEY_LEN); } public NormalizedKeySorter(TypeSerializer<T> serializer, TypeComparator<T> comparator, List<MemorySegment> memory, int maxNormalizedKeyBytes) { if (serializer == null || comparator == null || memory == null) { throw new NullPointerException(); } if (maxNormalizedKeyBytes < 0) { throw new IllegalArgumentException("Maximal number of normalized key bytes must not be negative."); } this.serializer = serializer; this.comparator = comparator; this.useNormKeyUninverted = !comparator.invertNormalizedKey(); // check the size of the first buffer and record it. all further buffers must have the same size. // the size must also be a power of 2 this.totalNumBuffers = memory.size(); if (this.totalNumBuffers < MIN_REQUIRED_BUFFERS) { throw new IllegalArgumentException("Normalized-Key sorter requires at least " + MIN_REQUIRED_BUFFERS + " memory buffers."); } this.segmentSize = memory.get(0).size(); if (memory instanceof ArrayList<?>) { this.freeMemory = (ArrayList<MemorySegment>) memory; } else { this.freeMemory = new ArrayList<MemorySegment>(memory.size()); this.freeMemory.addAll(memory); } // create the buffer collections this.sortIndex = new ArrayList<MemorySegment>(16); this.recordBufferSegments = new ArrayList<MemorySegment>(16); // the views for the record collections this.recordCollector = new SimpleCollectingOutputView(this.recordBufferSegments, new ListMemorySegmentSource(this.freeMemory), this.segmentSize); this.recordBuffer = new RandomAccessInputView(this.recordBufferSegments, this.segmentSize); this.recordBufferForComparison = new RandomAccessInputView(this.recordBufferSegments, this.segmentSize); // set up normalized key characteristics if (this.comparator.supportsNormalizedKey()) { this.numKeyBytes = Math.min(this.comparator.getNormalizeKeyLen(), maxNormalizedKeyBytes); this.normalizedKeyFullyDetermines = !this.comparator.isNormalizedKeyPrefixOnly(this.numKeyBytes); } else { this.numKeyBytes = 0; this.normalizedKeyFullyDetermines = false; } // compute the index entry size and limits this.indexEntrySize = this.numKeyBytes + OFFSET_LEN; this.indexEntriesPerSegment = segmentSize / this.indexEntrySize; this.lastIndexEntryOffset = (this.indexEntriesPerSegment - 1) * this.indexEntrySize; this.swapBuffer = new byte[this.indexEntrySize]; // set to initial state this.currentSortIndexSegment = nextMemorySegment(); this.sortIndex.add(this.currentSortIndexSegment); } // ------------------------------------------------------------------------- // Memory Segment // ------------------------------------------------------------------------- /** * Resets the sort buffer back to the state where it is empty. All contained data is discarded. */ @Override public void reset() { // reset all offsets this.numRecords = 0; this.currentSortIndexOffset = 0; this.currentDataBufferOffset = 0; this.sortIndexBytes = 0; // return all memory this.freeMemory.addAll(this.sortIndex); this.freeMemory.addAll(this.recordBufferSegments); this.sortIndex.clear(); this.recordBufferSegments.clear(); // grab first buffers this.currentSortIndexSegment = nextMemorySegment(); this.sortIndex.add(this.currentSortIndexSegment); this.recordCollector.reset(); } /** * Checks whether the buffer is empty. * * @return True, if no record is contained, false otherwise. */ @Override public boolean isEmpty() { return this.numRecords == 0; } /** * Collects all memory segments from this sorter. * * @return All memory segments from this sorter. */ @Override public List<MemorySegment> dispose() { this.freeMemory.addAll(this.sortIndex); this.freeMemory.addAll(this.recordBufferSegments); this.recordBufferSegments.clear(); this.sortIndex.clear(); return this.freeMemory; } /** * Gets the total capacity of this sorter, in bytes. * * @return The sorter's total capacity. */ @Override public long getCapacity() { return ((long) this.totalNumBuffers) * this.segmentSize; } /** * Gets the number of bytes currently occupied in this sorter. * * @return The number of bytes occupied. */ @Override public long getOccupancy() { return this.currentDataBufferOffset + this.sortIndexBytes; } // ------------------------------------------------------------------------- // Retrieving and Writing // ------------------------------------------------------------------------- /** * Gets the record at the given logical position. * * @param reuse The target object to deserialize the record into. * @param logicalPosition The logical position of the record. * @throws IOException Thrown, if an exception occurred during deserialization. */ @Override public T getRecord(T reuse, int logicalPosition) throws IOException { return getRecordFromBuffer(reuse, readPointer(logicalPosition)); } /** * Writes a given record to this sort buffer. The written record will be appended and take * the last logical position. * * @param record The record to be written. * @return True, if the record was successfully written, false, if the sort buffer was full. * @throws IOException Thrown, if an error occurred while serializing the record into the buffers. */ @Override public boolean write(T record) throws IOException { //check whether we need a new memory segment for the sort index if (this.currentSortIndexOffset > this.lastIndexEntryOffset) { if (memoryAvailable()) { this.currentSortIndexSegment = nextMemorySegment(); this.sortIndex.add(this.currentSortIndexSegment); this.currentSortIndexOffset = 0; this.sortIndexBytes += this.segmentSize; } else { return false; } } // add the pointer and the normalized key this.currentSortIndexSegment.putLong(this.currentSortIndexOffset, this.currentDataBufferOffset); if(this.numKeyBytes != 0) { this.comparator.putNormalizedKey(record, this.currentSortIndexSegment, this.currentSortIndexOffset + OFFSET_LEN, this.numKeyBytes); } // serialize the record into the data buffers try { this.serializer.serialize(record, this.recordCollector); this.currentSortIndexOffset += this.indexEntrySize; this.currentDataBufferOffset = this.recordCollector.getCurrentOffset(); this.numRecords++; return true; } catch (EOFException eofex) { return false; } } // ------------------------------------------------------------------------ // Access Utilities // ------------------------------------------------------------------------ private final long readPointer(int logicalPosition) { if (logicalPosition < 0 | logicalPosition >= this.numRecords) { throw new IndexOutOfBoundsException(); } final int bufferNum = logicalPosition / this.indexEntriesPerSegment; final int segmentOffset = logicalPosition % this.indexEntriesPerSegment; return this.sortIndex.get(bufferNum).getLong(segmentOffset * this.indexEntrySize); } private final T getRecordFromBuffer(T reuse, long pointer) throws IOException { this.recordBuffer.setReadPosition(pointer); return this.serializer.deserialize(reuse, this.recordBuffer); } private final int compareRecords(long pointer1, long pointer2) { this.recordBuffer.setReadPosition(pointer1); this.recordBufferForComparison.setReadPosition(pointer2); try { return this.comparator.compare(this.recordBuffer, this.recordBufferForComparison); } catch (IOException ioex) { throw new RuntimeException("Error comparing two records.", ioex); } } private final boolean memoryAvailable() { return !this.freeMemory.isEmpty(); } private final MemorySegment nextMemorySegment() { return this.freeMemory.remove(this.freeMemory.size() - 1); } // ------------------------------------------------------------------------- // Indexed Sorting // ------------------------------------------------------------------------- /* (non-Javadoc) * @see eu.stratosphere.pact.runtime.sort.IndexedSortable#compare(int, int) */ @Override public int compare(int i, int j) { final int bufferNumI = i / this.indexEntriesPerSegment; final int segmentOffsetI = (i % this.indexEntriesPerSegment) * this.indexEntrySize; final int bufferNumJ = j / this.indexEntriesPerSegment; final int segmentOffsetJ = (j % this.indexEntriesPerSegment) * this.indexEntrySize; final MemorySegment segI = this.sortIndex.get(bufferNumI); final MemorySegment segJ = this.sortIndex.get(bufferNumJ); int val = MemorySegment.compare(segI, segJ, segmentOffsetI + OFFSET_LEN, segmentOffsetJ + OFFSET_LEN, this.numKeyBytes); if (val != 0 || this.normalizedKeyFullyDetermines) { return this.useNormKeyUninverted ? val : -val; } final long pointerI = segI.getLong(segmentOffsetI); final long pointerJ = segJ.getLong(segmentOffsetJ); return compareRecords(pointerI, pointerJ); } @Override public void swap(int i, int j) { final int bufferNumI = i / this.indexEntriesPerSegment; final int segmentOffsetI = (i % this.indexEntriesPerSegment) * this.indexEntrySize; final int bufferNumJ = j / this.indexEntriesPerSegment; final int segmentOffsetJ = (j % this.indexEntriesPerSegment) * this.indexEntrySize; final MemorySegment segI = this.sortIndex.get(bufferNumI); final MemorySegment segJ = this.sortIndex.get(bufferNumJ); MemorySegment.swapBytes(segI, segJ, this.swapBuffer, segmentOffsetI, segmentOffsetJ, this.indexEntrySize); } @Override public int size() { return this.numRecords; } // ------------------------------------------------------------------------- /** * Gets an iterator over all records in this buffer in their logical order. * * @return An iterator returning the records in their logical order. */ @Override public final MutableObjectIterator<T> getIterator() { return new MutableObjectIterator<T>() { private final int size = size(); private int current = 0; private int currentSegment = 0; private int currentOffset = 0; private MemorySegment currentIndexSegment = sortIndex.get(0); @Override public T next(T target) { if (this.current < this.size) { this.current++; if (this.currentOffset > lastIndexEntryOffset) { this.currentOffset = 0; this.currentIndexSegment = sortIndex.get(++this.currentSegment); } long pointer = this.currentIndexSegment.getLong(this.currentOffset); this.currentOffset += indexEntrySize; try { return getRecordFromBuffer(target, pointer); } catch (IOException ioe) { throw new RuntimeException(ioe); } } else { return null; } } }; } // ------------------------------------------------------------------------ // Writing to a DataOutputView // ------------------------------------------------------------------------ /** * Writes the records in this buffer in their logical order to the given output. * * @param output The output view to write the records to. * @throws IOException Thrown, if an I/O exception occurred writing to the output view. */ @Override public void writeToOutput(final ChannelWriterOutputView output) throws IOException { int recordsLeft = this.numRecords; int currentMemSeg = 0; while (recordsLeft > 0) { final MemorySegment currentIndexSegment = this.sortIndex.get(currentMemSeg++); int offset = 0; // check whether we have a full or partially full segment if (recordsLeft >= this.indexEntriesPerSegment) { // full segment for (;offset <= this.lastIndexEntryOffset; offset += this.indexEntrySize) { final long pointer = currentIndexSegment.getLong(offset); this.recordBuffer.setReadPosition(pointer); this.serializer.copy(this.recordBuffer, output); } recordsLeft -= this.indexEntriesPerSegment; } else { // partially filled segment for (; recordsLeft > 0; recordsLeft--, offset += this.indexEntrySize) { final long pointer = currentIndexSegment.getLong(offset); this.recordBuffer.setReadPosition(pointer); this.serializer.copy(this.recordBuffer, output); } } } } /** * Writes a subset of the records in this buffer in their logical order to the given output. * * @param output The output view to write the records to. * @param start The logical start position of the subset. * @param num The number of elements to write. * @throws IOException Thrown, if an I/O exception occurred writing to the output view. */ @Override public void writeToOutput(final ChannelWriterOutputView output, final int start, int num) throws IOException { int currentMemSeg = start / this.indexEntriesPerSegment; int offset = (start % this.indexEntriesPerSegment) * this.indexEntrySize; while (num > 0) { final MemorySegment currentIndexSegment = this.sortIndex.get(currentMemSeg++); // check whether we have a full or partially full segment if (num >= this.indexEntriesPerSegment && offset == 0) { // full segment for (;offset <= this.lastIndexEntryOffset; offset += this.indexEntrySize) { final long pointer = currentIndexSegment.getLong(offset); this.recordBuffer.setReadPosition(pointer); this.serializer.copy(this.recordBuffer, output); } num -= this.indexEntriesPerSegment; } else { // partially filled segment for (; num > 0 && offset <= this.lastIndexEntryOffset; num--, offset += this.indexEntrySize) { final long pointer = currentIndexSegment.getLong(offset); this.recordBuffer.setReadPosition(pointer); this.serializer.copy(this.recordBuffer, output); } } offset = 0; } } }