/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.streaming.runtime.io;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.flink.annotation.Internal;
import org.apache.flink.core.memory.MemorySegment;
import org.apache.flink.core.memory.MemorySegmentFactory;
import org.apache.flink.runtime.event.AbstractEvent;
import org.apache.flink.runtime.io.disk.iomanager.IOManager;
import org.apache.flink.runtime.io.network.api.serialization.EventSerializer;
import org.apache.flink.runtime.io.network.buffer.Buffer;
import org.apache.flink.runtime.io.network.buffer.FreeingBufferRecycler;
import org.apache.flink.runtime.io.network.partition.consumer.BufferOrEvent;
import org.apache.flink.util.StringUtils;
/**
* The buffer spiller takes the buffers and events from a data stream and adds them to a spill file.
* After a number of elements have been spilled, the spiller can "roll over": It presents the spilled
* elements as a readable sequence, and opens a new spill file.
*
* <p>This implementation buffers data effectively in the OS cache, which gracefully extends to the
* disk. Most data is written and re-read milliseconds later. The file is deleted after the read.
* Consequently, in most cases, the data will never actually hit the physical disks.</p>
*
* <p>IMPORTANT: The SpilledBufferOrEventSequences created by this spiller all reuse the same
* reading memory (to reduce overhead) and can consequently not be read concurrently.</p>
*/
@Internal
public class BufferSpiller {
/** Size of header in bytes (see add method). */
static final int HEADER_SIZE = 9;
/** The counter that selects the next directory to spill into. */
private static final AtomicInteger DIRECTORY_INDEX = new AtomicInteger(0);
/** The size of the buffer with which data is read back in. */
private static final int READ_BUFFER_SIZE = 1024 * 1024;
/** The directories to spill to. */
private final File tempDir;
/** The name prefix for spill files. */
private final String spillFilePrefix;
/** The buffer used for bulk reading data (used in the SpilledBufferOrEventSequence). */
private final ByteBuffer readBuffer;
/** The buffer that encodes the spilled header. */
private final ByteBuffer headBuffer;
/** The reusable array that holds header and contents buffers. */
private final ByteBuffer[] sources;
/** The file that we currently spill to. */
private File currentSpillFile;
/** The channel of the file we currently spill to. */
private FileChannel currentChannel;
/** The page size, to let this reader instantiate properly sized memory segments. */
private final int pageSize;
/** A counter, to created numbered spill files. */
private int fileCounter;
/** The number of bytes written since the last roll over. */
private long bytesWritten;
/**
* Creates a new buffer spiller, spilling to one of the I/O manager's temp directories.
*
* @param ioManager The I/O manager for access to teh temp directories.
* @param pageSize The page size used to re-create spilled buffers.
* @throws IOException Thrown if the temp files for spilling cannot be initialized.
*/
public BufferSpiller(IOManager ioManager, int pageSize) throws IOException {
this.pageSize = pageSize;
this.readBuffer = ByteBuffer.allocateDirect(READ_BUFFER_SIZE);
this.readBuffer.order(ByteOrder.LITTLE_ENDIAN);
this.headBuffer = ByteBuffer.allocateDirect(16);
this.headBuffer.order(ByteOrder.LITTLE_ENDIAN);
this.sources = new ByteBuffer[] { this.headBuffer, null };
File[] tempDirs = ioManager.getSpillingDirectories();
this.tempDir = tempDirs[DIRECTORY_INDEX.getAndIncrement() % tempDirs.length];
byte[] rndBytes = new byte[32];
new Random().nextBytes(rndBytes);
this.spillFilePrefix = StringUtils.byteToHexString(rndBytes) + '.';
// prepare for first contents
createSpillingChannel();
}
/**
* Adds a buffer or event to the sequence of spilled buffers and events.
*
* @param boe The buffer or event to add and spill.
* @throws IOException Thrown, if the buffer of event could not be spilled.
*/
public void add(BufferOrEvent boe) throws IOException {
try {
ByteBuffer contents;
if (boe.isBuffer()) {
Buffer buf = boe.getBuffer();
contents = buf.getMemorySegment().wrap(0, buf.getSize());
}
else {
contents = EventSerializer.toSerializedEvent(boe.getEvent());
}
headBuffer.clear();
headBuffer.putInt(boe.getChannelIndex());
headBuffer.putInt(contents.remaining());
headBuffer.put((byte) (boe.isBuffer() ? 0 : 1));
headBuffer.flip();
bytesWritten += (headBuffer.remaining() + contents.remaining());
sources[1] = contents;
currentChannel.write(sources);
}
finally {
if (boe.isBuffer()) {
boe.getBuffer().recycle();
}
}
}
/**
* Starts a new sequence of spilled buffers and event and returns the current sequence of spilled buffers
* for reading. This method returns {@code null}, if nothing was added since the creation of the spiller, or the
* last call to this method.
*
* <p>NOTE: The SpilledBufferOrEventSequences created by this method all reuse the same
* reading memory (to reduce overhead) and can consequently not be read concurrently with each other.
* To create a sequence that can be read concurrently with the previous SpilledBufferOrEventSequence, use the
* {@link #rollOverWithNewBuffer()} method.</p>
*
* @return The readable sequence of spilled buffers and events, or 'null', if nothing was added.
* @throws IOException Thrown, if the readable sequence could not be created, or no new spill
* file could be created.
*/
public SpilledBufferOrEventSequence rollOver() throws IOException {
return rollOverInternal(false);
}
/**
* Starts a new sequence of spilled buffers and event and returns the current sequence of spilled buffers
* for reading. This method returns {@code null}, if nothing was added since the creation of the spiller, or the
* last call to this method.
*
* <p>The SpilledBufferOrEventSequence returned by this method is safe for concurrent consumption with
* any previously returned sequence.</p>
*
* @return The readable sequence of spilled buffers and events, or 'null', if nothing was added.
* @throws IOException Thrown, if the readable sequence could not be created, or no new spill
* file could be created.
*/
public SpilledBufferOrEventSequence rollOverWithNewBuffer() throws IOException {
return rollOverInternal(true);
}
private SpilledBufferOrEventSequence rollOverInternal(boolean newBuffer) throws IOException {
if (bytesWritten == 0) {
return null;
}
ByteBuffer buf;
if (newBuffer) {
buf = ByteBuffer.allocateDirect(READ_BUFFER_SIZE);
buf.order(ByteOrder.LITTLE_ENDIAN);
} else {
buf = readBuffer;
}
// create a reader for the spilled data
currentChannel.position(0L);
SpilledBufferOrEventSequence seq =
new SpilledBufferOrEventSequence(currentSpillFile, currentChannel, buf, pageSize);
// create ourselves a new spill file
createSpillingChannel();
bytesWritten = 0L;
return seq;
}
/**
* Cleans up the current spilling channel and file.
*
* <p>Does not clean up the SpilledBufferOrEventSequences generated by calls to
* {@link #rollOver()}.
*
* @throws IOException Thrown if channel closing or file deletion fail.
*/
public void close() throws IOException {
currentChannel.close();
if (!currentSpillFile.delete()) {
throw new IOException("Cannot delete spill file");
}
}
/**
* Gets the number of bytes written in the current spill file.
* @return the number of bytes written in the current spill file
*/
public long getBytesWritten() {
return bytesWritten;
}
// ------------------------------------------------------------------------
// For testing
// ------------------------------------------------------------------------
File getCurrentSpillFile() {
return currentSpillFile;
}
FileChannel getCurrentChannel() {
return currentChannel;
}
// ------------------------------------------------------------------------
// Utilities
// ------------------------------------------------------------------------
@SuppressWarnings("resource")
private void createSpillingChannel() throws IOException {
currentSpillFile = new File(tempDir, spillFilePrefix + (fileCounter++) + ".buffer");
currentChannel = new RandomAccessFile(currentSpillFile, "rw").getChannel();
}
// ------------------------------------------------------------------------
/**
* This class represents a sequence of spilled buffers and events, created by the
* {@link BufferSpiller}. The sequence of buffers and events can be read back using the
* method {@link #getNext()}.
*/
public static class SpilledBufferOrEventSequence {
/** Header is "channel index" (4 bytes) + length (4 bytes) + buffer/event (1 byte). */
private static final int HEADER_LENGTH = 9;
/** The file containing the data. */
private final File file;
/** The file channel to draw the data from. */
private final FileChannel fileChannel;
/** The byte buffer for bulk reading. */
private final ByteBuffer buffer;
/** We store this size as a constant because it is crucial it never changes. */
private final long size;
/** The page size to instantiate properly sized memory segments. */
private final int pageSize;
/** Flag to track whether the sequence has been opened already. */
private boolean opened = false;
/**
* Create a reader that reads a sequence of spilled buffers and events.
*
* @param file The file with the data.
* @param fileChannel The file channel to read the data from.
* @param buffer The buffer used for bulk reading.
* @param pageSize The page size to use for the created memory segments.
*/
SpilledBufferOrEventSequence(File file, FileChannel fileChannel, ByteBuffer buffer, int pageSize)
throws IOException {
this.file = file;
this.fileChannel = fileChannel;
this.buffer = buffer;
this.pageSize = pageSize;
this.size = fileChannel.size();
}
/**
* Initializes the sequence for reading.
* This method needs to be called before the first call to {@link #getNext()}. Otherwise
* the results of {@link #getNext()} are not predictable.
*/
public void open() {
if (!opened) {
opened = true;
buffer.position(0);
buffer.limit(0);
}
}
/**
* Gets the next BufferOrEvent from the spilled sequence, or {@code null}, if the
* sequence is exhausted.
*
* @return The next BufferOrEvent from the spilled sequence, or {@code null} (end of sequence).
* @throws IOException Thrown, if the reads failed, of if the byte stream is corrupt.
*/
public BufferOrEvent getNext() throws IOException {
if (buffer.remaining() < HEADER_LENGTH) {
buffer.compact();
while (buffer.position() < HEADER_LENGTH) {
if (fileChannel.read(buffer) == -1) {
if (buffer.position() == 0) {
// no trailing data
return null;
} else {
throw new IOException("Found trailing incomplete buffer or event");
}
}
}
buffer.flip();
}
final int channel = buffer.getInt();
final int length = buffer.getInt();
final boolean isBuffer = buffer.get() == 0;
if (isBuffer) {
// deserialize buffer
if (length > pageSize) {
throw new IOException(String.format(
"Spilled buffer (%d bytes) is larger than page size of (%d bytes)", length, pageSize));
}
MemorySegment seg = MemorySegmentFactory.allocateUnpooledSegment(pageSize);
int segPos = 0;
int bytesRemaining = length;
while (true) {
int toCopy = Math.min(buffer.remaining(), bytesRemaining);
if (toCopy > 0) {
seg.put(segPos, buffer, toCopy);
segPos += toCopy;
bytesRemaining -= toCopy;
}
if (bytesRemaining == 0) {
break;
}
else {
buffer.clear();
if (fileChannel.read(buffer) == -1) {
throw new IOException("Found trailing incomplete buffer");
}
buffer.flip();
}
}
Buffer buf = new Buffer(seg, FreeingBufferRecycler.INSTANCE);
buf.setSize(length);
return new BufferOrEvent(buf, channel);
}
else {
// deserialize event
if (length > buffer.capacity() - HEADER_LENGTH) {
throw new IOException("Event is too large");
}
if (buffer.remaining() < length) {
buffer.compact();
while (buffer.position() < length) {
if (fileChannel.read(buffer) == -1) {
throw new IOException("Found trailing incomplete event");
}
}
buffer.flip();
}
int oldLimit = buffer.limit();
buffer.limit(buffer.position() + length);
AbstractEvent evt = EventSerializer.fromSerializedEvent(buffer, getClass().getClassLoader());
buffer.limit(oldLimit);
return new BufferOrEvent(evt, channel);
}
}
/**
* Cleans up all file resources held by this spilled sequence.
*
* @throws IOException Thrown, if file channel closing or file deletion fail.
*/
public void cleanup() throws IOException {
fileChannel.close();
if (!file.delete()) {
throw new IOException("Cannot remove temp file for stream alignment writer");
}
}
/**
* Gets the size of this spilled sequence.
*/
public long size() throws IOException {
return size;
}
}
}