/***********************************************************************************************************************
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
**********************************************************************************************************************/
package eu.stratosphere.api.java.record.io;
import java.io.IOException;
import eu.stratosphere.api.common.io.statistics.BaseStatistics;
import eu.stratosphere.configuration.Configuration;
import eu.stratosphere.core.fs.FileInputSplit;
import eu.stratosphere.types.Record;
/**
 * Base class for file input formats whose records all have the same, fixed length in bytes.
 * <p>
 * The record length is read from the configuration under {@link #RECORDLENGTH_PARAMETER_KEY}.
 * Records are assumed to start at multiples of the record length, counted from the beginning
 * of the file. A split reads every record that <i>starts</i> inside the split: the start
 * position is moved forward to the next record boundary and the end position is extended to
 * the next record boundary, so that a record straddling two splits is read exactly once (by
 * the split in which it begins).
 * <p>
 * Subclasses implement {@link #readBytes(Record, byte[], int)} to deserialize a single record.
 */
public abstract class FixedLengthInputFormat extends FileInputFormat {

    private static final long serialVersionUID = 1L;

    /**
     * The config parameter which defines the fixed length of a record.
     */
    public static final String RECORDLENGTH_PARAMETER_KEY = "pact.fix-input.record-length";

    /**
     * The default read buffer size = 1MB.
     */
    private static final int DEFAULT_READ_BUFFER_SIZE = 1024 * 1024;

    /**
     * Buffer to read a batch of records from a file.
     */
    private byte[] readBuffer;

    /**
     * The position in the stream.
     */
    private long streamPos;

    /**
     * The end position in the stream (exclusive). Not consulted when the whole split is read.
     */
    private long streamEnd;

    /**
     * Read position within the read buffer.
     */
    private int readBufferPos;

    /**
     * The limit of the data in the read buffer.
     */
    private int readBufferLimit;

    /**
     * Fixed length of all records.
     */
    private int recordLength;

    /**
     * Size of the read buffer. Rounded up to a multiple of the record length in {@link #open(FileInputSplit)}.
     */
    private int readBufferSize = DEFAULT_READ_BUFFER_SIZE;

    /**
     * The flag whether the stream is exhausted.
     */
    private boolean exhausted;

    // --------------------------------------------------------------------------------------------

    /**
     * Default constructor. The record length is set later through {@link #configure(Configuration)}.
     */
    protected FixedLengthInputFormat() {}

    /**
     * Reads a record out of the given buffer. This operation always consumes the standard number of
     * bytes, regardless of whether the produced record was valid.
     *
     * @param target The target Record
     * @param buffer The buffer containing the binary data.
     * @param startPos The start position in the byte array.
     * @return True, if the record is valid, false otherwise.
     */
    public abstract boolean readBytes(Record target, byte[] buffer, int startPos);

    /**
     * Returns the fixed length of a record.
     *
     * @return the fixed length of a record.
     */
    public int getRecordLength() {
        return this.recordLength;
    }

    /**
     * Gets the size of the buffer internally used to read batches of records.
     * <p>
     * Before {@link #open(FileInputSplit)} has been called this is the default size; afterwards it is
     * the size rounded up to a multiple of the record length.
     *
     * @return The size of the read buffer.
     */
    public int getReadBufferSize() {
        // Use the size field rather than the array: the array is only allocated in open().
        return this.readBufferSize;
    }

    // --------------------------------------------------------------------------------------------

    /**
     * Configures the parent format and reads the record length.
     *
     * @param parameters The configuration holding the record length.
     * @throws IllegalArgumentException If the record length is not set or smaller than 1.
     */
    @Override
    public void configure(Configuration parameters) {
        // pass parameters to FileInputFormat
        super.configure(parameters);

        // read own parameters
        this.recordLength = parameters.getInteger(RECORDLENGTH_PARAMETER_KEY, 0);
        if (this.recordLength < 1) {
            throw new IllegalArgumentException("The record length parameter must be set and larger than 0.");
        }
    }

    /**
     * Opens the given split, aligns the read range to record boundaries and pre-fills the read buffer.
     *
     * @param split The split to read.
     * @throws IOException If opening, seeking or reading the stream fails.
     */
    @Override
    public void open(FileInputSplit split) throws IOException {
        // open input split using FileInputFormat
        super.open(split);

        // move the start forward to the next record boundary; a record that began before the
        // split start belongs to the previous split
        final long alignedStart = roundUpToRecordBoundary(this.splitStart, this.recordLength);
        if (alignedStart != this.splitStart) {
            this.stream.seek(alignedStart);
        }
        this.streamPos = alignedStart;

        if (this.splitLength == FileInputFormat.READ_WHOLE_SPLIT_FLAG) {
            // the end position is not used in this mode; reading continues until end of stream
            this.streamEnd = Long.MAX_VALUE;
        } else {
            // extend the end to the next record boundary so that the record straddling the
            // split end is read completely by this split
            this.streamEnd = roundUpToRecordBoundary(this.splitStart + this.splitLength, this.recordLength);
        }

        // make the buffer size a multiple of the record length; only round up when needed so that
        // repeated calls to open() do not keep growing (and re-allocating) the buffer
        final int bufferRemainder = this.readBufferSize % this.recordLength;
        if (bufferRemainder != 0) {
            this.readBufferSize += this.recordLength - bufferRemainder;
        }
        if (this.readBuffer == null || this.readBuffer.length != this.readBufferSize) {
            this.readBuffer = new byte[this.readBufferSize];
        }

        this.readBufferLimit = 0;
        this.readBufferPos = 0;
        this.exhausted = false;
        fillReadBuffer();
    }

    /**
     * {@inheritDoc}
     * <p>
     * The statistics of the parent format are augmented with the fixed record length.
     *
     * @throws IOException
     */
    @Override
    public FileBaseStatistics getStatistics(BaseStatistics cachedStats) throws IOException {
        final FileBaseStatistics stats = super.getStatistics(cachedStats);
        return stats == null ? null :
            new FileBaseStatistics(stats.getLastModificationTime(), stats.getTotalInputSize(), this.recordLength);
    }

    // --------------------------------------------------------------------------------------------

    /**
     * Checks whether all data of the current split has been consumed.
     *
     * @return True, if no further data could be read for the split, false otherwise.
     */
    @Override
    public boolean reachedEnd() {
        return this.exhausted;
    }

    /**
     * Reads the next record from the read buffer into the given record.
     *
     * @param reuse The record to fill.
     * @return The filled record, or {@code null} if the input is exhausted or
     *         {@link #readBytes(Record, byte[], int)} reported the record as invalid.
     * @throws IOException If the remaining data is shorter than one record or reading fails.
     */
    @Override
    public Record nextRecord(Record reuse) throws IOException {
        // refill if the buffer is drained
        if (this.readBufferLimit - this.readBufferPos <= 0) {
            fillReadBuffer();
            // check if source is exhausted
            if (this.exhausted) {
                return null;
            }
        }

        // always verify that a complete record is available, also directly after a refill;
        // otherwise a truncated trailing record would be parsed from stale buffer content
        if (this.readBufferLimit - this.readBufferPos < this.recordLength) {
            throw new IOException("Unable to read full record");
        }

        final boolean valid = readBytes(reuse, this.readBuffer, this.readBufferPos);
        this.readBufferPos += this.recordLength;

        // refill eagerly so that reachedEnd() is accurate right after the last record
        if (this.readBufferPos >= this.readBufferLimit) {
            fillReadBuffer();
        }
        return valid ? reuse : null;
    }

    /**
     * Fills the read buffer from the file stream.
     * <p>
     * Reads repeatedly until the requested number of bytes has been read or the stream ends,
     * because a single read call may return fewer bytes than requested. If no byte could be read,
     * the format is marked as exhausted and the buffer positions are left untouched.
     *
     * @throws IOException If reading from the stream fails.
     */
    private void fillReadBuffer() throws IOException {
        final int toRead;
        if (this.splitLength == FileInputFormat.READ_WHOLE_SPLIT_FLAG) {
            // special case (e.g. compressed files): no end position, read until end of stream
            toRead = this.readBufferSize;
        } else {
            toRead = (int) Math.min(this.streamEnd - this.streamPos, this.readBufferSize);
            if (toRead <= 0) {
                this.exhausted = true;
                return;
            }
        }

        int filled = 0;
        while (filled < toRead) {
            final int read = this.stream.read(this.readBuffer, filled, toRead - filled);
            if (read <= 0) {
                // end of stream (a zero-length read is treated the same way to guarantee progress)
                break;
            }
            filled += read;
        }

        if (filled == 0) {
            this.exhausted = true;
        } else {
            this.streamPos += filled;
            this.readBufferPos = 0;
            this.readBufferLimit = filled;
        }
    }

    /**
     * Rounds a non-negative stream position up to the next multiple of the record length.
     *
     * @param position The position to align.
     * @param recordLength The record length, larger than 0.
     * @return The position itself if it is already a multiple of the record length, otherwise the
     *         next larger multiple.
     */
    private static long roundUpToRecordBoundary(long position, int recordLength) {
        final long remainder = position % recordLength;
        return remainder == 0 ? position : position + (recordLength - remainder);
    }
}