/*
* Copyright 2010 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.appengine.tools.mapreduce.inputs;
import com.google.common.base.Preconditions;
import com.google.common.io.ByteStreams;
import com.google.common.io.CountingInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.logging.Level;
import java.util.logging.Logger;
/**
 * An iterator over the terminator-delimited records of an input stream.
 *
 * <p>Each element pairs the bytes of one record (without its trailing terminator) with the value
 * of {@link CountingInputStream#getCount()} at the point where the record started.
 *
 * <p>If {@code skipFirstTerminator} is set and nothing has been read from the stream yet,
 * everything up to and including the first terminator is discarded before the first record is
 * produced. A record is produced only if it starts at a count no greater than {@code length}, so
 * the record that begins right at the end of the split is still returned.
 *
 * <p>{@link #hasNext()} reads ahead at most one record and may be called any number of times
 * between calls to {@link #next()}. An {@link IOException} while reading is logged and ends the
 * iteration; it is not propagated to the caller.
 *
 * <p>Every record is located with {@code mark}/{@code reset} on the supplied stream, so the
 * stream presumably has to support marking, otherwise {@code reset} is expected to fail.
 */
class InputStreamIterator implements Iterator<InputStreamIterator.OffsetRecordPair> {
  // --------------------------- STATIC FIELDS ---------------------------

  private static final Logger log = Logger.getLogger(InputStreamIterator.class.getName());

  // Read limit handed to mark() while scanning for the end of a record. NOTE(review): records
  // longer than this may fail to reset, depending on the wrapped stream - confirm with callers.
  private static final int READ_LIMIT = 1024 * 1024;

  // ------------------------------ FIELDS ------------------------------

  private final CountingInputStream input;
  private final long length;
  private final boolean skipFirstTerminator;
  private final byte terminator;

  // Record fetched by hasNext() that next() has not handed out yet; null when nothing is buffered.
  private OffsetRecordPair currentValue;

  // Latched once the end of the data (or a read failure) has been observed, so that later calls
  // never touch a stream whose position may be in the middle of a record.
  private boolean exhausted;

  // --------------------------- CONSTRUCTORS ---------------------------

  /**
   * Creates an iterator over {@code input}.
   *
   * @param input the stream to read records from; must not be null
   * @param length records starting at a stream count greater than this value are not returned.
   *     May be negative when we are reading beyond the split boundary.
   * @param skipFirstTerminator whether to discard everything up to and including the first
   *     terminator before producing records
   * @param terminator the byte that ends a record
   */
  InputStreamIterator(
      CountingInputStream input, long length, boolean skipFirstTerminator, byte terminator) {
    this.input = Preconditions.checkNotNull(input);
    this.length = length;
    this.skipFirstTerminator = skipFirstTerminator;
    this.terminator = terminator;
  }

  // ------------------------ INTERFACE METHODS ------------------------

  // --------------------- Interface Iterator ---------------------

  /**
   * Reports whether another record is available, reading it from the stream if necessary.
   *
   * <p>The record is buffered until {@link #next()} consumes it, so repeated calls do not advance
   * the stream. Once this method has returned {@code false} it keeps returning {@code false}.
   *
   * @return {@code true} if {@link #next()} will return a record
   */
  @Override
  public boolean hasNext() {
    if (currentValue != null) {
      return true;
    }
    if (exhausted) {
      return false;
    }
    try {
      currentValue = readRecord();
    } catch (IOException e) {
      log.log(Level.WARNING, "Failed to read next record", e);
    }
    if (currentValue == null) {
      exhausted = true;
    }
    return currentValue != null;
  }

  /**
   * Returns the next record and its start offset.
   *
   * @return the next record
   * @throws NoSuchElementException if no further record is available
   */
  @Override
  public OffsetRecordPair next() {
    if (!hasNext()) {
      throw new NoSuchElementException();
    }
    OffsetRecordPair result = currentValue;
    currentValue = null;
    return result;
  }

  /**
   * Not supported.
   *
   * @throws UnsupportedOperationException always
   */
  @Override
  public void remove() {
    throw new UnsupportedOperationException();
  }

  // -------------------------- INSTANCE METHODS --------------------------

  // Reads one record from the stream.
  //
  // Returns the record together with its start offset, or null when there is nothing left to
  // return for this split.
  private OffsetRecordPair readRecord() throws IOException {
    if (skipFirstTerminator && input.getCount() == 0) {
      // find the first record start;
      if (skipUntilNextRecord(input) != SkipRecordResult.TERMINATOR) {
        return null;
      }
    }
    // we are reading one record after split-end
    // and are skipping first record for all splits except for the leading one.
    // check if we read one byte ahead of the split.
    if (input.getCount() - 1 >= length) {
      return null;
    }

    // First pass: scan forward to learn where the record ends, then rewind to its start.
    long recordStart = input.getCount();
    input.mark(READ_LIMIT);
    SkipRecordResult skipValue = skipUntilNextRecord(input);
    if (skipValue == SkipRecordResult.AT_EOF) {
      return null;
    }
    long recordEnd = input.getCount();
    input.reset();

    // Second pass: copy the record bytes, leaving the terminator (if any) out of the payload.
    boolean terminated = skipValue == SkipRecordResult.TERMINATOR;
    int byteValueLen = (int) (recordEnd - recordStart);
    if (terminated) {
      byteValueLen--;
    }
    byte[] byteValue = new byte[byteValueLen];
    ByteStreams.readFully(input, byteValue);
    if (terminated) {
      Preconditions.checkState(input.skip(1) == 1); // skip the terminator
    }
    return new OffsetRecordPair(recordStart, byteValue);
  }

  // Searches for the start of the next record.
  //
  // Returns
  //   TERMINATOR if a terminator is reached. Otherwise,
  //   EOF_AFTER_RECORD if EOF is reached after reading some number of non-terminator characters
  //   AT_EOF if EOF is reached without any characters being read
  private SkipRecordResult skipUntilNextRecord(InputStream stream) throws IOException {
    // read() yields 0..255, so compare against the unsigned value of the terminator byte.
    final int terminatorValue = terminator & 0xff;
    boolean readCharSinceTerminator = false;
    int value;
    do {
      value = stream.read();
      if (value == -1) {
        return readCharSinceTerminator
            ? SkipRecordResult.EOF_AFTER_RECORD
            : SkipRecordResult.AT_EOF;
      }
      readCharSinceTerminator = true;
    } while (value != terminatorValue);
    return SkipRecordResult.TERMINATOR;
  }

  // -------------------------- ENUMERATIONS --------------------------

  // Outcome of scanning forward for a record boundary.
  private enum SkipRecordResult {
    AT_EOF, EOF_AFTER_RECORD, TERMINATOR
  }

  // -------------------------- INNER CLASSES --------------------------

  /**
   * A record's bytes together with the stream offset at which the record started.
   *
   * <p>The byte array is stored and returned without copying.
   */
  public static class OffsetRecordPair {
    private final long offset;
    private final byte[] record;

    /**
     * @param offset the offset at which the record starts
     * @param record the record bytes
     */
    public OffsetRecordPair(long offset, byte[] record) {
      this.offset = offset;
      this.record = record;
    }

    /** Returns the offset at which the record starts. */
    public long getOffset() {
      return offset;
    }

    /** Returns the record bytes (the internal array, not a copy). */
    public byte[] getRecord() {
      return record;
    }

    /** Two pairs are equal when their offsets match and their records have equal contents. */
    @Override
    public boolean equals(Object rhs) {
      if (!(rhs instanceof OffsetRecordPair)) {
        return false;
      }
      OffsetRecordPair rhsPair = (OffsetRecordPair) rhs;
      return offset == rhsPair.getOffset()
          && Arrays.equals(record, rhsPair.getRecord());
    }

    /** Combines the hash of the offset with the content hash of the record. */
    @Override
    public int hashCode() {
      // Long.valueOf avoids the deprecated boxing constructor; the hash value is unchanged.
      return Long.valueOf(offset).hashCode() ^ Arrays.hashCode(record);
    }
  }
}