/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapreduce.task.reduce;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.mapred.IFile.Reader;
import org.apache.hadoop.mapreduce.TaskAttemptID;

/**
 * <code>IFile.InMemoryReader</code> to read map-outputs present in-memory.
 */
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class InMemoryReader<K, V> extends Reader<K, V> {
  private final TaskAttemptID taskAttemptId;
  private final MergeManager<K,V> merger;
  DataInputBuffer memDataIn = new DataInputBuffer();
  private int start;
  private int length;

  public InMemoryReader(MergeManager<K,V> merger, TaskAttemptID taskAttemptId,
                        byte[] data, int start, int length)
      throws IOException {
    super(null, null, length - start, null, null);
    this.merger = merger;
    this.taskAttemptId = taskAttemptId;

    buffer = data;
    bufferSize = (int)fileLength;
    memDataIn.reset(buffer, start, length);
    this.start = start;
    this.length = length;
  }

  @Override
  public void reset(int offset) {
    // Shrink the readable window by the offset so the buffer's end marker
    // stays at the real end of this segment's data; passing the unmodified
    // length would let reads run past the segment when offset > 0.
    memDataIn.reset(buffer, start + offset, length - offset);
    bytesRead = offset;
    eof = false;
  }

  @Override
  public long getPosition() throws IOException {
    // InMemoryReader does not initialize streams like Reader, so in.getPos()
    // would not work. Instead, return the number of uncompressed bytes read,
    // which will be correct since in-memory data is not compressed.
    return bytesRead;
  }

  @Override
  public long getLength() {
    return fileLength;
  }

  private void dumpOnError() {
    File dumpFile = new File("../output/" + taskAttemptId + ".dump");
    System.err.println("Dumping corrupt map-output of " + taskAttemptId +
                       " to " + dumpFile.getAbsolutePath());
    // try-with-resources guarantees the stream is closed even if the write
    // fails partway through
    try (FileOutputStream fos = new FileOutputStream(dumpFile)) {
      fos.write(buffer, 0, bufferSize);
    } catch (IOException ioe) {
      System.err.println("Failed to dump map-output of " + taskAttemptId);
    }
  }

  public boolean nextRawKey(DataInputBuffer key) throws IOException {
    try {
      if (!positionToNextRecord(memDataIn)) {
        return false;
      }

      // Setup the key
      int pos = memDataIn.getPosition();
      byte[] data = memDataIn.getData();
      key.reset(data, pos, currentKeyLength);

      // Position for the next value
      long skipped = memDataIn.skip(currentKeyLength);
      if (skipped != currentKeyLength) {
        throw new IOException("Rec# " + recNo +
            ": Failed to skip past key of length: " + currentKeyLength);
      }

      // Record the bytes read
      bytesRead += currentKeyLength;
      return true;
    } catch (IOException ioe) {
      dumpOnError();
      throw ioe;
    }
  }

  public void nextRawValue(DataInputBuffer value) throws IOException {
    try {
      int pos = memDataIn.getPosition();
      byte[] data = memDataIn.getData();
      value.reset(data, pos, currentValueLength);

      // Position for the next record
      long skipped = memDataIn.skip(currentValueLength);
      if (skipped != currentValueLength) {
        throw new IOException("Rec# " + recNo +
            ": Failed to skip past value of length: " + currentValueLength);
      }

      // Record the bytes read
      bytesRead += currentValueLength;
      ++recNo;
    } catch (IOException ioe) {
      dumpOnError();
      throw ioe;
    }
  }

  public void close() {
    // Release the buffer
    dataIn = null;
    buffer = null;

    // Inform the MergeManager that the reserved memory can be returned
    if (merger != null) {
      merger.unreserve(bufferSize);
    }
  }
}
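
// ---------------------------------------------------------------------------
// Illustrative usage sketch (an assumption for this edit, not part of the
// original class). It shows how a caller could drive InMemoryReader over a
// byte[] holding IFile-formatted records (length-prefixed key/value pairs
// terminated by the IFile EOF marker), which is the layout that
// positionToNextRecord() expects. Passing a null MergeManager is safe here
// because close() checks for null before calling unreserve(); "taskId" and
// "segment" are hypothetical placeholders.
//
//   DataInputBuffer key = new DataInputBuffer();
//   DataInputBuffer value = new DataInputBuffer();
//   InMemoryReader<Text, Text> reader =
//       new InMemoryReader<Text, Text>(null, taskId, segment, 0, segment.length);
//   try {
//     while (reader.nextRawKey(key)) {
//       reader.nextRawValue(value);
//       // key and value now point at the raw serialized bytes of one record
//     }
//   } finally {
//     reader.close();
//   }
// ---------------------------------------------------------------------------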