/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce.task.reduce;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.nio.channels.Channels;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.io.BoundedByteArrayOutputStream;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CodecPool;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.Decompressor;
import org.apache.hadoop.mapred.IFileInputStream;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapreduce.MRConfig;
import org.apache.hadoop.mapreduce.TaskAttemptID;
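
/**
 * A map output held in memory, but populated by reading the mapper's
 * on-disk spill file ({@code file.out}) directly from a node-local path
 * instead of consuming the HTTP shuffle stream. The {@code input} stream
 * handed to {@link #shuffle} is replaced by one positioned at the offset
 * recorded via {@link #setOffset(long)}.
 *
 * <p>Expected use, as a sketch (the surrounding calls are illustrative,
 * not part of this class):
 * <pre>
 *   output.setOffset(segmentOffset); // where the segment starts in file.out
 *   output.shuffle(host, input, compressedLength, decompressedLength,
 *       metrics, reporter);
 *   output.commit();                 // or output.abort() on failure
 * </pre>
 */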
@InterfaceAudience.Private
@InterfaceStability.Unstable
class InMemoryLinkMapOutput<K, V> extends InMemoryMapOutput<K, V> {

  private static final Log LOG =
      LogFactory.getLog(InMemoryLinkMapOutput.class);

  private final JobConf conf;
  private final MergeManagerImpl<K, V> merger;
  private final byte[] memory;
  private final BoundedByteArrayOutputStream byteStream;

  // Decompression of map-outputs
  private final CompressionCodec codec;
  private final Decompressor decompressor;

  // Byte offset at which this map output's segment starts within the
  // mapper's file.out
  private long offset;
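
  /**
   * Reserves a {@code size}-byte in-memory buffer for the map output and,
   * when map outputs are compressed, borrows a matching
   * {@link Decompressor} from the {@link CodecPool}.
   */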
  public InMemoryLinkMapOutput(JobConf conf, TaskAttemptID mapId,
      MergeManagerImpl<K, V> merger, int size, CompressionCodec codec,
      boolean primaryMapOutput) {
    super(conf, mapId, merger, size, codec, primaryMapOutput);
    this.conf = conf;
    this.merger = merger;
    this.codec = codec;
    byteStream = new BoundedByteArrayOutputStream(size);
    memory = byteStream.getBuffer();
    if (codec != null) {
      decompressor = CodecPool.getDecompressor(codec);
    } else {
      decompressor = null;
    }
  }

  /** @return the in-memory buffer that {@link #shuffle} fills */
  public byte[] getMemory() {
    return memory;
  }

  /** @return the bounded stream backing {@link #getMemory()}'s buffer */
  public BoundedByteArrayOutputStream getArrayStream() {
    return byteStream;
  }
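
  /**
   * Rather than reading the fetcher-supplied {@code input} stream, open
   * the mapper's local {@code file.out}, seek to {@code offset}, and read
   * this map output's segment into {@code memory}, verifying the IFile
   * checksum and decompressing if a codec is configured.
   */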
  @Override
  public void shuffle(MapHost host, InputStream input, long compressedLength,
      long decompressedLength, ShuffleClientMetrics metrics,
      Reporter reporter) throws IOException {
    // MapHost identifiers look like "host:port"; keep only the host name.
    String mapHostName = host.getHostName().split(":")[0];
    String appPath = conf.get(MRConfig.LOCAL_DIR);
    LOG.debug("original appPath " + appPath);

    // Rewrite the local dir so it points at the map host's copy of the
    // same per-application directory. The path component five levels up
    // from the leaf is assumed to hold the host name.
    String[] appPathParts = appPath.split("/");
    appPathParts[appPathParts.length - 5] = mapHostName;
    StringBuilder builder = new StringBuilder();
    for (String s : appPathParts) {
      builder.append(s);
      builder.append("/");
    }
    appPath = builder.toString();

    String src = appPath + "output/" + getMapId() + "/file.out";
    File f = new File(src);
    if (f.exists()) {
      LOG.debug("shuffleToLink: the src " + src + " exists!");
    } else {
      LOG.warn("shuffleToLink: the src " + src + " does not exist");
    }

    // Replace the supplied HTTP stream with one that reads file.out
    // directly, starting at this segment's offset.
    RandomAccessFile raf = new RandomAccessFile(f, "r");
    input = Channels.newInputStream(raf.getChannel().position(offset));
    IFileInputStream checksumIn =
        new IFileInputStream(input, compressedLength, conf);
    input = checksumIn;

    // Are map-outputs compressed?
    if (codec != null) {
      decompressor.reset();
      input = codec.createInputStream(input, decompressor);
    }
    try {
      LOG.debug("offset: " + offset);
      LOG.debug("memory.length: " + memory.length);
      LOG.debug("compressedLength: " + compressedLength);
      LOG.debug("decompressedLength: " + decompressedLength);

      // TODO: would offset and length be OK as ints?
      IOUtils.readFully(input, memory, 0, memory.length);

      metrics.inputBytes(memory.length);
      reporter.progress();
      LOG.info("Read " + memory.length + " bytes from map-output for "
          + getMapId());

      /*
       * We've gotten the amount of data we were expecting. Normally we
       * would verify that the decompressor has nothing more to offer,
       * which also forces it to read any trailing bytes that weren't
       * critical for decompression, keeping the stream in sync. That
       * check is disabled here, presumably because the stream is
       * positioned inside a larger file.out where trailing bytes are
       * expected:
       *
       * if (input.read() >= 0) {
       *   throw new IOException("Unexpected extra bytes from input stream"
       *       + " for " + getMapId());
       * }
       */
      input.close();
    } catch (IOException ioe) {
      // Close the streams
      IOUtils.cleanup(LOG, input);
      // Re-throw
      throw ioe;
    } finally {
      CodecPool.returnDecompressor(decompressor);
      // Always release the file handle, even on failure.
      IOUtils.cleanup(LOG, raf);
    }
  }
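
  /** Hands the filled in-memory segment over to the merge manager. */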
  @Override
  public void commit() throws IOException {
    merger.closeInMemoryFile(this);
  }
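
  /** Releases the reserved buffer space back to the merge manager. */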
  @Override
  public void abort() {
    merger.unreserve(memory.length);
  }

  @Override
  public String getDescription() {
    return "LINK MEMORY";
  }
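
  /**
   * Records where this map output's segment begins inside the mapper's
   * {@code file.out}; expected to be set before {@link #shuffle} runs.
   */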
  public void setOffset(long offset) {
    this.offset = offset;
  }
}