/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapred;

import java.io.DataInput;
import java.io.DataInputStream;
import java.io.DataOutput;
import java.io.DataOutputStream;
import java.io.IOException;
import java.net.URI;
import java.util.HashSet;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DataInputBuffer;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.io.serializer.Deserializer;
import org.apache.hadoop.io.serializer.SerializationFactory;
import org.apache.hadoop.mapred.TaskCompletionEvent.Status;
import org.apache.hadoop.mapred.buffer.BufferUmbilicalProtocol;
import org.apache.hadoop.mapred.buffer.OutputFile;
import org.apache.hadoop.mapred.buffer.OutputFile.Header;
import org.apache.hadoop.mapred.buffer.impl.JOutputBuffer;
import org.apache.hadoop.mapred.buffer.impl.ValuesIterator;
import org.apache.hadoop.mapred.buffer.net.BufferExchange;
import org.apache.hadoop.mapred.buffer.net.BufferRequest;
import org.apache.hadoop.mapred.buffer.net.BufferExchangeSink;
import org.apache.hadoop.mapred.buffer.net.ReduceBufferRequest;
import org.apache.hadoop.util.Progress;
import org.apache.hadoop.util.ReflectionUtils;
import org.stanzax.quatrain.client.MrClient;

/**
 * A map task whose input is fetched over the network from an upstream
 * (pipelined) reduce task rather than read from a file split.
 *
 * <p>The task identifies its upstream reducer via the job property
 * {@code mapred.job.pipeline} (the upstream job id); the reducer with the
 * same partition index as this map task feeds it (see
 * {@link #pipelineReduceTask(JobConf)}). A {@link BufferExchangeSink} is
 * opened to receive that reducer's output, and a background
 * {@link ReduceOutputFetcher} thread asks the buffer umbilical to route the
 * reducer's output to the sink. Records arriving at the sink are pushed
 * through {@link #read(DataInputStream, Header)}, which deserializes each
 * key/value pair and feeds it to the user {@link Mapper}, collecting map
 * output into a {@link JOutputBuffer} that is forwarded downstream.
 *
 * <p>This class doubles as its own {@link InputCollector}: it passes itself
 * to the sink, which calls back into {@code read(...)}.
 *
 * <p>NOTE(review): this class is not generally thread-safe; the sink thread
 * and the main task thread coordinate via {@code synchronized(this)} /
 * {@code wait()} in {@link #run} — presumably the sink notifies this monitor
 * when input is complete. TODO confirm against BufferExchangeSink.
 */
public class PipelineMapTask extends MapTask implements InputCollector {

  private static final Log LOG = LogFactory.getLog(PipelineMapTask.class.getName());

  /**
   * Background thread that polls the task tracker for reduce-task completion
   * events and, once the upstream reducer (and its host) is known, sends a
   * single {@link ReduceBufferRequest} to the buffer umbilical so the
   * reducer's output is routed to this task's sink.
   *
   * <p>Terminates when the watched reducer has SUCCEEDED and the request has
   * been sent, or when a terminal (FAILED/KILLED/OBSOLETE/TIPFAILED) event is
   * seen.
   */
  private class ReduceOutputFetcher extends Thread {
    // Task id of the upstream reducer whose output this map consumes.
    private TaskID reduceTaskId;

    // Umbilical used to poll for reduce completion events.
    private TaskUmbilicalProtocol trackerUmbilical;

    // Umbilical used to send the buffer-routing request.
    private BufferUmbilicalProtocol bufferUmbilical;

    // Sink that will receive the reducer's output; its address is put in the request.
    private BufferExchangeSink sink;

    public ReduceOutputFetcher(TaskUmbilicalProtocol trackerUmbilical,
        BufferUmbilicalProtocol bufferUmbilical,
        BufferExchangeSink sink,
        TaskID reduceTaskId) {
      this.trackerUmbilical = trackerUmbilical;
      this.bufferUmbilical = bufferUmbilical;
      this.sink = sink;
      this.reduceTaskId = reduceTaskId;
    }

    /**
     * Poll loop: fetch new completion events every second, send the buffer
     * request at most once, and exit on a terminal event.
     */
    public void run() {
      boolean requestSent = false; // the ReduceBufferRequest is sent at most once
      int eid = 0;                 // next completion-event index to fetch
      while (true) {
        try {
          ReduceTaskCompletionEventsUpdate updates =
            trackerUmbilical.getReduceCompletionEvents(getJobID(), eid, Integer.MAX_VALUE);

          eid += updates.events.length;

          // Process the TaskCompletionEvents:
          // 1. Save the SUCCEEDED maps in knownOutputs to fetch the outputs.
          // 2. Save the OBSOLETE/FAILED/KILLED maps in obsoleteOutputs to stop fetching
          //    from those maps.
          // 3. Remove TIPFAILED maps from neededOutputs since we don't need their
          //    outputs at all.
          for (TaskCompletionEvent event : updates.events) {
            switch (event.getTaskStatus()) {
              case FAILED:
              case KILLED:
              case OBSOLETE:
              case TIPFAILED:
                // NOTE(review): exits on ANY terminal reduce event, without
                // checking whether it belongs to reduceTaskId — verify this is
                // intended (it stops fetching if any sibling reducer fails).
                return;
              case SUCCEEDED:
                // If the request was already delivered, a SUCCEEDED event means
                // we are done. Otherwise deliberately FALL THROUGH to RUNNING
                // so the request is still sent to the (now finished) reducer.
                if (requestSent) return;
              case RUNNING:
              {
                URI u = URI.create(event.getTaskTrackerHttp());
                String host = u.getHost();
                TaskAttemptID reduceAttemptId = event.getTaskAttemptId();
                // Only react to the attempt of the reducer we are pipelined to,
                // and only if we have not sent a request yet.
                if (reduceAttemptId.getTaskID().equals(reduceTaskId) && !requestSent) {
                  LOG.debug("Map " + getTaskID() + " sending buffer request to reducer " + reduceAttemptId);
                  // Pick the transfer flavor: plain file, snapshot, or stream.
                  BufferExchange.BufferType type = BufferExchange.BufferType.FILE;
                  if (snapshots) type = BufferExchange.BufferType.SNAPSHOT;
                  if (stream) type = BufferExchange.BufferType.STREAM;
                  BufferRequest request =
                    new ReduceBufferRequest(host, getTaskID(), sink.getAddress(), type, reduceTaskId);
                  try {
                    bufferUmbilical.request(request);
                    requestSent = true;
                    // Reducer already finished and request delivered: nothing
                    // more to watch for.
                    if (event.getTaskStatus() == Status.SUCCEEDED) return;
                  } catch (IOException e) {
                    // Best-effort: retry on the next poll iteration since
                    // requestSent stays false.
                    LOG.warn("BufferUmbilical problem sending request " + request + ". " + e);
                  }
                }
              }
              break;
            }
          }
        } catch (IOException e) {
          // NOTE(review): should go through LOG rather than stderr, and the
          // loop silently retries — confirm that is the desired policy.
          e.printStackTrace();
        }

        // Poll interval between completion-event fetches.
        try {
          Thread.sleep(1000);
        } catch (InterruptedException e) {
          // NOTE(review): interrupt is swallowed and the loop continues;
          // standard practice would re-interrupt or exit. TODO confirm intent.
        }
      }
    }
  }

  // Map-side output buffer; lazily created on the first read(...) call.
  private JOutputBuffer buffer = null;
  // User mapper instance; re-created after every processed input (see read()).
  private Mapper mapper;
  private BufferUmbilicalProtocol bufferUmbilical;
  private Reporter reporter;
  // Deserializers for the incoming (pipelined) key/value classes.
  private Deserializer keyDeserializer;
  private Deserializer valDeserializer;
  // True when input arrives as snapshots (mapred.job.input.snapshots).
  private boolean snapshots = false;
  // NOTE(review): never assigned from configuration in this file — STREAM
  // buffer type appears unreachable here. TODO confirm.
  private boolean stream = false;

  public PipelineMapTask() {
    super();
  }

  public PipelineMapTask(String jobFile, TaskAttemptID taskId, int partition) {
    super(jobFile, taskId, partition, "", new BytesWritable());
  }

  /** A pipeline map task always consumes exactly one input (its reducer). */
  @Override
  public int getNumberOfInputs() { return 1; }

  /** Setup/cleanup attempts are not pipelined; everything else is. */
  @Override
  public boolean isPipeline() {
    return !(jobCleanup || jobSetup || taskCleanup);
  }

  /**
   * Identifies the upstream reduce task feeding this map: same partition
   * index, in the job named by {@code mapred.job.pipeline}.
   */
  public TaskID pipelineReduceTask(JobConf job) {
    JobID reduceJobId = JobID.forName(job.get("mapred.job.pipeline"));
    return new TaskID(reduceJobId, false, getTaskID().getTaskID().id);
  }

  @Override
  public void localizeConfiguration(JobConf conf) throws IOException {
    super.localizeConfiguration(conf);
  }

  @Override
  public void write(DataOutput out) throws IOException {
    super.write(out);
  }

  @Override
  public void readFields(DataInput in) throws IOException {
    super.readFields(in);
  }

  /**
   * Task main loop. Handles the setup/cleanup special cases, then opens the
   * sink, starts the {@link ReduceOutputFetcher}, waits (SHUFFLE phase) until
   * the sink reports the upstream input complete, and finalizes (MAP phase).
   *
   * <p>Actual record processing happens on sink threads via
   * {@link #read(DataInputStream, Header)}, not here.
   *
   * @throws IOException if {@code mapred.job.pipeline} is undefined or the
   *         job has no reduce tasks
   */
  @Override
  @SuppressWarnings("unchecked")
  public void run(final JobConf job, final TaskUmbilicalProtocol umbilical,
      final BufferUmbilicalProtocol bufferUmbilical, MrClient mrClient) throws IOException {
    this.reporter = getReporter(umbilical);
    this.bufferUmbilical = bufferUmbilical;

    // start thread that will handle communication with parent
    startCommunicationThread(umbilical);

    initialize(job, reporter);

    // check if it is a cleanupJobTask
    if (jobCleanup) {
      runJobCleanupTask(umbilical);
      return;
    }
    if (jobSetup) {
      runJobSetupTask(umbilical);
      return;
    }
    if (taskCleanup) {
      runTaskCleanupTask(umbilical);
      return;
    }

    // A pipeline map is meaningless without an upstream job to pull from.
    if (job.get("mapred.job.pipeline", null) == null) {
      throw new IOException("PipelineMapTask: mapred.job.pipeline is not defined!");
    }

    setPhase(TaskStatus.Phase.PIPELINE);

    // Deserializers for the incoming key/value types (the upstream reducer's
    // output types, configured as this job's input types).
    Class inputKeyClass = job.getInputKeyClass();
    Class inputValClass = job.getInputValueClass();
    SerializationFactory serializationFactory = new SerializationFactory(job);
    keyDeserializer = serializationFactory.getDeserializer(inputKeyClass);
    valDeserializer = serializationFactory.getDeserializer(inputValClass);

    int numReduceTasks = job.getNumReduceTasks();
    if (numReduceTasks == 0) {
      throw new IOException("PipelineMaptask has no reduce tasks!");
    }

    snapshots = job.getBoolean("mapred.job.input.snapshots", false);
    this.mapper = ReflectionUtils.newInstance(job.getMapperClass(), job);

    /* This object will be the sink's input buffer.
     * (This task is its own InputCollector: the sink calls back into read().) */
    BufferExchangeSink sink = new BufferExchangeSink(job, this, this);
    sink.open();

    /* Start the reduce output fetcher */
    TaskID reduceTaskId = pipelineReduceTask(job);
    ReduceOutputFetcher rof = new ReduceOutputFetcher(umbilical, bufferUmbilical, sink, reduceTaskId);
    rof.setDaemon(true); // must not keep the JVM alive after the task finishes

    long timestamp = System.currentTimeMillis();
    synchronized (this) {
      LOG.info("PipelineMapTask: copy phase.");
      setPhase(TaskStatus.Phase.SHUFFLE);
      rof.start();
      // Wait until the sink has received all upstream input. Presumably the
      // sink notifies this monitor as data arrives — TODO confirm; the loop
      // re-checks sink.complete() on every wakeup, so spurious wakeups are safe.
      while (!sink.complete()) {
        setProgressFlag();
        try {
          this.wait();
        } catch (InterruptedException e) {
          // NOTE(review): swallowed; loop re-checks completion.
        }
      }
      LOG.info("PipelineMapTask: copy input took " +
          (System.currentTimeMillis() - timestamp) + " ms.");
    }

    setPhase(TaskStatus.Phase.MAP);
    setProgressFlag();
    sink.close();

    timestamp = System.currentTimeMillis();
    getProgress().complete();
    setProgressFlag();
    /* OutputFile finalOutput = this.buffer.close();
       bufferUmbilical.output(finalOutput); */
    getProgress().complete();
    LOG.info("PipelineMapTask: took " +
        (System.currentTimeMillis() - timestamp) + " ms to finalize final output.");
    done(umbilical);
  }

  /** No-op: output is flushed incrementally from {@link #read}. */
  @Override
  public void flush() throws IOException {
    // TODO Auto-generated method stub
  }

  /** No-op: the output buffer is freed at the end of each {@link #read} round. */
  @Override
  public void free() {
    // TODO Auto-generated method stub
  }

  /**
   * Sink callback: consumes one incoming spill/snapshot/stream from the
   * upstream reducer. Deserializes every key/value record, runs the user
   * mapper over it (collecting into {@code buffer}), then forwards the
   * buffered output downstream according to the header type.
   *
   * <p>Synchronized: sink threads may call concurrently; this serializes
   * access to {@code buffer} and {@code mapper}.
   *
   * @param istream raw record stream positioned at IFile-formatted data
   * @param header  describes the incoming transfer (type, progress, compression)
   * @return always {@code true}
   * @throws IOException on read/deserialization/output failure
   */
  @Override
  public synchronized boolean read(DataInputStream istream, Header header) throws IOException {
    CompressionCodec codec = null;
    Class<? extends CompressionCodec> codecClass = null;
    if (conf.getCompressMapOutput()) {
      codecClass = conf.getMapOutputCompressorClass(DefaultCodec.class);
      codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf);
    }

    if (this.buffer == null) {
      // First round: create the map-output buffer lazily so it uses the
      // task's live configuration and progress objects.
      Class outputKeyClass = conf.getMapOutputKeyClass();
      Class outputValClass = conf.getMapOutputValueClass();
      this.buffer = new JOutputBuffer(bufferUmbilical, this, conf,
          reporter, getProgress(), false,
          outputKeyClass, outputValClass,
          codecClass, null);
    } else {
      // Subsequent rounds: re-allocate the buffer freed at the end of the
      // previous round.
      this.buffer.malloc();
    }

    IFile.Reader reader = new IFile.Reader(conf, istream, header.compressed(), codec, null);
    DataInputBuffer key = new DataInputBuffer();
    DataInputBuffer value = new DataInputBuffer();
    Object keyObject = null;
    Object valObject = null;
    // Deserialize each record and hand it to the user mapper; map output is
    // collected into this.buffer (this task is the OutputCollector).
    while (reader.next(key, value)) {
      keyDeserializer.open(key);
      valDeserializer.open(value);
      keyObject = keyDeserializer.deserialize(keyObject);
      valObject = valDeserializer.deserialize(valObject);
      mapper.map(keyObject, valObject, buffer, reporter);
    }
    /* Note: do not close reader otherwise input stream will be closed as well. */

    if (header.type() == OutputFile.Type.SNAPSHOT) {
      LOG.info("PipelineMapTask forward snapshot. progress = " + header.progress());
      /* forward snapshot data. */
      getProgress().set(header.progress());
      OutputFile snapshot = buffer.snapshot();
      bufferUmbilical.output(snapshot);
    } else if (header.type() == OutputFile.Type.STREAM) {
      OutputFile.StreamHeader streamHeader = (OutputFile.StreamHeader) header;
      LOG.info("PipelineMapTask forward stream. sequence = " + streamHeader.sequence());
      buffer.stream(streamHeader.sequence(), false);
    }
    this.buffer.free();

    /* Get ready for the next round: fresh mapper instance per input chunk. */
    this.mapper = ReflectionUtils.newInstance(conf.getMapperClass(), conf);
    return true;
  }

  /** Not supported by this collector; always returns {@code null}. */
  @Override
  public ValuesIterator valuesIterator() throws IOException {
    // TODO Auto-generated method stub
    return null;
  }

  /** No-op: the sink is closed explicitly in {@link #run}. */
  @Override
  public void close() {
    // TODO Auto-generated method stub
  }
}