/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapreduce.task.reduce;

import java.io.IOException;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.mapred.MapTaskCompletionEventsUpdate;
import org.apache.hadoop.mapred.TaskCompletionEvent;
import org.apache.hadoop.mapred.TaskUmbilicalProtocol;
import org.apache.hadoop.mapreduce.TaskAttemptID;

/**
 * Daemon thread that periodically polls the task umbilical for
 * map-completion events and hands them to the {@link ShuffleScheduler},
 * so the shuffle knows which map outputs are ready to fetch.
 */
class EventFetcher<K,V> extends Thread {
  private static final long SLEEP_TIME = 1000;
  private static final int MAX_RETRIES = 10;
  private static final int RETRY_PERIOD = 5000;
  private static final Log LOG = LogFactory.getLog(EventFetcher.class);

  private final TaskAttemptID reduce;
  private final TaskUmbilicalProtocol umbilical;
  private final ShuffleScheduler<K,V> scheduler;
  private int fromEventIdx = 0;
  private final int maxEventsToFetch;
  private final ExceptionReporter exceptionReporter;

  private volatile boolean stopped = false;

  public EventFetcher(TaskAttemptID reduce,
                      TaskUmbilicalProtocol umbilical,
                      ShuffleScheduler<K,V> scheduler,
                      ExceptionReporter reporter,
                      int maxEventsToFetch) {
    setName("EventFetcher for fetching Map Completion Events");
    setDaemon(true);
    this.reduce = reduce;
    this.umbilical = umbilical;
    this.scheduler = scheduler;
    exceptionReporter = reporter;
    this.maxEventsToFetch = maxEventsToFetch;
  }

  @Override
  public void run() {
    int failures = 0;
    LOG.info(reduce + " Thread started: " + getName());

    try {
      while (!stopped && !Thread.currentThread().isInterrupted()) {
        try {
          int numNewMaps = getMapCompletionEvents();
          failures = 0;
          if (numNewMaps > 0) {
            LOG.info(reduce + ": Got " + numNewMaps + " new map-outputs");
          }
          LOG.debug("GetMapEventsThread about to sleep for " + SLEEP_TIME);
          if (!Thread.currentThread().isInterrupted()) {
            Thread.sleep(SLEEP_TIME);
          }
        } catch (InterruptedException e) {
          LOG.info("EventFetcher is interrupted.. Returning");
          return;
        } catch (IOException ie) {
          LOG.info("Exception in getting events", ie);
          // check to see whether to abort
          if (++failures >= MAX_RETRIES) {
            throw new IOException("too many failures downloading events", ie);
          }
          // sleep for a bit
          if (!Thread.currentThread().isInterrupted()) {
            Thread.sleep(RETRY_PERIOD);
          }
        }
      }
    } catch (InterruptedException e) {
      return;
    } catch (Throwable t) {
      exceptionReporter.reportException(t);
      return;
    }
  }

  public void shutDown() {
    this.stopped = true;
    interrupt();
    try {
      join(5000);
    } catch (InterruptedException ie) {
      LOG.warn("Got interrupted while joining " + getName(), ie);
    }
  }

  /**
   * Queries the {@link TaskTracker} for a set of map-completion events
   * from a given event ID.
   * @return the number of SUCCEEDED map-completion events seen in this
   *         round of polling
   * @throws IOException
   */
  protected int getMapCompletionEvents()
      throws IOException, InterruptedException {

    int numNewMaps = 0;
    TaskCompletionEvent[] events = null;

    do {
      MapTaskCompletionEventsUpdate update =
          umbilical.getMapCompletionEvents(
              (org.apache.hadoop.mapred.JobID) reduce.getJobID(),
              fromEventIdx,
              maxEventsToFetch,
              (org.apache.hadoop.mapred.TaskAttemptID) reduce);
      events = update.getMapTaskCompletionEvents();
      LOG.debug("Got " + events.length + " map completion events from " +
          fromEventIdx);

      assert !update.shouldReset() : "Unexpected legacy state";

      // Update the last seen event ID
      fromEventIdx += events.length;

      // Process the TaskCompletionEvents:
      // 1. Save the SUCCEEDED maps in knownOutputs to fetch the outputs.
      // 2. Save the OBSOLETE/FAILED/KILLED maps in obsoleteOutputs to stop
      //    fetching from those maps.
      // 3. Remove TIPFAILED maps from neededOutputs since we don't need their
      //    outputs at all.
      for (TaskCompletionEvent event : events) {
        scheduler.resolve(event);
        if (TaskCompletionEvent.Status.SUCCEEDED == event.getTaskStatus()) {
          ++numNewMaps;
        }
      }
    } while (events.length == maxEventsToFetch);

    return numNewMaps;
  }
}
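
// Usage sketch: roughly how a shuffle implementation is expected to drive
// this fetcher. The variables (reduceId, umbilical, scheduler, reporter,
// maxEventsToFetch) stand in for state owned by the surrounding shuffle
// code and are assumptions here, not part of this class:
//
//   EventFetcher<K,V> eventFetcher =
//       new EventFetcher<K,V>(reduceId, umbilical, scheduler, reporter,
//                             maxEventsToFetch);
//   eventFetcher.start();      // begins polling the umbilical every second
//   // ... copy phase runs; fetchers pull map outputs as events resolve ...
//   eventFetcher.shutDown();   // sets stopped, interrupts, joins (5s cap)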