package org.apache.hadoop.mapred; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Collections; import java.net.InetSocketAddress; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.ipc.ProtocolSignature; import org.apache.hadoop.ipc.RPC; import org.apache.hadoop.ipc.RPC.VersionIncompatible; import org.apache.hadoop.ipc.VersionedProtocol; import org.apache.hadoop.mapred.CoronaTaskTracker.JobTrackerReporter; import org.apache.hadoop.mapred.SortedRanges.Range; /** * TaskUmbilicalProtocol used by Task in Corona * In Corona we allow each individual Task to talk to JobTracker directly. */ class CoronaDirectTaskUmbilical implements TaskUmbilicalProtocol { public static final Log LOG = LogFactory.getLog(CoronaDirectTaskUmbilical.class); final private TaskUmbilicalProtocol taskTrackerUmbilical; private InterTrackerProtocol jobTracker; final private List<TaskCompletionEvent> mapEventFetched; private int totalEventsFetched = 0; /** Conf entry for direct umbilical address */ public static final String DIRECT_UMBILICAL_JT_ADDRESS = "mapred.direct.task.umbilical.address"; /** Conf entry for secondary tracker addres */ public static final String DIRECT_UMBILICAL_FALLBACK_ADDRESS = "mapred.direct.task.umbilical.secondary"; // member used to handle IO exception final int maxErrorCount = 10; int errorCount = 0; long jtCallTimeout; /** * Deserialize InetSocketAddress from String from given key in conf * @param conf job conf * @param key name of entry * @return address */ public static InetSocketAddress getAddress(JobConf conf, String key) { String str = conf.get(key); if (str == null) return null; String hostPortPair[] = str.split(":"); if (hostPortPair.length != 2) return null; return new InetSocketAddress(hostPortPair[0], Integer.parseInt(hostPortPair[1])); } /** * Serialize InetSocketAddress to String and saves in given conf * @param conf job conf * @param key name of entry * @param address address to save */ public static void setAddress(JobConf conf, String key, InetSocketAddress address) { if (address == null) { conf.unset(key); return; } String addrStr = address.getHostName() + ":" + address.getPort(); conf.set(key, addrStr); } /** Job tracker timeout */ private static long jtConnectTimeoutMsec; /** Job configuration */ private final JobConf conf; /** Secondary fallback address */ private final InetSocketAddress secondaryTracker; /** Address of current JT */ private InetSocketAddress currentTracker; /** Mutable pointer to handle closing RPC client outside of this class */ private final VersionedProtocolPointer clientPointer; public static CoronaDirectTaskUmbilical createDirectUmbilical( TaskUmbilicalProtocol taskTracker, InetSocketAddress jobTrackerAddress, InetSocketAddress secondaryTrackerAddress, JobConf conf) throws IOException { LOG.info("Creating direct umbilical to " + jobTrackerAddress.toString()); jtConnectTimeoutMsec = conf.getLong( "corona.jobtracker.connect.timeout.msec", 60000L); return new CoronaDirectTaskUmbilical(taskTracker, conf, jobTrackerAddress, secondaryTrackerAddress); } public List<VersionedProtocolPointer> getCreatedProxies() { return Collections.singletonList(clientPointer); } public void close() { RPC.stopProxy(jobTracker); } private CoronaDirectTaskUmbilical(TaskUmbilicalProtocol taskTrackerUmbilical, JobConf conf, InetSocketAddress currentTracker, InetSocketAddress secondaryTracker) throws IOException { this.taskTrackerUmbilical = taskTrackerUmbilical; this.mapEventFetched = new ArrayList<TaskCompletionEvent>(); this.conf = conf; this.currentTracker = currentTracker; this.secondaryTracker = secondaryTracker; this.clientPointer = new VersionedProtocolPointer(); this.jtCallTimeout = conf.getLong("corona.jt.call.timeout", 1000L); boolean succeeded = (new Caller<Boolean>() { /** Connect if this is first try, new tries reconnects will be handled by * caller fall back mechanism */ private boolean firstTry = true; @Override protected Boolean call() throws IOException { if (firstTry) { firstTry = false; connect(CoronaDirectTaskUmbilical.this.currentTracker); } return true; } }).makeCall(); if (!succeeded) { throw new IOException("Failed to initialize DirectTaskUmbilical client"); } } @Override public long getProtocolVersion(String protocol, long clientVersion) throws VersionIncompatible, IOException { return taskTrackerUmbilical.getProtocolVersion(protocol, clientVersion); } @Override public ProtocolSignature getProtocolSignature(String protocol, long clientVersion, int clientMethodsHash) throws IOException { return taskTrackerUmbilical.getProtocolSignature( protocol, clientVersion, clientMethodsHash); } @Override public JvmTask getTask(JvmContext context) throws IOException { return taskTrackerUmbilical.getTask(context); } @Override public boolean statusUpdate(TaskAttemptID taskId, TaskStatus taskStatus) throws IOException, InterruptedException { return taskTrackerUmbilical.statusUpdate(taskId, taskStatus); } @Override public void reportDiagnosticInfo(TaskAttemptID taskid, String trace) throws IOException { taskTrackerUmbilical.reportDiagnosticInfo(taskid, trace); } @Override public void reportNextRecordRange(TaskAttemptID taskid, Range range) throws IOException { taskTrackerUmbilical.reportNextRecordRange(taskid, range); } @Override public boolean ping(TaskAttemptID taskid) throws IOException { return taskTrackerUmbilical.ping(taskid); } @Override public void done(TaskAttemptID taskid) throws IOException { taskTrackerUmbilical.done(taskid); } @Override public void commitPending(TaskAttemptID taskId, TaskStatus taskStatus) throws IOException, InterruptedException { taskTrackerUmbilical.commitPending(taskId, taskStatus); } @Override public boolean canCommit(TaskAttemptID taskid) throws IOException { return taskTrackerUmbilical.canCommit(taskid); } @Override public void shuffleError(TaskAttemptID taskId, String message) throws IOException { taskTrackerUmbilical.shuffleError(taskId, message); } @Override public void fsError(TaskAttemptID taskId, String message) throws IOException { taskTrackerUmbilical.fsError(taskId, message); } @Override public void fatalError(TaskAttemptID taskId, String message) throws IOException { taskTrackerUmbilical.fatalError(taskId, message); } @Override public MapTaskCompletionEventsUpdate getMapCompletionEvents(final JobID jobId, int fromIndex, int maxLocs, TaskAttemptID id) throws IOException { // Remember old address to notice change InetSocketAddress oldTrackerAddr = currentTracker; TaskCompletionEvent[] recentEvents = (new Caller<TaskCompletionEvent[]>() { @Override protected TaskCompletionEvent[] call() throws IOException { return jobTracker.getTaskCompletionEvents(jobId, totalEventsFetched, Integer.MAX_VALUE); } }).makeCall(); // Check if we've changed JobTracker if (!oldTrackerAddr.equals(currentTracker)) { // if so reset counter for all TaskCompletionEvents and send reset flag LOG.info("JobTracker did a failover from " + oldTrackerAddr + " to " + currentTracker); totalEventsFetched = 0; return new MapTaskCompletionEventsUpdate(TaskCompletionEvent.EMPTY_ARRAY, true); } else { // if not proceed as usual totalEventsFetched += recentEvents.length; for (TaskCompletionEvent event : recentEvents) { if (event.isMapTask()) { mapEventFetched.add(event); } } int toIndex = fromIndex + maxLocs; toIndex = toIndex > mapEventFetched.size() ? mapEventFetched.size() : toIndex; TaskCompletionEvent[] result = mapEventFetched .subList(fromIndex, toIndex).toArray( new TaskCompletionEvent[toIndex - fromIndex]); return new MapTaskCompletionEventsUpdate(result, false); } } /** * Caller that automatically handles switching to new JT. * @param <T> function return type */ private abstract class Caller<T> extends CoronaJTFallbackCaller<T> { @Override protected void handleIOException(IOException e) throws IOException { errorCount++; if (errorCount >= maxErrorCount) { LOG.error("Too many errors " + maxErrorCount + " in calling " + currentTracker, e); throw e; } else { long backoff = errorCount * jtCallTimeout; LOG.warn( "Error " + errorCount + " in calling to " + currentTracker + " will wait " + backoff + " msec", e); try { Thread.sleep(backoff); } catch (InterruptedException ie) { } } } @Override protected JobConf getConf() { return CoronaDirectTaskUmbilical.this.conf; } @Override protected InetSocketAddress getSecondaryTracker() { return CoronaDirectTaskUmbilical.this.secondaryTracker; } @Override protected InetSocketAddress getCurrentClientAddress() { return CoronaDirectTaskUmbilical.this.currentTracker; } @Override protected void connect(InetSocketAddress newAddress) throws IOException { int rpcTimeout = (int) jtConnectTimeoutMsec; CoronaDirectTaskUmbilical.this.currentTracker = newAddress; jobTracker = RPC.waitForProxy(InterTrackerProtocol.class, InterTrackerProtocol.versionID, newAddress, conf, jtConnectTimeoutMsec, rpcTimeout); clientPointer.setClient(jobTracker); } @Override protected void shutdown() { RPC.stopProxy(jobTracker); } } /** * Encapsulates VersionedProtocol in mutable object for closing RPC clients * that has changes */ public static class VersionedProtocolPointer { /** RPC client */ private VersionedProtocol client; /** * Default c'tor */ public VersionedProtocolPointer() { } /** * Init c'tor */ public VersionedProtocolPointer(VersionedProtocol client) { this.client = client; } public VersionedProtocol getClient() { return client; } public void setClient(VersionedProtocol client) { this.client = client; } } }