// Copyright 2011 Google Inc. All Rights Reserved.
package com.google.appengine.tools.mapreduce.impl.handlers;
import com.google.appengine.api.memcache.MemcacheServiceFactory;
import com.google.appengine.api.taskqueue.QueueFactory;
import com.google.appengine.api.taskqueue.TaskAlreadyExistsException;
import com.google.appengine.api.taskqueue.TaskOptions;
import com.google.appengine.tools.mapreduce.CounterNames;
import com.google.appengine.tools.mapreduce.InputReader;
import com.google.appengine.tools.mapreduce.InputReader.KeyValue;
import com.google.appengine.tools.mapreduce.Mapper;
import com.google.appengine.tools.mapreduce.MapperContext;
import com.google.appengine.tools.mapreduce.MapperShardListener;
import com.google.appengine.tools.mapreduce.MapperSliceListener;
import com.google.appengine.tools.mapreduce.Status;
import com.google.appengine.tools.mapreduce.impl.QuotaConsumer;
import com.google.appengine.tools.mapreduce.impl.QuotaManager;
import com.google.appengine.tools.mapreduce.impl.ShardStateEntity;
import com.google.appengine.tools.mapreduce.impl.util.Clock;
import com.google.appengine.tools.mapreduce.impl.util.Stopwatch;
import com.google.appengine.tools.mapreduce.impl.util.SystemClock;
import javax.servlet.http.HttpServletRequest;
/**
 * Mapper Worker logic handler.
 *
 * <p>Executes one "slice" of a mapper shard per task-queue task: it pulls
 * records from the shard's {@link InputReader}, invokes the user-supplied
 * {@link Mapper#map} on each, and re-enqueues a follow-up worker task until
 * the reader is exhausted. Each slice is bounded both by wall-clock time
 * ({@link #PROCESSING_TIME_PER_TASK_MS}) and by a memcache-backed quota
 * ({@link QuotaConsumer}), so a single task execution stays within App
 * Engine request limits.
 *
 * <p>Instances are short-lived: one is created per incoming worker request
 * (see {@link #handleMapperWorker}).
 */
final class WorkerHandler<K, V, OK, OV> {
  // --------------------------- STATIC FIELDS ---------------------------
  /**
   * Default amount of quota to divvy up per controller execution.
   */
  public static final long DEFAULT_QUOTA_BATCH_SIZE = 20;

  // Amount of time to spend on actual map() calls per task execution.
  // Keeps each task-queue invocation well under the request deadline.
  private static final long PROCESSING_TIME_PER_TASK_MS = 10000;

  // ------------------------------ FIELDS ------------------------------

  // Injected clock so the time-boxed processing loop is testable.
  private final Clock clock;

  // Per-request context carrying job id, shard/slice numbers, spec, and
  // access to the persisted shard state.
  private final WorkerHandlerContext<K, V, OK, OV> context;

  // --------------------------- CONSTRUCTORS ---------------------------

  /**
   * Creates a handler for the given request context using the real
   * system clock.
   */
  public WorkerHandler(WorkerHandlerContext<K, V, OK, OV> context) {
    this(context, new SystemClock());
  }

  /**
   * Full constructor; private so an alternate {@link Clock} can only be
   * supplied internally (e.g. for tests).
   */
  private WorkerHandler(WorkerHandlerContext<K, V, OK, OV> context, Clock clock) {
    this.context = context;
    this.clock = clock;
  }

  // --------------------- GETTER / SETTER METHODS ---------------------

  /**
   * Get the QuotaConsumer for current shard.
   *
   * <p>The consumer draws from a memcache-backed {@link QuotaManager}
   * bucket keyed by job id and shard number, pulling quota in batches of
   * {@link #DEFAULT_QUOTA_BATCH_SIZE}. Callers must
   * {@link QuotaConsumer#dispose()} the returned consumer (see
   * {@link #processMapper}).
   */
  private QuotaConsumer getQuotaConsumer() {
    QuotaManager manager = new QuotaManager(MemcacheServiceFactory.getMemcacheService());
    return new QuotaConsumer(
        manager,
        getQuotaConsumerKey(context.getJobId(), context.getShardNumber()),
        DEFAULT_QUOTA_BATCH_SIZE);
  }

  // -------------------------- INSTANCE METHODS --------------------------

  /**
   * Runs one slice of this shard's mapper work.
   *
   * <p>Flow: validate shard state, fire {@link MapperShardListener}
   * lifecycle callbacks at the shard boundaries, process input via
   * {@link #processMapper}, persist progress, and either schedule the
   * next slice or mark the shard {@code DONE}.
   *
   * <p>Missing or non-ACTIVE shard state is treated as a stale/out-of-order
   * task-queue delivery and the request is silently dropped (logged at
   * WARNING) — task queue may redeliver or reorder tasks.
   */
  void handleWorkerImpl() {
    ShardStateEntity<K, V, OK, OV> shardState = context.getShardState();
    if (shardState == null) {
      // Shard state has vanished. This is probably the task being executed
      // out of order by taskqueue.
      MapReduceServletImpl.LOG.warning("Shard state not found, aborting: " + context.getJobId());
      return;
    }
    if (shardState.getStatus() != Status.ACTIVE) {
      // Shard is not in an active state. This is probably the task being executed
      // out of order by taskqueue.
      MapReduceServletImpl.LOG.warning("Shard is not active, aborting: " + context.getJobId());
      return;
    }
    MapReduceServletImpl.LOG.fine(
        String.format("Running worker: %s %d %d",
            context.getJobId(),
            context.getShardNumber(),
            context.getSliceNumber()));
    Mapper<K, V, OK, OV> mapper = context.getSpecification().getMapper();
    MapperContextImpl<K, V, OK, OV> mapperContext = new MapperContextImpl<K, V, OK, OV>(context,
        context.getShardNumber());
    if (context.getSliceNumber() == 0 && mapper instanceof MapperShardListener) {
      // This is the first invocation for this mapper.
      //noinspection unchecked
      ((MapperShardListener<K, V, OK, OV>) mapper).initializeShard(mapperContext);
    }
    boolean shouldContinue = processMapper(mapper, mapperContext);
    if (!shouldContinue) {
      shardState.setStatus(Status.DONE);
    }
    // Flush buffered mapper output before recording progress, so persisted
    // state never claims records that were not emitted.
    mapperContext.flush();
    shardState.persist();
    if (shouldContinue) {
      scheduleWorker(context);
    } else {
      if (mapper instanceof MapperShardListener) {
        // This is the last invocation for this mapper.
        // NOTE(review): terminateShard runs after mapperContext.flush() and
        // shardState.persist(); if the callback emits output through the
        // context, it may never be flushed — confirm against
        // MapperContextImpl's flush semantics.
        //noinspection unchecked
        ((MapperShardListener<K, V, OK, OV>) mapper).terminateShard(mapperContext);
      }
    }
  }

  /**
   * Processes input records for up to {@link #PROCESSING_TIME_PER_TASK_MS}
   * milliseconds, subject to quota.
   *
   * <p>Fires {@link MapperSliceListener} callbacks around the slice,
   * consumes one quota unit per record, and accumulates the
   * {@code MAPPER_CALLS} and {@code MAPPER_WALLTIME_MSEC} counters. If no
   * quota is available at all, the slice is skipped entirely (no slice
   * callbacks fire) and {@code true} is returned so the shard is retried
   * after quota replenishes.
   *
   * @param mapper the user mapper to invoke per record
   * @param mapperContext context passed through to the mapper and listeners
   * @return {@code true} if the shard still has input remaining (schedule
   *     another slice); {@code false} once the reader is exhausted
   */
  private boolean processMapper(Mapper<K, V, OK, OV> mapper,
      MapperContext<K, V, OK, OV> mapperContext) {
    QuotaConsumer consumer = getQuotaConsumer();
    try {
      long startTime = clock.currentTimeMillis();
      boolean shouldShardContinue = true;
      // check() is a cheap probe for available quota; actual consumption
      // happens per-record via consume() inside the loop.
      if (consumer.check(1L)) {
        if (mapper instanceof MapperSliceListener) {
          //noinspection unchecked
          ((MapperSliceListener<K, V, OK, OV>) mapper).initializeSlice(mapperContext);
        }
        ShardStateEntity<K, V, OK, OV> shardState = context.getShardState();
        InputReader<K, V> inputReader = shardState.getInputReader();
        // Stopwatch measures only time spent inside map(), excluding
        // reader and quota overhead, for the walltime counter.
        Stopwatch mapperStopwatch = new Stopwatch();
        int mapperCalls = 0;
        while (clock.currentTimeMillis() < startTime + PROCESSING_TIME_PER_TASK_MS) {
          if (!consumer.consume(1L)) {
            // Quota exhausted mid-slice: stop early, shard stays active.
            break;
          }
          if (!inputReader.hasNext()) {
            // Reader drained: this shard is finished.
            shouldShardContinue = false;
            break;
          }
          KeyValue<K, V> next = inputReader.next();
          try {
            mapperCalls++;
            mapperStopwatch.start();
            mapper.map(next.key, next.value, mapperContext);
          } finally {
            // Always stop the stopwatch, even if map() throws, so the
            // walltime counter stays consistent.
            mapperStopwatch.stop();
          }
        }
        mapperContext.getCounters().getCounter(CounterNames.MAPPER_CALLS).increment(mapperCalls);
        mapperContext.getCounters().getCounter(CounterNames.MAPPER_WALLTIME_MSEC).increment(
            mapperStopwatch.getTimeMillis());
        if (mapper instanceof MapperSliceListener) {
          //noinspection unchecked
          ((MapperSliceListener<K, V, OK, OV>) mapper).terminateSlice(mapperContext);
        }
      } else {
        MapReduceServletImpl.LOG
            .info("Out of mapper quota. Aborting request until quota is replenished."
                + " Consider increasing processing rate if you would like your mapper job "
                + "to complete faster.");
      }
      return shouldShardContinue;
    } finally {
      // Return any unused batched quota to the shared pool.
      consumer.dispose();
    }
  }

  // -------------------------- STATIC METHODS --------------------------

  /**
   * Builds the memcache quota-bucket key for a shard: {@code "jobId:shard"}.
   */
  static String getQuotaConsumerKey(String jobId, int shardNumber) {
    return String.format("%s:%d", jobId, shardNumber);
  }

  /**
   * Servlet entry point: reconstructs the worker context from the request
   * and runs one slice.
   */
  static <K, V, OK, OV> void handleMapperWorker(HttpServletRequest request) {
    WorkerHandlerContext<K, V, OK, OV> context = WorkerHandlerContext
        .createFromRequest(request);
    new WorkerHandler<K, V, OK, OV>(context).handleWorkerImpl();
  }

  /**
   * Schedules the initial worker callback execution for all shards.
   *
   * <p>Creates and persists one {@link ShardStateEntity} per input reader,
   * numbering shards sequentially from 0, then enqueues each shard's first
   * worker task.
   */
  static <K, V, OK, OV> void scheduleShardsForNewJob(
      HandlerContext<K, V, OK, OV> context,
      Iterable<? extends InputReader<K, V>> sources) {
    int i = 0;
    for (InputReader<K, V> reader : sources) {
      ShardStateEntity<K, V, OK, OV> shardState = ShardStateEntity.createForNewJob(
          context.getJobId(), i, reader);
      // Persist before enqueueing so the worker task can find its state.
      shardState.persist();
      scheduleWorker(
          WorkerHandlerContext.createForNewJob(context.getJobId(), i, context.getBaseUrl()));
      i++;
    }
  }

  /**
   * Schedules a worker task on the appropriate queue.
   *
   * <p>A {@link TaskAlreadyExistsException} is deliberately swallowed
   * (logged at WARNING): task names make enqueueing idempotent, so a
   * duplicate add from a retried request is harmless.
   *
   * @param context the context for this MR job
   */
  private static <K, V, OK, OV> void scheduleWorker(WorkerHandlerContext<K, V, OK, OV> context) {
    TaskOptions taskOptions = context.createTaskOptionsForNextSlice();
    try {
      QueueFactory.getQueue(context.getSpecification().getWorkerQueueName()).add(taskOptions);
    } catch (TaskAlreadyExistsException ignored) {
      MapReduceServletImpl.LOG.warning("Worker task " + taskOptions + " already exists.");
    }
  }
}