package com.mozilla.grouperfish.batch.scheduling; import java.util.List; import java.util.concurrent.BlockingQueue; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.collect.ImmutableList; import com.google.inject.Inject; import com.mozilla.grouperfish.batch.handlers.CleanupHandler; import com.mozilla.grouperfish.batch.handlers.FetchHandler; import com.mozilla.grouperfish.batch.handlers.PutHandler; import com.mozilla.grouperfish.batch.handlers.RunHandler; import com.mozilla.grouperfish.batch.transforms.TransformProvider; import com.mozilla.grouperfish.model.Task; import com.mozilla.grouperfish.services.api.FileSystem; import com.mozilla.grouperfish.services.api.Grid; import com.mozilla.grouperfish.services.api.IndexProvider; /** * Implements the batch system as multiple distributed queues. * Assuming that the levels of the queue have different * resource usage (network, IO, CPU, RAM) characteristics, * this aims to always interleave handlers from every queue. * * Queues: * ------- * |...prepare...| * |...run...| * |...put...| * |...cleanup...| * * Each queue is processed concurrently by multiple workers, * running on each cluster node. * * A more advanced system could move specific handlers to * specific grouperfish nodes for optimum provisioning, or * vary the global number of handlers for any queue depending * on the queue size. */ public class PipeliningBatchService extends AbstractBatchService { private static final Logger log = LoggerFactory.getLogger(PipeliningBatchService.class); private final List<Worker> workers; private final BlockingQueue<Task> prepareQueue; @Inject public PipeliningBatchService( final Grid grid, final IndexProvider indexes, final FileSystem fs, final TransformProvider transforms) { super(indexes); final BlockingQueue<Task> prepQ = grid.queue("grouperfish_prepare"); final BlockingQueue<Task> runQ = grid.queue("grouperfish_run"); final BlockingQueue<Task> putQ = grid.queue("grouperfish_putresult"); final BlockingQueue<Task> cleanupQ = grid.queue("grouperfish_cleanup"); final BlockingQueue<Task> failQ = grid.queue("grouperfish_fail"); // Here we could in theory create e.g. several run workers instead of just one for this host... workers = new ImmutableList.Builder<Worker>() .add(new Worker(failQ, prepQ, runQ, new FetchHandler(fs, indexes))) .add(new Worker(failQ, runQ, putQ, new RunHandler(fs, transforms))) .add(new Worker(failQ, putQ, cleanupQ, new PutHandler(grid, fs))) .add(new Worker(failQ, cleanupQ, null, new CleanupHandler(fs))) .build(); prepareQueue = prepQ; log.info("Instantiated service: {}", getClass().getSimpleName()); } public void start() { for (final Worker worker : workers) worker.start(); } public void stop() { for (final Worker worker : workers) worker.cancel(); } @Override public void schedule(Task task) { // TODO Auto-generated method stub final BlockingQueue<Task> queue = prepareQueue; queue.add(task); } }