//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.uima; import java.io.IOException; import java.util.HashMap; import java.util.Map; import org.apache.uima.UimaContext; import org.apache.uima.collection.CollectionException; import org.apache.uima.fit.component.JCasCollectionReader_ImplBase; import org.apache.uima.jcas.JCas; import org.apache.uima.resource.ResourceInitializationException; import org.apache.uima.util.Progress; import uk.gov.dstl.baleen.core.metrics.MetricsFactory; import uk.gov.dstl.baleen.uima.utils.UimaUtils; /** * Base class for Baleen Job Schedulers. * * A scheduler is something which signals when to run the next job. Since we want the benefits of * dependency injection of UimaFit we use consider a collection reader as analagous component (it * signals when the next document is ready). Thus under the hood a scheduler is a collection reader. * * Note that use of collection reader, and Baleen's approach to single threaded pipelines, means * that events can not be scheduled at the same time. This is likely not a issue for most uses of * the Baleen jobs. * * Implementors need only provide the await() function. This should block until the next job should * be run. If another job should not be run then it should return false, else return true. * * Await will be called again as soon as the previous job has finished. * * The scheduler will pass any parameters it has defined as settings (@see {@link BaleenTask} which * are effectively carried by the JCas. * * To implement jobs that run periodically you m ay which to derive from the existing @see * {@link FixedRate} scheduler. * * @baleen.javadoc */ public abstract class BaleenScheduler extends JCasCollectionReader_ImplBase { /** The monitor. */ private UimaMonitor monitor; /** The config. */ private Map<String, String> config; @Override public final void initialize(final UimaContext context) throws ResourceInitializationException { // This will do initialization of resources, // but won't be included in the metrics super.initialize(context); final String pipelineName = UimaUtils.getPipelineName(context); monitor = new UimaMonitor(pipelineName, this.getClass()); getMonitor().startFunction("initialize"); // Pull the config parameters out for job settings config = BaleenScheduler.getConfigParameters(context); doInitialize(context); getMonitor().finishFunction("initialize"); } /** * Called when the collection reader is being initialized. Any required resources, for example, * should be opened at this point. * * @param context * The UimaContext for the collection reader * @throws ResourceInitializationException * the resource initialization exception */ protected void doInitialize(final UimaContext context) throws ResourceInitializationException { // Do nothing by default } @Override public final void getNext(final JCas jCas) throws IOException, CollectionException { getMonitor().startFunction("getNext"); MetricsFactory.getInstance().getPipelineMetrics(monitor.getPipelineName()).startDocumentProcess(); jCas.setDocumentText(JobSettings.class.getSimpleName()); jCas.setDocumentLanguage("en"); final JobSettings settings = new JobSettings(jCas); for (final Map.Entry<String, String> e : config.entrySet()) { settings.set(e.getKey(), e.getValue()); } getMonitor().finishFunction("getNext"); } /** * Called when the collection reader has finished and is closing down. Any open resources, for * example, should be closed at this point. * * @throws IOException * Signals that an I/O exception has occurred. */ protected void doDestroy() throws IOException { // Do nothing } @Override public void destroy() { super.destroy(); try { doDestroy(); } catch (final IOException e) { getMonitor().warn("Close on destroy", e); } } @Override public Progress[] getProgress() { return new Progress[0]; } /** * Override of the UIMA hasNext() method with logic to continuously check for new documents * until one is found. This prevents the collection reader from exiting (unless asked to), and * so creates a persistent collection reader and pipeline. * * @return true, if successful * @throws IOException * Signals that an I/O exception has occurred. * @throws CollectionException * the collection exception */ @Override public final boolean hasNext() throws IOException, CollectionException { return await(); } /** * Signals time for a new job to run. * * This method should block until the next job should be run (eg be on a timer). * * @return true to run the job, otherwise cancel */ protected abstract boolean await(); /** * Gets the monitor. * * @return the monitor */ protected final UimaMonitor getMonitor() { return monitor; } /** * Create a configuration map from a context. * * @param context * the context * @return non-empty map of config param name to config param value */ protected static Map<String, String> getConfigParameters(final UimaContext context) { //<String, String> due to limitations of Metadata final Map<String, String> ret = new HashMap<>(); for (final String name : context.getConfigParameterNames()) { ret.put(name, context.getConfigParameterValue(name).toString()); } return ret; } }