//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.uima; import java.io.IOException; import java.io.InputStream; import java.util.Collection; import java.util.Map; import org.apache.uima.UimaContext; import org.apache.uima.fit.descriptor.ExternalResource; import org.apache.uima.jcas.JCas; import org.apache.uima.jcas.tcas.Annotation; import org.apache.uima.jcas.tcas.DocumentAnnotation; import org.apache.uima.resource.ResourceInitializationException; import com.google.common.base.Strings; import uk.gov.dstl.baleen.core.history.BaleenHistory; import uk.gov.dstl.baleen.core.pipelines.PipelineBuilder; import uk.gov.dstl.baleen.types.metadata.Metadata; import uk.gov.dstl.baleen.uima.utils.UimaUtils; /** Base implementation of a ContentExtractor. * * This abstract class provides the basis for content extractors. It provides metrics and support elements * to help development. * * Implementors should look to override doProcessStream as per {@link IContentExtractor} processFile. * * * */ public abstract class BaleenContentExtractor implements IContentExtractor { private UimaMonitor monitor; private UimaSupport support; /** * Baleen History resource * * @baleen.resource uk.gov.dstl.baleen.core.history.BaleenHistory */ public static final String KEY_HISTORY = PipelineBuilder.BALEEN_HISTORY; @ExternalResource(key = KEY_HISTORY, mandatory = false) BaleenHistory history; @Override public final void initialize(UimaContext context, Map<String, Object> params) throws ResourceInitializationException { String pipelineName = UimaUtils.getPipelineName(context); monitor = createMonitor(pipelineName); support = createSupport(pipelineName, context); monitor.startFunction("initialize"); doInitialize(context, params); monitor.finishFunction("initialize"); } protected UimaSupport createSupport(String pipelineName, UimaContext context) { return new UimaSupport(pipelineName, this.getClass(), history, monitor, UimaUtils.isMergeDistinctEntities(context)); } protected UimaMonitor createMonitor(String pipelineName) { return new UimaMonitor(pipelineName, this.getClass()); } /** * Called when the content extractor is being initialized. Any required resources, for example, should be opened at this point. * * @param context * UimaContext object passed by the Collection Processing Engine */ public abstract void doInitialize(UimaContext context, Map<String, Object> params) throws ResourceInitializationException; @Override public final void processStream(InputStream stream, String source, JCas jCas) throws IOException { monitor.startFunction("process"); doProcessStream(stream, source, jCas); monitor.finishFunction("process"); monitor.persistCounts(); } /** * Called when the content extractor is being asked to process an inputstream and extract the content. * * @param stream InputStream to process * @param jCas JCas to add content to */ protected abstract void doProcessStream(InputStream stream, String source, JCas jCas) throws IOException; @Override public final void destroy() { monitor.startFunction("destroy"); doDestroy(); monitor.finishFunction("destroy"); } /** * Called when the content extractor has finished and is closing down. Any open resources, for example, should be closed at this point. */ protected abstract void doDestroy(); /** * Gets the UimaMonitor object associated with this ContentExtractor, for instance to log errors. * * @return UimaMonitor object */ protected UimaMonitor getMonitor() { return monitor; } /** * Gets the UimaSupport object associated with this ContentExtractor, for instance to log errors. * * @return UimaSupport object */ protected UimaSupport getSupport() { return support; } // Common Support functions for quick access /** * Return the document annotation. * * @param jCas * @return the document annotation */ protected DocumentAnnotation getDocumentAnnotation(JCas jCas){ return getSupport().getDocumentAnnotation(jCas); } /** * Add an annotation to the JCas index, notifying UimaMonitor of the fact we * have done so * * @param annot * Annotation(s) to add */ protected void addToJCasIndex(Annotation... annotations) { getSupport().add(annotations); } /** * Add an annotation to the JCas index, notifying UimaMonitor of the fact we * have done so * * @param annot * Annotation(s) to add */ protected void addToJCasIndex(Collection<? extends Annotation> annotations) { getSupport().add(annotations); } /** * Adds a metadata annotation to the JCas * * @param jCas The JCas object to add the annotation to * @param name The metadata key * @param value The metadata value */ protected Metadata addMetadata(JCas jCas, String name, String value){ if (!Strings.isNullOrEmpty(name) && !Strings.isNullOrEmpty(value)) { Metadata md = new Metadata(jCas); md.setKey(name); md.setValue(value); addToJCasIndex(md); return md; } return null; } }