//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.consumers; import java.util.Set; import org.apache.uima.UimaContext; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.fit.descriptor.ConfigurationParameter; import org.apache.uima.jcas.JCas; import org.apache.uima.resource.ResourceInitializationException; import uk.gov.dstl.baleen.types.structure.Structure; import uk.gov.dstl.baleen.uima.BaleenConsumer; import uk.gov.dstl.baleen.uima.utils.StructureHierarchy; import uk.gov.dstl.baleen.uima.utils.StructureUtil; import uk.gov.dstl.baleen.uima.utils.select.ItemHierarchy; /** * Abstract class for common code dealing with consuming Structure. */ public abstract class AbstractStructureConsumer extends BaleenConsumer { /** * A list of structural types which will be considered during record path * analysis. * * @baleen.config Paragraph,TableCell,ListItem,Aside, ... */ public static final String PARAM_TYPE_NAMES = "types"; /** The type names. */ @ConfigurationParameter(name = PARAM_TYPE_NAMES, mandatory = false) private String[] typeNames; /** The structural classes. */ protected Set<Class<? extends Structure>> structuralClasses; @Override public void doInitialize(UimaContext aContext) throws ResourceInitializationException { super.doInitialize(aContext); structuralClasses = StructureUtil.getStructureClasses(typeNames); } @Override protected void doProcess(JCas jCas) throws AnalysisEngineProcessException { doProcess(StructureHierarchy.build(jCas, structuralClasses)); } /** * Called when consuming the document with the {@link ItemHierarchy} for the * document. * <p> * Implementations should override this method. * * @param structureHierarchy * the structure hierarchy * @throws AnalysisEngineProcessException */ protected abstract void doProcess(ItemHierarchy<Structure> structureHierarchy) throws AnalysisEngineProcessException; }