/* * Copyright 2014 * Ubiquitous Knowledge Processing (UKP) Lab * Technische Universität Darmstadt * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package de.tudarmstadt.ukp.dkpro.core.textnormalizer.casfilter; import java.util.List; import org.apache.uima.analysis_engine.AnalysisEngineDescription; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.cas.AbstractCas; import org.apache.uima.fit.component.JCasMultiplier_ImplBase; import org.apache.uima.fit.factory.AggregateBuilder; import org.apache.uima.fit.factory.FlowControllerFactory; import org.apache.uima.fit.pipeline.SimplePipeline; import org.apache.uima.flow.FlowController; import org.apache.uima.flow.impl.FixedFlowController; import org.apache.uima.jcas.JCas; import org.apache.uima.resource.ResourceInitializationException; /** * This class calls the {@code pass()} methods to determine whether a JCas should be filtered out or * passed on in a pipeline. Therefore, the {@code pass()} method has to be implemented, returning * true if a document should be passed on in the pipeline and false if it should be filtered out. * <p> * The filter (sub-)classes should be applied within a an {@link AggregateBuilder} applying a * {@link FlowController} as in the * {@link CasFilter_ImplBase#createAggregateBuilderDescription(AnalysisEngineDescription...)} * method. * <p> * Note that methods such as * {@link SimplePipeline#runPipeline(org.apache.uima.cas.CAS, org.apache.uima.analysis_engine.AnalysisEngine...)} * and * {@link SimplePipeline#iteratePipeline(org.apache.uima.collection.CollectionReaderDescription, AnalysisEngineDescription...)} * do not allow direct access to the JCas' produced by a JCasMultiplier. * * */ public abstract class CasFilter_ImplBase extends JCasMultiplier_ImplBase { private JCas current = null; @Override public boolean hasNext() throws AnalysisEngineProcessException { return current != null; } @Override public AbstractCas next() throws AnalysisEngineProcessException { JCas result = current; current = null; return result; } @Override public void process(JCas aJCas) throws AnalysisEngineProcessException { current = pass(aJCas) ? aJCas : null; } /** * This method determines whether a document / JCas is removed or retained. If this method * returns true, the document is retained, if it returns false, it is removed. * * @param aJCas * the currently processed JCas * @return true if the document is to be retained, false if it is to be removed */ protected abstract boolean pass(JCas aJCas); /** * Creates a new AnalysisEngineDescription from an Aggregrator that contains all input * AnalysisEngineDescriptions in given order. This is intended for the use of a filter like * {@link CasFilter_ImplBase}; all subsequent analysis engines will only see the documents that * have passed the filter. * * @param aEngines * {@link AnalysisEngineDescription}s that should be aggregated. * @return a single {@link AnalysisEngineDescription} aggregating all the input engines. * @throws ResourceInitializationException * if any input analysis engine cannot be initialized */ public static AnalysisEngineDescription createAggregateBuilderDescription( AnalysisEngineDescription... aEngines) throws ResourceInitializationException { AggregateBuilder aggregateBuilder = new AggregateBuilder(); aggregateBuilder.setFlowControllerDescription(FlowControllerFactory .createFlowControllerDescription(FixedFlowController.class, FixedFlowController.PARAM_ACTION_AFTER_CAS_MULTIPLIER, "drop")); for (AnalysisEngineDescription aEngine : aEngines) { aggregateBuilder.add(aEngine); } return aggregateBuilder.createAggregateDescription(); } /** * @see CasFilter_ImplBase#createAggregateBuilderDescription(AnalysisEngineDescription...) * @param aEngines * a list of {@link AnalysisEngineDescription}s * @return a single {@link AnalysisEngineDescription} aggregating all the input engines. * @throws ResourceInitializationException * if any input analysis engine cannot be initialized */ public static AnalysisEngineDescription createAggregateBuilderDescription( List<AnalysisEngineDescription> aEngines) throws ResourceInitializationException { return createAggregateBuilderDescription(aEngines .toArray(new AnalysisEngineDescription[aEngines.size()])); } }