/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.uima.examples.cpe;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Iterator;

import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CasConsumer_ImplBase;
import org.apache.uima.collection.base_cpm.CasObjectProcessor;
import org.apache.uima.examples.SourceDocumentInformation;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceConfigurationException;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.ResourceProcessException;
import org.apache.uima.util.ProcessTrace;

/**
 * An example of a CAS Consumer. <br>
 * AnnotationPrinter prints all annotations in the CAS to an output file. <br>
 * Parameters needed by the AnnotationPrinter are
 * <ol>
 * <li>"outputFile" : file to which the output should be written.</li>
 * </ol>
 * <br>
 * This parameter is read in {@link #initialize()} from the UIMA context and is re-read in
 * {@link #reconfigure()}.
 */
public class AnnotationPrinter extends CasConsumer_ImplBase implements CasObjectProcessor {

  /** Name of the configuration parameter that holds the output file path. */
  private static final String PARAM_OUTPUT_FILE = "outputFile";

  /** File the annotations are currently written to. */
  File outFile;

  /** Writer opened on {@link #outFile}; null until {@link #initialize()} has succeeded. */
  FileWriter fileWriter;

  public AnnotationPrinter() {
  }

  /**
   * Initializes this CAS Consumer with the parameters specified in the descriptor: reads the
   * "outputFile" parameter, creates the parent directory of that file if it does not exist, and
   * opens a writer on the file.
   * 
   * @throws ResourceInitializationException
   *           if the "outputFile" parameter is absent, the parent directory cannot be created, or
   *           the file cannot be opened for writing
   */
  public void initialize() throws ResourceInitializationException {
    // extract configuration parameter settings
    String oPath = (String) getUimaContext().getConfigParameterValue(PARAM_OUTPUT_FILE);

    // Output file should be specified in the descriptor
    if (oPath == null) {
      throw new ResourceInitializationException(
              ResourceInitializationException.CONFIG_SETTING_ABSENT,
              new Object[] { PARAM_OUTPUT_FILE });
    }

    // If specified output directory does not exist, try to create it
    outFile = new File(oPath.trim());
    if (!ensureParentDirectory(outFile)) {
      throw new ResourceInitializationException(
              ResourceInitializationException.RESOURCE_DATA_NOT_VALID,
              new Object[] { oPath, PARAM_OUTPUT_FILE });
    }

    try {
      fileWriter = new FileWriter(outFile);
    } catch (IOException e) {
      throw new ResourceInitializationException(e);
    }
  }

  /**
   * Processes a CAS by writing one line per annotation in its annotation index to the output
   * file. <br>
   * If the CAS contains at least one annotation, a document header is written first; the header
   * includes the document URI when a {@link SourceDocumentInformation} annotation is present.
   * Each annotation line consists of the annotation's type name, a space, and the covered text
   * with line breaks replaced by spaces. This method is synchronized.
   * 
   * @param aCAS
   *          the CAS whose annotations are printed
   * 
   * @throws ResourceProcessException
   *           if the JCas cannot be obtained or writing to the output file fails
   * 
   * @see org.apache.uima.collection.base_cpm.CasObjectProcessor#processCas(CAS)
   */
  public synchronized void processCas(CAS aCAS) throws ResourceProcessException {
    JCas jcas;
    try {
      jcas = aCAS.getJCas();
    } catch (CASException e) {
      throw new ResourceProcessException(e);
    }

    // The document URI is taken from the first SourceDocumentInformation annotation, if any
    String docUri = null;
    Iterator it = jcas.getAnnotationIndex(SourceDocumentInformation.type).iterator();
    if (it.hasNext()) {
      SourceDocumentInformation srcDocInfo = (SourceDocumentInformation) it.next();
      docUri = srcDocInfo.getUri();
    }

    // iterate and print annotations
    Iterator annotationIter = jcas.getAnnotationIndex().iterator();
    if (!annotationIter.hasNext()) {
      // no annotations: nothing is written for this CAS, not even the header
      return;
    }

    try {
      writeDocumentHeader(docUri);
      while (annotationIter.hasNext()) {
        Annotation annot = (Annotation) annotationIter.next();

        // get the text that is enclosed within the annotation in the CAS
        String aText = annot.getCoveredText();
        if (aText == null) {
          // NOTE(review): getCoveredText() appears to return null when the view has no document
          // text - confirm against the UIMA version in use. Treated as empty text here instead
          // of failing with a NullPointerException.
          aText = "";
        }
        aText = aText.replace('\n', ' ');
        aText = aText.replace('\r', ' ');

        fileWriter.write(annot.getType().getName() + " " + aText + "\n");
        // push each line to the file as soon as it has been written
        fileWriter.flush();
      }
    } catch (IOException e) {
      throw new ResourceProcessException(e);
    }
  }

  /**
   * Called when a batch of processing is completed. Does nothing, because AnnotationPrinter does
   * not accumulate anything across documents.
   * 
   * @param aTrace
   *          ProcessTrace object that will log events in this method.
   * @throws ResourceProcessException
   *           declared by the interface; not thrown by this implementation
   * @throws IOException
   *           declared by the interface; not thrown by this implementation
   * 
   * @see org.apache.uima.collection.CasConsumer#batchProcessComplete(ProcessTrace)
   */
  public void batchProcessComplete(ProcessTrace aTrace) throws ResourceProcessException,
          IOException {
    // nothing to do in this case as AnnotationPrinter does not do
    // anything cumulatively
  }

  /**
   * Called when the entire collection is completed. Closes the output writer if one is open.
   * 
   * @param aTrace
   *          ProcessTrace object that will log events in this method.
   * @throws ResourceProcessException
   *           declared by the interface; not thrown by this implementation
   * @throws IOException
   *           if closing the output writer fails
   * @see org.apache.uima.collection.CasConsumer#collectionProcessComplete(ProcessTrace)
   */
  public void collectionProcessComplete(ProcessTrace aTrace) throws ResourceProcessException,
          IOException {
    if (fileWriter != null) {
      fileWriter.close();
    }
  }

  /**
   * Reconfigures the parameters of this Consumer. <br>
   * Re-reads the "outputFile" parameter. If it names a different file than the one currently in
   * use, the current writer is closed, the parent directory of the new file is created if
   * necessary, and a new writer is opened. The current output file is only replaced once the new
   * writer has been opened, so a failed attempt can be retried with the same path.
   * 
   * @throws ResourceConfigurationException
   *           if the "outputFile" parameter is absent, the parent directory cannot be created, or
   *           closing the old file or opening the new file fails
   * 
   * @see org.apache.uima.resource.ConfigurableResource#reconfigure()
   */
  public void reconfigure() throws ResourceConfigurationException {
    super.reconfigure();

    // extract configuration parameter settings
    String oPath = (String) getUimaContext().getConfigParameterValue(PARAM_OUTPUT_FILE);
    if (oPath == null) {
      // same message key that initialize() uses for a missing output file
      throw new ResourceConfigurationException(
              ResourceInitializationException.CONFIG_SETTING_ABSENT,
              new Object[] { PARAM_OUTPUT_FILE });
    }

    File oFile = new File(oPath.trim());
    if (oFile.equals(this.outFile)) {
      // output file unchanged: keep writing to the current writer
      return;
    }

    // output file has changed: close existing file and open the new one
    try {
      if (fileWriter != null) {
        fileWriter.close();
      }

      // If specified output directory does not exist, try to create it
      if (!ensureParentDirectory(oFile)) {
        throw new ResourceConfigurationException(
                ResourceInitializationException.RESOURCE_DATA_NOT_VALID,
                new Object[] { oPath, PARAM_OUTPUT_FILE });
      }

      fileWriter = new FileWriter(oFile);
      this.outFile = oFile;
    } catch (IOException e) {
      // keep the underlying I/O failure as the cause
      throw new ResourceConfigurationException(e);
    }
  }

  /**
   * Called if clean up is needed in case of exit under error conditions. Closes the output writer
   * if one is open; a failure to close is ignored.
   * 
   * @see org.apache.uima.resource.Resource#destroy()
   */
  public void destroy() {
    if (fileWriter != null) {
      try {
        fileWriter.close();
      } catch (IOException ignored) {
        // ignore IOException on destroy
      }
    }
  }

  /**
   * Makes sure the parent directory of the given file exists, creating it if necessary.
   * 
   * @param file
   *          the file whose parent directory is required
   * @return false if the parent directory did not exist and could not be created; true otherwise,
   *         including when the file has no parent
   */
  private static boolean ensureParentDirectory(File file) {
    File parentDir = file.getParentFile();
    if (parentDir == null || parentDir.exists()) {
      return true;
    }
    return parentDir.mkdirs();
  }

  /**
   * Writes the separator that starts the output of a new document.
   * 
   * @param docUri
   *          URI of the document, or null if unknown, in which case no URI line is written
   * @throws IOException
   *           if writing to the output file fails
   */
  private void writeDocumentHeader(String docUri) throws IOException {
    fileWriter.write("\n\n<++++NEW DOCUMENT++++>\n");
    if (docUri != null) {
      fileWriter.write("DOCUMENT URI:" + docUri + "\n");
    }
    fileWriter.write("\n");
  }
}