CasMultiplierExampleApplication.java example

Explorer
uima_prolog-master
- uima-PrologInterface-Examples
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.uima.examples.casMultiplier;

import java.io.File;
import java.io.PrintStream;

import org.apache.uima.UIMAFramework;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.CasIterator;
import org.apache.uima.cas.CAS;
import org.apache.uima.examples.PrintAnnotations;
import org.apache.uima.resource.ResourceSpecifier;
import org.apache.uima.util.FileUtils;
import org.apache.uima.util.XMLInputSource;

/**
 * An example application that shows how to interact with a CasMultiplier. A CasMultiplier is a type
 * of Analysis Engine that outputs new CASes. One use of a CasMultiplier is to divide a large CAS
 * into smaller pieces - a CasMultiplier that does this is called a "Segmenter".
 * <p>
 * This program takes two arguments -
 * <ul>
 * <li>The path to the Analysis Engine Descriptor for the CasMultiplier to run (such as
 * descriptors/cas_multiplier/SimpleTextSegmenter.xml or
 * descriptors/cas_multiplier/SegmenterAndTokenizerAE.xml)</li>
 * <li>The file name of a text document to analyze (to see the effect of segmentation, choose a
 * document larger than 100k characters, which is the default segment size produced by the
 * SimpleTextSegmenter).</li>
 * </ul>
 */
public class CasMultiplierExampleApplication {
  // Not referenced anywhere inside this class; kept so that any code elsewhere that reads or
  // assigns this package-visible field continues to compile.
  static PrintStream outputStream;

  /**
   * Main program. Builds the Analysis Engine described by the first argument, feeds it the text
   * of the file named by the second argument, and prints the document text and annotations of
   * every CAS the engine outputs to standard output.
   * <p>
   * If fewer than two arguments are supplied, a usage message is written to standard error and
   * the method returns without doing any work. Any exception raised while running is caught
   * and its stack trace is printed; it is not propagated to the caller.
   * 
   * @param args
   *          Command-line arguments - see class description
   */
  public static void main(String[] args) {
    // Fail with a readable message instead of an ArrayIndexOutOfBoundsException stack trace.
    if (args == null || args.length < 2) {
      System.err.println("Usage: java " + CasMultiplierExampleApplication.class.getName()
              + " <CasMultiplier descriptor> <input text file>");
      return;
    }

    // Declared outside the try so the finally clause can destroy it on every exit path.
    AnalysisEngine ae = null;
    try {
      // get Resource Specifier from XML file
      XMLInputSource in = new XMLInputSource(args[0]);
      ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);

      // create AnalysisEngine
      ae = UIMAFramework.produceAnalysisEngine(specifier);

      // read input text file
      File textFile = new File(args[1]);
      String document = FileUtils.file2String(textFile, "UTF-8");

      // create a new CAS and set the document text
      CAS initialCas = ae.newCAS();
      initialCas.setDocumentText(document);

      // pass the CAS to the AnalysisEngine and get back
      // a CasIterator for stepping over the output CASes that are produced.
      CasIterator casIterator = ae.processAndOutputNewCASes(initialCas);
      while (casIterator.hasNext()) {
        CAS outCas = casIterator.next();
        try {
          // dump the document text and annotations for this segment
          System.out.println("********* NEW SEGMENT *********");
          System.out.println(outCas.getDocumentText());
          PrintAnnotations.printAnnotations(outCas, System.out);
        } finally {
          // release the CAS (important) - done in finally so that a failure while
          // printing does not leave this CAS unreleased
          outCas.release();
        }
      }

      // If there's a CAS Consumer inside this aggregate and we want
      // its collectionProcessComplete method to be called, we need to
      // call it ourselves. If run inside a CPE this would get called
      // automatically.
      ae.collectionProcessComplete();
    } catch (Exception e) {
      e.printStackTrace();
    } finally {
      // The engine is only non-null if it was produced successfully; destroy it so the
      // resources it holds are released whether or not processing succeeded.
      if (ae != null) {
        ae.destroy();
      }
    }
  }
}