/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.uima.examples; import java.io.BufferedInputStream; import java.io.File; import java.io.IOException; import org.apache.uima.UIMAFramework; import org.apache.uima.analysis_engine.AnalysisEngine; import org.apache.uima.analysis_engine.AnalysisEngineProcessException; import org.apache.uima.cas.CAS; import org.apache.uima.resource.ResourceSpecifier; import org.apache.uima.util.FileUtils; import org.apache.uima.util.XMLInputSource; /** * An example application that reads documents from files, sends them though an Analysis Engine, and * prints all discovered annotations to System.out. * <p> * The application takes two arguments: * <ol type="1"> * <li>The path to an XML descriptor for the Analysis Engine to be executed</li> * <li>An input directory containing files to be processed</li> * </ol> */ public class ExampleApplication { /** * Main program. * * @param args * Command-line arguments - see class description */ public static void main(String[] args) { try { File taeDescriptor = null; File inputDir = null; // Read and validate command line arguments boolean validArgs = false; if (args.length == 2) { taeDescriptor = new File(args[0]); inputDir = new File(args[1]); validArgs = taeDescriptor.exists() && !taeDescriptor.isDirectory() && inputDir.isDirectory(); } if (!validArgs) { printUsageMessage(); } else { // get Resource Specifier from XML file XMLInputSource in = new XMLInputSource(taeDescriptor); ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in); // for debugging, output the Resource Specifier // System.out.println(specifier); // create Analysis Engine AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(specifier); // create a CAS CAS cas = ae.newCAS(); // get all files in the input directory File[] files = inputDir.listFiles(); if (files == null) { System.out.println("No files to process"); } else { // process documents for (int i = 0; i < files.length; i++) { if (!files[i].isDirectory()) { processFile(files[i], ae, cas); } } } ae.destroy(); } } catch (Exception e) { e.printStackTrace(); } } /** * Prints usage message. */ private static void printUsageMessage() { System.err.println("Usage: java org.apache.uima.example.ExampleApplication " + "<Analysis Engine descriptor or PEAR file name> <input dir>"); } /** * Processes a single XML file and prints annotations to System.out * * @param aFile * file to process * @param aAE * Analysis Engine that will process the file * @param aCAS * CAS that will be used to hold analysis results */ private static void processFile(File aFile, AnalysisEngine aAE, CAS aCAS) throws IOException, AnalysisEngineProcessException { System.out.println("Processing file " + aFile.getName()); String document = FileUtils.file2String(aFile); document = document.trim(); // put document text in CAS aCAS.setDocumentText(document); // process aAE.process(aCAS); // print annotations to System.out PrintAnnotations.printAnnotations(aCAS, System.out); // reset the CAS to prepare it for processing the next document aCAS.reset(); } }