SofaExampleAnnotator.java example

Explorer
uima_prolog-master
- uima-PrologInterface-Examples
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.uima.examples;

import java.util.Arrays;
import java.util.StringTokenizer;

import org.apache.uima.analysis_component.CasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;

/**
 * A simple multiple subject of analysis (multi-Sofa) example annotator Expects an English text Sofa
 * as input Creates a German text Sofa as output
 * 
 * This annotator has no configuration parameters, and requires no initialization method
 */

public class SofaExampleAnnotator extends CasAnnotator_ImplBase {
  public void process(CAS aCas) throws AnalysisEngineProcessException {
    CAS englishView, germanView;

    // get the CAS view for the English document
    englishView = aCas.getView("EnglishDocument");

    // Create the German text Sofa and open its view
    germanView = aCas.createView("GermanDocument");

    // Get some necessary Type System constants
    Type annot = englishView.getAnnotationType();
    Type cross = englishView.getTypeSystem().getType("sofa.test.CrossAnnotation");
    Feature other = cross.getFeatureByBaseName("otherAnnotation");

    // Get the English text
    String engText = englishView.getDocumentText();

    // Setup for translated text
    int engEnd = 0;
    int germBegin = 0;
    int germEnd = 0;
    StringBuffer translation = new StringBuffer();

    // Parse the English text
    StringTokenizer st = new StringTokenizer(engText);
    while (st.hasMoreTokens()) {
      String thisTok = st.nextToken();
      int engBegin = engText.indexOf(thisTok, engEnd);
      engEnd = engBegin + thisTok.length();

      // Create token annotations on English text
      AnnotationFS engAnnot = englishView.createAnnotation(annot, engBegin, engEnd);
      englishView.addFsToIndexes(engAnnot);

      // Simple word-by-word translation
      String germWord = translate(thisTok);

      // Accumulate the translated text
      if (germBegin > 0) {
        translation.append(' ');
        germBegin += 1;
      }
      translation.append(germWord);

      // Create token annotations on German text
      germEnd = germBegin + germWord.length();
      AnnotationFS germAnnot = germanView.createAnnotation(cross, germBegin, germEnd);
      germanView.addFsToIndexes(germAnnot);

      // add link to English text
      germAnnot.setFeatureValue(other, engAnnot);
      germBegin = germEnd;
    }

    // Finally, set the output tranlation Sofa data
    germanView.setDocumentText(translation.toString());

  }

  static char wThis[] = { 't', 'h', 'i', 's' };

  static char wBeer[] = { 'b', 'e', 'e', 'r' };

  static char wIs[] = { 'i', 's' };

  private String translate(String word) {
    String lword = word.toLowerCase();
    if (Arrays.equals(wThis, lword.toCharArray()))
      return "das";
    if (Arrays.equals(wBeer, lword.toCharArray()))
      return "bier";
    if (Arrays.equals(wIs, lword.toCharArray()))
      return "ist";
    return "gut";
  }

}