TokenNameFinderFineGrainedReportListener.java example

Explorer
opennlp-master
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package opennlp.tools.cmdline.namefind;

import java.io.OutputStream;
import java.util.Comparator;
import java.util.Map;

import opennlp.tools.cmdline.FineGrainedReportListener;
import opennlp.tools.namefind.NameSample;
import opennlp.tools.namefind.TokenNameFinderEvaluationMonitor;
import opennlp.tools.util.SequenceCodec;

/**
 * Generates a detailed report for the NameFinder.
 * <p>
 * It is possible to use it from an API and access the statistics using the
 * provided getters.
 */
public class TokenNameFinderFineGrainedReportListener
    extends FineGrainedReportListener implements TokenNameFinderEvaluationMonitor {

  // Codec used to encode the names of a sample into a tag array; assigned once at construction.
  private final SequenceCodec<String> sequenceCodec;

  /**
   * Creates a listener that will print to {@link System#err}.
   *
   * @param seqCodec the codec used to encode the names of each evaluated sample into tags
   */
  public TokenNameFinderFineGrainedReportListener(SequenceCodec<String> seqCodec) {
    this(seqCodec, System.err);
  }

  /**
   * Creates a listener that prints to a given {@link OutputStream}.
   *
   * @param seqCodec the codec used to encode the names of each evaluated sample into tags
   * @param outputStream the stream handed to the parent listener for output
   */
  public TokenNameFinderFineGrainedReportListener(SequenceCodec<String> seqCodec, OutputStream outputStream) {
    super(outputStream);
    this.sequenceCodec = seqCodec;
  }

  // methods inherited from EvaluationMonitor

  /**
   * Adds the given reference/prediction pair to the collected statistics.
   *
   * @param reference the reference sample
   * @param prediction the predicted sample
   */
  public void missclassified(NameSample reference, NameSample prediction) {
    statsAdd(reference, prediction);
  }

  /**
   * Adds the given reference/prediction pair to the collected statistics.
   * <p>
   * Both callbacks record the pair in the same way, so the statistics cover every sample
   * reported to this listener.
   *
   * @param reference the reference sample
   * @param prediction the predicted sample
   */
  public void correctlyClassified(NameSample reference,
                                  NameSample prediction) {
    statsAdd(reference, prediction);
  }

  /**
   * Encodes the names of both samples into tag arrays with the configured codec and adds
   * them to the statistics.
   *
   * @param reference the reference sample
   * @param prediction the predicted sample
   */
  private void statsAdd(NameSample reference, NameSample prediction) {
    String[] refTags = sequenceCodec.encode(reference.getNames(), reference.getSentence().length);
    String[] predTags = sequenceCodec.encode(prediction.getNames(), prediction.getSentence().length);

    // we don't want it to compute token frequency, so instead of the tokens we pass a
    // placeholder array of the same length as the reference sentence (all elements are null)
    getStats().add(new String[reference.getSentence().length], refTags, predTags);
  }

  /**
   * Returns the comparator used to order the labels of the confusion matrix.
   *
   * @param confusionMatrix the confusion matrix lines keyed by label
   * @return a {@code GroupedMatrixLabelComparator} built from {@code confusionMatrix}
   */
  @Override
  public Comparator<String> getMatrixLabelComparator(Map<String, ConfusionMatrixLine> confusionMatrix) {
    return new GroupedMatrixLabelComparator(confusionMatrix);
  }

  /**
   * Returns the comparator used to order labels.
   *
   * @param map the counters keyed by label
   * @return a {@code GroupedLabelComparator} built from {@code map}
   */
  @Override
  public Comparator<String> getLabelComparator(Map<String, Counter> map) {
    return new GroupedLabelComparator(map);
  }

  /**
   * Writes the report: the general statistics, then the tags error rank, then the
   * general confusion table.
   */
  public void writeReport() {
    printGeneralStatistics();
    printTagsErrorRank();
    printGeneralConfusionTable();
  }
}