/** * Copyright (C) 2012 cogroo <cogroo@cogroo.org> * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.cogroo.tools.featurizer; import opennlp.tools.util.eval.Evaluator; import opennlp.tools.util.eval.Mean; /** * The {@link FeaturizerEvaluator} measures the performance of the given * {@link Featurizer} with the provided reference {@link FeatureSample}s. * * @see Evaluator * @see Featurizer * @see FeatureSample */ public class FeaturizerEvaluator extends Evaluator<FeatureSample> { private Mean wordAccuracy = new Mean(); /** * The {@link Featurizer} used to create the predicted {@link FeatureSample} * objects. */ private Featurizer featurizer; /** * Initializes the current instance with the given {@link Featurizer}. * * @param featurizer * the {@link Featurizer} to evaluate. * @param listeners * evaluation listeners */ public FeaturizerEvaluator(Featurizer featurizer, FeaturizerEvaluationMonitor... listeners) { super(listeners); this.featurizer = featurizer; } /** * Evaluates the given reference {@link FeatureSample} object. * * This is done by finding the phrases with the {@link Featurizer} in the * sentence from the reference {@link FeatureSample}. The found phrases are * then used to calculate and update the scores. * * @param reference * the reference {@link FeatureSample}. * * @return the predicted sample */ @Override protected FeatureSample processSample(FeatureSample reference) { String[] predictedFeatures = featurizer.featurize(reference.getSentence(), reference.getTags()); String[] referenceTags = reference.getFeatures(); for (int i = 0; i < referenceTags.length; i++) { if (referenceTags[i].equals(predictedFeatures[i])) { wordAccuracy.add(1); } else { wordAccuracy.add(0); } } FeatureSample result = new FeatureSample(reference.getSentence(), reference.getLemmas(), reference.getTags(), predictedFeatures); return result; } /** * Retrieves the word accuracy. * * This is defined as: word accuracy = correctly detected tags / total words * * @return the word accuracy */ public double getWordAccuracy() { return wordAccuracy.mean(); } /** * Retrieves the total number of words considered in the evaluation. * * @return the word count */ public long getWordCount() { return wordAccuracy.count(); } }