EdgeRankerByConfidence.java example

Explorer
dependency-parsing-toolbox-master
- Source
/////////////////////////////////////////////////////////////////
// This class implements one usage example of the confidence 
// estimation scores assigned to the edges of the parse trees.
// The edges of all the parse trees are ranked according to their
// confidence scores from low to high score. If the confidence
// scores provide good indication regarding the correctness of the
// parsed edge, then the incorrect edges will be ranked higher.
//
// Given the true parses it is possible to evaluate the quality
// of the ranking. The average-precision measure
// is used for evaluation. This scores can be used to tune the
// hyper-parameters of the applied confidence estimation methods.
//
// Author: Avihai Mejer, 2011
/////////////////////////////////////////////////////////////////
package mstparser;

import java.io.IOException;
import java.util.*;
import mstparser.io.DependencyReader;

public class EdgeRankerByConfidence {

    public void rankEdgesByConfidence(String act_file, String pred_file,
            String format) throws IOException {

        // Reader of file without confidence scores
        DependencyReader goldReader = DependencyReader.createDependencyReader(format);
        boolean labeled = goldReader.startReading(act_file);

        // Reader of file *with* confidence scores
        DependencyReader predictedReader = DependencyReader.createDependencyReaderWithConfidenceScores(format);
        boolean predLabeled = predictedReader.startReading(pred_file);

        if (labeled != predLabeled) {
            DependencyParser.out.println("Gold file and predicted file appear to differ "
                    + "on whether or not they are labeled. Expect problems!!!");
        }

        int instIndex = 0;

        DependencyInstance goldInstance = goldReader.getNext();
        DependencyInstance predInstance = predictedReader.getNext();

        LinkedList<PredictedEdge> allEdges = new LinkedList<PredictedEdge>();

        while (goldInstance != null) {

            int instanceLength = goldInstance.length();

            if (instanceLength != predInstance.length()) {
                DependencyParser.out.println("Lengths do not match on sentence "
                        + instIndex);
            }

            int[] goldHeads = goldInstance.heads;
            int[] predHeads = predInstance.heads;
            double[] confScores = predInstance.confidenceScores;

            // NOTE: the first item is the root info added during
            // nextInstance(), so we skip it.
            for (int i = 1; i < instanceLength; i++) {
                boolean correct = (predHeads[i] == goldHeads[i]);
                PredictedEdge edge = new PredictedEdge(correct, confScores[i]);
                allEdges.add(edge);
            }

            instIndex++;
            goldInstance = goldReader.getNext();
            predInstance = predictedReader.getNext();
        }

        // Sort all the edges according to the confidence score
        Collections.sort(allEdges, new CompareByConfidenceScore());

        double averagePrecision = avgPrecOfIncorrectEdgesRanking(allEdges);
        DependencyParser.out.println("Average-Precision: " + averagePrecision);
    }

    // Go over list of edges (sorted by edges confidence scores)
    // compute the precision every time an incorrect
    // edge is encountered.
    // Finally compute the average of all the precision values.
    // The maximal average-precision value is achieved if all the incorrect
    // edges are found a the beginning of the list and worst score if all
    // incorrect edges are at the end of the list.
    double avgPrecOfIncorrectEdgesRanking(List<PredictedEdge> edges) {
        int incorrectEdges = 0;
        int inspectedEdges = 0;
        double precSum = 0;

        Iterator<PredictedEdge> iter = edges.iterator();
        while (iter.hasNext()) {
            PredictedEdge edge = iter.next();
            inspectedEdges++;
            if (edge.correct == false) {
                // found incorrect edge
                incorrectEdges++;
                double prec = (double) incorrectEdges / inspectedEdges;
                precSum += prec;
            }
        }
        return precSum / incorrectEdges;
    }

    class PredictedEdge {

        boolean correct;
        double confScore;

        public PredictedEdge(boolean correct, double confScore) {
            this.correct = correct;
            this.confScore = confScore;
        }
    }

    class CompareByConfidenceScore implements Comparator<PredictedEdge> {

        public CompareByConfidenceScore() {
            super();
        }

        @Override
        public int compare(PredictedEdge o1, PredictedEdge o2) {
            if (o1.confScore < o2.confScore) {
                return -1;
            }
            if (o2.confScore < o1.confScore) {
                return 1;
            }
            return 0;
        }
    }
}