/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */

package edu.cmu.sphinx.util;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.StringTokenizer;

/**
 * A program that takes in a reference transcript and a hypothesis transcript and figures out how many gap insertion
 * errors there are. The hypothesis transcript file should contain timestamps for when each word was entered and
 * exited.
 * <p>The gap insertion detection algorithm works as follows. It takes each hypothesized word individually and checks
 * whether it falls into a non-speech region in the reference transcript. If it does, that hypothesized word is counted
 * as a gap insertion.
 */
public class GapInsertionDetector {

    private final ReferenceFile referenceFile;
    private final HypothesisFile hypothesisFile;
    private final boolean showGapInsertions;

    /**
     * Create a gap insertion detector to detect gap insertions using the given reference file and hypothesis file.
     * Both files are opened here and are closed again by {@link #detect()}.
     *
     * @param referenceFile     the file of references
     * @param hypothesisFile    the file of hypotheses
     * @param showGapInsertions if true, {@link #detect()} prints one line per detected gap insertion
     * @throws IOException if either file cannot be opened
     */
    public GapInsertionDetector(String referenceFile, String hypothesisFile, boolean showGapInsertions)
            throws IOException {
        ReferenceFile openedReference = new ReferenceFile(referenceFile);
        HypothesisFile openedHypothesis;
        try {
            openedHypothesis = new HypothesisFile(hypothesisFile);
        } catch (IOException e) {
            // Do not leak the already opened reference file when the hypothesis file is missing.
            try {
                openedReference.close();
            } catch (IOException ignored) {
                // The failure to open the hypothesis file is the error worth reporting.
            }
            throw e;
        }
        this.referenceFile = openedReference;
        this.hypothesisFile = openedHypothesis;
        this.showGapInsertions = showGapInsertions;
    }

    /**
     * Detect the gap insertion errors. Both transcript files are closed when this method returns, so calling it a
     * second time finds no more hypothesized words and returns 0.
     *
     * @return the total number of gap insertion errors
     * @throws IOException if IO error occurred
     */
    public int detect() throws IOException {
        try {
            return countGapInsertions();
        } finally {
            closeFiles();
        }
    }

    /** Walks both transcripts in parallel and counts the hypothesized words that overlap a silence gap. */
    private int countGapInsertions() throws IOException {
        int gaps = 0;
        ReferenceUtterance reference = referenceFile.nextUtterance();
        StringBuilder log = new StringBuilder();

        HypothesisWord word;
        while ((word = hypothesisFile.nextWord()) != null) {
            boolean hasGapError = false;

            // go to the relevant reference utterance: skip everything that ends before this word starts
            while (reference != null && reference.getEndTime() < word.getStartTime()) {
                reference = referenceFile.nextUtterance();
            }

            if (reference == null) {
                // if no more reference words, this is a gap insertion
                hasGapError = true;
            } else if (reference.isSilenceGap()) {
                hasGapError = true;
            } else {
                // the word starts inside speech; it is still an error if it runs on into a gap
                // or past the end of the reference
                while (reference.getEndTime() < word.getEndTime()) {
                    reference = referenceFile.nextUtterance();
                    if (reference == null || reference.isSilenceGap()) {
                        hasGapError = true;
                        break;
                    }
                }
            }

            if (hasGapError) {
                gaps++;
                if (showGapInsertions) {
                    log.append("GapInsError: Utterance: ").append(hypothesisFile.getUtteranceCount())
                            .append(" Word: ").append(word.getText()).append(" (")
                            .append(word.getStartTime()).append(',').append(word.getEndTime()).append("). ");
                    if (reference != null) {
                        assert reference.isSilenceGap();
                        log.append("Reference: <sil> (").append(reference.getStartTime())
                                .append(',').append(reference.getEndTime()).append(')');
                    }
                    log.append('\n');
                }
            }
        }

        if (showGapInsertions) {
            System.out.println(log);
        }
        return gaps;
    }

    /** Closes both transcript files, attempting the second close even if the first one fails. */
    private void closeFiles() throws IOException {
        try {
            referenceFile.close();
        } finally {
            hypothesisFile.close();
        }
    }

    /**
     * A command line program for detecting gap insertion errors. To run this program, type:
     * <code>java GapInsertionDetector {referenceFile} {hypothesisFile}</code>.
     * Every detected gap insertion is printed, followed by the total count.
     *
     * @param argv the reference file name followed by the hypothesis file name
     */
    public static void main(String[] argv) {
        if (argv.length < 2) {
            System.out.println("Usage: java GapInsertionDetector " +
                    "<referenceFile> <hypothesisFile>");
            // Nothing to process; without this return argv[0] below would throw.
            return;
        }
        try {
            String referenceFile = argv[0];
            String hypothesisFile = argv[1];
            GapInsertionDetector gid = new GapInsertionDetector(referenceFile, hypothesisFile, true);
            System.out.println("# of gap insertions: " + gid.detect());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

/**
 * Reads a reference transcript one line at a time.
 */
class ReferenceFile {

    /** The open reader, or null once {@link #close()} has been called. */
    private BufferedReader reader;

    /**
     * Creates a ReferenceFile, given the name of the reference file.
     *
     * @param fileName the name of the reference file
     * @throws IOException if the file cannot be opened
     */
    ReferenceFile(String fileName) throws IOException {
        reader = new BufferedReader(new FileReader(fileName));
    }

    /**
     * Returns the next available ReferenceUtterance. Every line of the file is returned, including
     * inter-segment (silence) gaps; callers tell them apart with {@link ReferenceUtterance#isSilenceGap()}.
     *
     * @return the next available ReferenceUtterance, or null if the end of file has been reached or the file has
     *         been closed
     * @throws IOException if reading the file fails
     */
    ReferenceUtterance nextUtterance() throws IOException {
        if (reader == null) {
            return null;
        }
        String line = reader.readLine();
        return (line != null) ? new ReferenceUtterance(line) : null;
    }

    /**
     * Closes the underlying file. Closing an already closed file has no effect.
     *
     * @throws IOException if closing the file fails
     */
    void close() throws IOException {
        if (reader != null) {
            BufferedReader open = reader;
            reader = null;
            open.close();
        }
    }
}

/**
 * Converts a line in the HUB-4 .stm reference file into an object.
 */
class ReferenceUtterance {

    private final boolean isSilenceGap;
    private final float startTime;
    private final float endTime;
    private final String[] words;

    /**
     * Creates a ReferenceUtterance from the given line of reference.
     *
     * @param line the line of reference, in the format: {@code [test_name] [category]
     *             [speaker_name|"inter_segment_gap"] [start_time] [end_time] [<params>] [reference_text]}
     */
    ReferenceUtterance(String line) {
        StringTokenizer st = new StringTokenizer(line);
        st.nextToken();                 // skip the test set name
        st.nextToken();                 // skip the category
        String type = st.nextToken();   // speaker name, or the silence-gap marker
        isSilenceGap = type.equals("inter_segment_gap");
        startTime = Float.parseFloat(st.nextToken());
        endTime = Float.parseFloat(st.nextToken());
        if (st.hasMoreTokens()) {
            st.nextToken();             // skip the <...> parameter token
            words = new String[st.countTokens()];
            for (int i = 0; i < words.length; i++) {
                words[i] = st.nextToken();
            }
        } else {
            words = new String[0];
        }
    }

    /**
     * Returns true if this is a silence gap.
     *
     * @return true if this is a silence gap, false otherwise.
     */
    boolean isSilenceGap() {
        return isSilenceGap;
    }

    /**
     * Returns the starting time (in seconds) of this utterance.
     *
     * @return the starting time of this utterance
     */
    float getStartTime() {
        return startTime;
    }

    /**
     * Returns the ending time (in seconds) of this utterance.
     *
     * @return the ending time of this utterance
     */
    float getEndTime() {
        return endTime;
    }

    /**
     * Returns the text of this utterance.
     *
     * @return the text of this utterance
     */
    String[] getWords() {
        return words;
    }
}

/**
 * Reads a hypothesis transcript and hands out its words one at a time.
 */
class HypothesisFile {

    /** The open reader, or null once {@link #close()} has been called. */
    private BufferedReader reader;
    private Iterator<HypothesisWord> iterator;
    private int utteranceCount;

    /**
     * Creates a HypothesisFile from the given file.
     *
     * @param fileName the name of the hypothesis file
     * @throws IOException if the file cannot be opened
     */
    HypothesisFile(String fileName) throws IOException {
        reader = new BufferedReader(new FileReader(fileName));
    }

    /**
     * Returns the next hypothesized word in the hypothesis file.
     *
     * @return the next hypothesized word, or null if there are no more words
     * @throws IOException if reading the file fails
     */
    HypothesisWord nextWord() throws IOException {
        if (iterator == null || !iterator.hasNext()) {
            HypothesisUtterance utterance = nextUtterance();
            iterator = (utterance != null) ? utterance.getWords().iterator() : null;
        }
        // nextUtterance() only returns utterances with at least one word, so next() is safe here
        return (iterator != null) ? iterator.next() : null;
    }

    /**
     * Returns the next available hypothesis utterance that contains at least one word. Lines without any word are
     * skipped but still counted in the utterance count.
     *
     * @return the next available hypothesis utterance, or null if the end of file has been reached or the file has
     *         been closed
     * @throws IOException if reading the file fails
     */
    private HypothesisUtterance nextUtterance() throws IOException {
        if (reader == null) {
            return null;
        }
        String line;
        // A loop rather than recursion, so a long run of word-less lines cannot overflow the stack.
        while ((line = reader.readLine()) != null) {
            utteranceCount++;
            HypothesisUtterance utterance = new HypothesisUtterance(line);
            if (utterance.getWordCount() > 0) {
                return utterance;
            }
        }
        return null;
    }

    /**
     * Returns the utterance count, i.e. the number of lines read from the file so far.
     *
     * @return the utterance count
     */
    public int getUtteranceCount() {
        return utteranceCount;
    }

    /**
     * Closes the underlying file. Closing an already closed file has no effect.
     *
     * @throws IOException if closing the file fails
     */
    void close() throws IOException {
        if (reader != null) {
            BufferedReader open = reader;
            reader = null;
            open.close();
        }
    }
}

/**
 * A hypothesis utterance, which will give you a list of hypothesis words.
 */
class HypothesisUtterance {

    private final List<HypothesisWord> words;
    private float startTime;
    private float endTime;

    /**
     * Creates a hypothesis utterance from a line of input describing the hypothesis. The line is read as a
     * sequence of {@code text startTime endTime} triples separated by whitespace, parentheses or commas, for
     * example {@code hello(0.1,0.9)}. A triple whose times are not numbers is reported on standard output and
     * skipped.
     *
     * @param line the line of hypothesis text
     */
    HypothesisUtterance(String line) {
        words = new LinkedList<HypothesisWord>();
        StringTokenizer st = new StringTokenizer(line, " \t\n\r\f(),");
        while (st.hasMoreTokens()) {
            String text = st.nextToken();
            try {
                float myStartTime = Float.parseFloat(st.nextToken());
                float myEndTime = Float.parseFloat(st.nextToken());
                words.add(new HypothesisWord(text, myStartTime, myEndTime));
            } catch (NumberFormatException nfe) {
                System.out.println("NumberFormatException at line: " + line);
                nfe.printStackTrace();
            }
        }
        if (!words.isEmpty()) {
            startTime = words.get(0).getStartTime();
            endTime = words.get(words.size() - 1).getEndTime();
        }
    }

    /**
     * Returns the number of words in this hypothesis.
     *
     * @return the number of words in this hypothesis
     */
    int getWordCount() {
        return words.size();
    }

    /**
     * Returns a list of the words in this hypothesis. The list is a copy, so changing it does not affect this
     * utterance.
     *
     * @return a list of the words in this hypothesis
     */
    List<HypothesisWord> getWords() {
        return new LinkedList<HypothesisWord>(words);
    }

    /**
     * Returns the start time of this hypothesis, i.e. the start time of its first word.
     *
     * @return the start time of this hypothesis
     */
    float getStartTime() {
        return startTime;
    }

    /**
     * Returns the end time of this hypothesis, i.e. the end time of its last word.
     *
     * @return the end time of this hypothesis
     */
    float getEndTime() {
        return endTime;
    }
}

/**
 * A word in the hypothesis, containing information about when the word started and ended.
 */
class HypothesisWord {

    private final String text;
    private final float startTime;
    private final float endTime;

    /**
     * Constructs a hypothesis word with the given start and end times.
     *
     * @param text      the text of the hypothesized word
     * @param startTime the starting time of the word
     * @param endTime   the ending time of the word
     */
    HypothesisWord(String text, float startTime, float endTime) {
        this.text = text;
        this.startTime = startTime;
        this.endTime = endTime;
    }

    /**
     * Returns the text of the word.
     *
     * @return the text of the word
     */
    String getText() {
        return text;
    }

    /**
     * Returns the starting time of the word.
     *
     * @return the starting time of the word
     */
    float getStartTime() {
        return startTime;
    }

    /**
     * Returns the ending time of the word.
     *
     * @return the ending time of the word
     */
    float getEndTime() {
        return endTime;
    }
}