/*
 * Copyright 1999-2002 Carnegie Mellon University.
 * Portions Copyright 2002 Sun Microsystems, Inc.
 * Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
 * All Rights Reserved. Use is subject to license terms.
 *
 * See the file "license.terms" for information on usage and
 * redistribution of this file, and for a DISCLAIMER OF ALL
 * WARRANTIES.
 *
 */
package edu.cmu.sphinx.tools.live;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.net.URL;
import java.util.LinkedList;
import java.util.List;

import edu.cmu.sphinx.frontend.util.ConcatFileDataSource;
import edu.cmu.sphinx.recognizer.Recognizer;
import edu.cmu.sphinx.result.Result;
import edu.cmu.sphinx.result.WordResult;
import edu.cmu.sphinx.util.*;
import edu.cmu.sphinx.util.props.*;

/**
 * Decodes a batch file containing a list of files to decode. The files can be either audio files or cepstral files,
 * but defaults to audio files.
 * <p>
 * The recognizer is run repeatedly until it returns no more results. Each timed hypothesis is appended to the
 * hypothesis transcript file, the hypotheses are aligned against the references supplied by the input source, and
 * finally a summary (including the number of gap insertion errors) is printed to standard output.
 */
public class LiveModeRecognizer implements Configurable {

    /** The property that specifies the recognizer to use */
    @S4Component(type = Recognizer.class)
    public final static String PROP_RECOGNIZER = "recognizer";

    /** The property that specifies the input data source, which also supplies the reference transcripts. */
    @S4Component(type = ConcatFileDataSource.class)
    public final static String PROP_INPUT_SOURCE = "inputSource";

    /** The property specifying whether to print out the gap insertion errors. */
    @S4Boolean(defaultValue = false)
    public static final String PROP_SHOW_GAP_INSERTIONS = "showGapInsertions";

    /** The property specifying the file the hypothesis transcript is written to. */
    @S4String(defaultValue = "hypothesis.txt")
    public final static String PROP_HYPOTHESIS_TRANSCRIPT = "hypothesisTranscript";

    /**
     * The property specifying the number of utterances to decode before alignment is performed. A value that is
     * not positive disables the periodic alignment; all results are then aligned once, after decoding has finished.
     */
    @S4Integer(defaultValue = -1)
    public final static String PROP_ALIGN_INTERVAL = "alignInterval";

    // TODO - the instrumentation in here that is looking for gap insertions
    // and performing the alignment and reporting of the live summary data
    // should probably be moved to a separate instrumentation package, much
    // like the BestPathAccuracyTracker.

    // -------------------------------
    // Configuration data
    // --------------------------------
    private Recognizer recognizer;
    private ConcatFileDataSource dataSource;
    private String hypothesisFile;
    private boolean showGapInsertions;

    // -------------------------------
    // Working data
    // --------------------------------
    private int alignInterval;
    private int numUtterances;
    private FileWriter hypothesisTranscript;
    private ReferenceSource referenceSource;
    private final NISTAlign aligner = new NISTAlign(true, true);

    /**
     * Creates a fully configured live mode recognizer.
     *
     * @param recognizer        the recognizer used for decoding
     * @param dataSource        the input source; it is also used as the source of the reference transcripts
     * @param skip              currently ignored; kept so that existing callers continue to compile
     * @param showGapInsertions whether the gap insertion errors should be printed
     * @param hypothesisFile    the name of the file the hypothesis transcript is written to
     * @param alignInterval     the number of utterances between two alignments; not positive to align only once
     *                          at the end
     */
    public LiveModeRecognizer(
            Recognizer recognizer,
            ConcatFileDataSource dataSource,
            int skip,
            boolean showGapInsertions,
            String hypothesisFile,
            int alignInterval) {
        this.recognizer = recognizer;
        this.dataSource = dataSource;
        this.showGapInsertions = showGapInsertions;
        this.hypothesisFile = hypothesisFile;
        this.alignInterval = alignInterval;
        this.referenceSource = dataSource;
    }

    /** Creates an unconfigured instance; it is configured through {@link #newProperties(PropertySheet)}. */
    public LiveModeRecognizer() {
    }

    /*
     * (non-Javadoc)
     *
     * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
     */
    public void newProperties(PropertySheet ps) throws PropertyException {
        recognizer = (Recognizer) ps.getComponent(PROP_RECOGNIZER);
        dataSource = (ConcatFileDataSource) ps.getComponent(PROP_INPUT_SOURCE);
        showGapInsertions = ps.getBoolean(PROP_SHOW_GAP_INSERTIONS);
        hypothesisFile = ps.getString(PROP_HYPOTHESIS_TRANSCRIPT);
        alignInterval = ps.getInt(PROP_ALIGN_INTERVAL);
        // the input source doubles as the provider of the reference transcripts
        referenceSource = dataSource;
    }

    /**
     * Decodes the batch of audio files.
     * <p>
     * The hypothesis transcript is closed and the recognizer is deallocated even when decoding fails part-way
     * through, so a failed run does not leave the transcript file open.
     *
     * @throws IOException if IO went wrong
     */
    public void decode() throws IOException {
        List<String> resultList = new LinkedList<String>();
        Result result;
        int startReference = 0;

        hypothesisTranscript = new FileWriter(hypothesisFile);
        try {
            recognizer.allocate();
            try {
                while ((result = recognizer.recognize()) != null) {
                    numUtterances++;
                    String resultText = result.getBestResultNoFiller();

                    System.out.println("\nHYP: " + resultText);
                    System.out.println(" Sentences: " + numUtterances);
                    resultList.add(resultText);

                    for (WordResult wr : result.getTimedBestResult(false)) {
                        hypothesisTranscript.write(wr.toString());
                        hypothesisTranscript.write(' ');
                    }
                    hypothesisTranscript.write('\n');
                    // flush after every utterance so the transcript stays usable if decoding is interrupted
                    hypothesisTranscript.flush();

                    if (alignInterval > 0 && (numUtterances % alignInterval == 0)) {
                        // perform alignment if the property 'alignInterval' is set
                        List<String> references = referenceSource.getReferences();
                        List<String> section =
                                references.subList(startReference, references.size());
                        alignResults(resultList, section);
                        resultList = new LinkedList<String>();
                        startReference = references.size();
                    }
                }
                hypothesisTranscript.close();

                // perform alignment on remaining results
                List<String> references = referenceSource.getReferences();
                List<String> section =
                        references.subList(startReference, references.size());
                if (!resultList.isEmpty() || !section.isEmpty()) {
                    alignResults(resultList, section);
                }

                System.out.println("# ------------- Summary Statistics -------------");
                aligner.printTotalSummary();
            } finally {
                // only reached once allocate() has succeeded
                recognizer.deallocate();
            }
        } finally {
            // no effect if the writer was already closed above; releases the file on a failure
            hypothesisTranscript.close();
        }

        showLiveSummary();
        System.out.println();
    }

    /**
     * Shows the test statistics that relates to live mode decoding.
     *
     * @throws IOException if the gap insertion detection fails
     */
    private void showLiveSummary() throws IOException {
        int actualUtterances = referenceSource.getReferences().size();
        int gapInsertions = detectGapInsertionErrors();

        System.out.println
                (" Utterances: Actual: " + actualUtterances +
                        " Found: " + numUtterances);
        System.out.println
                (" Gap Insertions: " + gapInsertions);
    }

    /**
     * Detect gap insertion errors by comparing the transcript file of the data source with the hypothesis file.
     *
     * @return the number of gap insertions reported by the detector
     * @throws IOException if the detection fails
     */
    private int detectGapInsertionErrors() throws IOException {
        Timer gapTimer = TimerPool.getTimer(this, "GapInsertionDetector");
        gapTimer.start();
        GapInsertionDetector gid = new GapInsertionDetector(
                dataSource.getTranscriptFile(), hypothesisFile, showGapInsertions);
        int gapInsertions = gid.detect();
        gapTimer.stop();
        return gapInsertions;
    }

    /**
     * Align the list of results with reference text. This method figures out how many words and sentences match, and
     * the different types of errors.
     *
     * @param hypothesisList the list of hypotheses
     * @param referenceList  the list of references
     */
    private void alignResults(List<String> hypothesisList, List<String> referenceList) {
        System.out.println();
        System.out.println("Aligning results...");
        System.out.println(" Utterances: Found: " + hypothesisList.size() +
                " Actual: " + referenceList.size());

        String hypothesis = listToString(hypothesisList);
        String reference = listToString(referenceList);
        saveAlignedText(hypothesis, reference);

        getAlignTimer().start();
        aligner.align(reference, hypothesis);
        getAlignTimer().stop();

        System.out.println(" ...done aligning");
        System.out.println();
    }

    /**
     * Saves the hypothesis and reference text that are about to be aligned to the file "align.txt", replacing its
     * previous content. This is a best-effort operation: an I/O failure is reported but does not stop decoding.
     *
     * @param hypothesis the hypothesis text
     * @param reference  the reference text
     */
    private void saveAlignedText(String hypothesis, String reference) {
        try {
            FileWriter writer = new FileWriter("align.txt");
            try {
                writer.write(hypothesis);
                writer.write("\n");
                writer.write(reference);
            } finally {
                // release the file even if one of the writes failed
                writer.close();
            }
        } catch (IOException ioe) {
            // deliberately not rethrown: the file is only a debugging aid
            ioe.printStackTrace();
        }
    }

    /**
     * Converts the given list of strings into one string, appending a space character after each string (so a
     * non-empty result ends with a space).
     *
     * @param resultList the list of strings
     * @return a string which is a concatenation of the strings in the list, each followed by a space character
     */
    private String listToString(List<String> resultList) {
        StringBuilder sb = new StringBuilder();
        for (String result : resultList) {
            sb.append(result).append(' ');
        }
        return sb.toString();
    }

    /**
     * Return the timer for alignment.
     *
     * @return the timer named "Align" that belongs to this instance
     */
    private Timer getAlignTimer() {
        return TimerPool.getTimer(this, "Align");
    }

    /**
     * Do clean up. Does nothing if {@link #decode()} has not been called yet.
     *
     * @throws IOException if something went wrong
     */
    public void close() throws IOException {
        // the writer only exists once decode() has been started
        if (hypothesisTranscript != null) {
            hypothesisTranscript.close();
        }
    }

    /**
     * Main method of the LiveModeRecognizer. Looks up the component named "live" in the given configuration and
     * runs it.
     *
     * @param argv argv[0] : config file
     */
    public static void main(String[] argv) {
        if (argv.length != 1) {
            System.out.println("Usage: LiveModeRecognizer config-file.xml ");
            System.exit(1);
        }

        String cmFile = argv[0];
        ConfigurationManager cm;
        LiveModeRecognizer lmr;

        try {
            URL url = new File(cmFile).toURI().toURL();
            cm = new ConfigurationManager(url);
            lmr = (LiveModeRecognizer) cm.lookup("live");
        } catch (IOException ioe) {
            System.err.println("I/O error during initialization: \n " + ioe);
            return;
        } catch (PropertyException e) {
            System.err.println("Error during initialization: \n " + e);
            e.printStackTrace();
            return;
        }

        if (lmr == null) {
            System.err.println("Can't find liveModeRecognizer in " + cmFile);
            return;
        }

        try {
            lmr.decode();
        } catch (IOException ioe) {
            System.err
                    .println("I/O error during decoding: " + ioe.getMessage());
        }
    }

    //
    // @Test
    // public void testUseConcatDataSource() {
    //     Map<String, Object> props = new HashMap<String, Object>();
    //     props.put("inputSource", new ConcatFileDataSource());
    //     props.put("recognizer", new Recognizer());
    //     LiveModeRecognizer liveRecognizer = (LiveModeRecognizer) ConfigurationManager.getInstance(LiveModeRecognizer.class, props);
    //
    // }
}