/* * Copyright 1999-2002 Carnegie Mellon University. * Portions Copyright 2002 Sun Microsystems, Inc. * Portions Copyright 2002 Mitsubishi Electric Research Laboratories. * All Rights Reserved. Use is subject to license terms. * * See the file "license.terms" for information on usage and * redistribution of this file, and for a DISCLAIMER OF ALL * WARRANTIES. * */ package edu.cmu.sphinx.tools.batch; import edu.cmu.sphinx.frontend.BaseDataProcessor; import edu.cmu.sphinx.frontend.DataProcessor; import edu.cmu.sphinx.frontend.util.StreamCepstrumSource; import edu.cmu.sphinx.frontend.util.StreamDataSource; import edu.cmu.sphinx.frontend.util.StreamHTKCepstrum; import edu.cmu.sphinx.recognizer.Recognizer; import edu.cmu.sphinx.recognizer.Recognizer.State; import edu.cmu.sphinx.result.Result; import edu.cmu.sphinx.util.*; import edu.cmu.sphinx.util.props.*; import javax.sound.sampled.AudioSystem; import javax.sound.sampled.UnsupportedAudioFileException; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.net.URL; import java.util.List; import java.util.logging.Logger; /** * Decodes a batch file containing a list of files to decode. The files can be * either audio files or cepstral files, but defaults to audio files. The audio * data should be 16-bit, 16kHz, PCM-linear data. Since this classes makes use * of Java Sound, it supports all the audio file formats that are supported by * Java Sound. If the audio file does not correspond to a format supported by * Java Sound, it is treated as a raw audio file (i.e., one without a header). * Audio file formats differ in the endian order of the audio data. Therefore, * it is important to specify it correctly in the configuration of the <a * href="../../frontend/util/StreamDataSource.html">StreamDataSource</a>. Note * that in the ideal situation, the audio format of the data should be passed * into the StreamDataSource, so that no extra configuration is needed. 
This will be fixed in future releases.
 * <p>
 * To run this BatchModeRecognizer:
 *
 * <pre>
 * java BatchModeRecognizer &lt;xmlConfigFile&gt; &lt;batchFile&gt;
 * </pre>
 *
 * where <code>xmlConfigFile</code> is an XML-based configuration file and
 * <code>batchFile</code> is a file listing all the files to decode and
 * transcript of those files. For information about the configuration file,
 * refer to the document <a
 * href="../../util/props/doc-files/ConfigurationManagement.html"> Sphinx-4
 * Configuration Management</a>. For information about the batch file, refer to
 * the <a href="../../../../../../index.html#batch_files"> batch file
 * description</a>.
 * <p>
 * This class will send recognition results to the logger if the log level is
 * set to INFO.
 */
public class BatchModeRecognizer implements Configurable {

    /**
     * The property for how many files to skip for every decode.
     */
    @S4Integer(defaultValue = 0)
    public final static String PROP_SKIP = "skip";

    /**
     * The property for how many utterances to process
     */
    @S4Integer(defaultValue = 1000000)
    public final static String PROP_COUNT = "count";

    /**
     * The property that specifies which batch job is to be run.
     */
    @S4Integer(defaultValue = 0)
    public final static String PROP_WHICH_BATCH = "whichBatch";

    /**
     * The property for the total number of batch jobs the decoding run is being divided into.
     * <p>
     * The BatchDecoder supports running a subset of a batch. This allows a test to be distributed among several
     * machines.
     */
    @S4Integer(defaultValue = 1)
    public final static String PROP_TOTAL_BATCHES = "totalBatches";

    /**
     * The property that defines whether or not the decoder should use the pooled batch manager
     */
    @S4Boolean(defaultValue = false)
    public final static String PROP_USE_POOLED_BATCH_MANAGER = "usePooledBatchManager";

    /**
     * The property that specifies the recognizer to use
     */
    @S4Component(type = Recognizer.class)
    public final static String PROP_RECOGNIZER = "recognizer";

    /**
     * The property that specifies the input source
     */
    @S4ComponentList(type = BaseDataProcessor.class)
    public final static String PROP_INPUT_DATA_PROCESSORS = "inputDataProcessors";

    // -------------------------------
    // Configuration data
    // --------------------------------
    protected String name;
    protected List<DataProcessor> inputDataProcessors;
    protected int skip;
    /** Upper bound on the number of items decoded (read from {@link #PROP_COUNT}). */
    protected int utteranceId;
    protected int whichBatch;
    protected int totalBatches;
    protected boolean usePooledBatchManager;
    protected BatchManager batchManager;
    protected Recognizer recognizer;
    protected Logger logger;

    /** The batch item the interactive shell commands currently operate on. */
    protected BatchItem curBatchItem;
    protected ConfigurationManager cm;

    /** Number of items decoded since the last call to {@link #start(String)}. */
    int count;

    /**
     * Creates a recognizer that is configured programmatically instead of
     * through a property sheet. No configuration manager is attached.
     *
     * @param recognizer            the recognizer used for decoding
     * @param inputDataProcessors   the front-end sources that receive the input stream
     * @param skip                  passed to the batch manager as its skip value
     * @param utteranceId           the maximum number of items to decode
     * @param whichBatch            passed to the simple batch manager
     * @param totalBatches          passed to the simple batch manager
     * @param usePooledBatchManager whether to use the pooled batch manager
     */
    public BatchModeRecognizer(
            Recognizer recognizer,
            List<DataProcessor> inputDataProcessors,
            int skip,
            int utteranceId,
            int whichBatch,
            int totalBatches,
            boolean usePooledBatchManager
    ) {
        logger = Logger.getLogger(getClass().getName());
        cm = null;
        this.skip = skip;
        this.utteranceId = utteranceId;
        this.whichBatch = whichBatch;
        this.totalBatches = totalBatches;
        this.usePooledBatchManager = usePooledBatchManager;
        this.recognizer = recognizer;
        this.inputDataProcessors = inputDataProcessors;
    }

    /**
     * Creates an unconfigured instance; the fields are filled in by
     * {@link #newProperties(PropertySheet)}.
     */
    public BatchModeRecognizer() {
    }

    /*
     * (non-Javadoc)
     *
     * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
     */
    public void newProperties(PropertySheet ps) throws PropertyException {
        logger = ps.getLogger();
        cm = ConfigurationManagerUtils.getPropertyManager(ps);
        skip = ps.getInt(PROP_SKIP);
        utteranceId = ps.getInt(PROP_COUNT);
        // A non-positive count means "no limit".
        if (utteranceId <= 0) {
            utteranceId = Integer.MAX_VALUE;
        }

        whichBatch = ps.getInt(PROP_WHICH_BATCH);
        totalBatches = ps.getInt(PROP_TOTAL_BATCHES);
        usePooledBatchManager = ps.getBoolean(PROP_USE_POOLED_BATCH_MANAGER);

        recognizer = (Recognizer) ps.getComponent(PROP_RECOGNIZER);
        inputDataProcessors = ps.getComponentList(PROP_INPUT_DATA_PROCESSORS, DataProcessor.class);
    }

    /**
     * Sets the batch file to use for this recognition
     *
     * @param batchFile the name of the batch file
     * @throws IOException if the file could not be opened or read.
     */
    public void setBatchFile(String batchFile) throws IOException {
        if (usePooledBatchManager) {
            batchManager = new PooledBatchManager(batchFile, skip);
        } else {
            batchManager = new SimpleBatchManager(batchFile, skip, whichBatch, totalBatches);
        }
    }

    /**
     * Decodes the batch of audio files
     *
     * @param batchFile file with list of files to decode
     * @throws IOException if IO went wrong
     */
    public void decode(String batchFile) throws IOException {
        BatchItem batchItem;
        int count = 0;
        // Track what still has to be released if decoding is aborted.
        boolean needsDeallocate = false;
        boolean needsStop = false;
        try {
            recognizer.allocate();
            needsDeallocate = true;
            setBatchFile(batchFile);

            batchManager.start();
            needsStop = true;
            logger.info("BatchDecoder: decoding files in "
                    + batchManager.getFilename());

            while (count < utteranceId
                    && (batchItem = batchManager.getNextItem()) != null) {
                setInputStream(batchItem.getFilename());
                Result result = recognizer.recognize(batchItem.getTranscript());
                logger.info("File  : " + batchItem.getFilename());
                logger.info("Result: " + result);
                count++;
            }

            // Regular shutdown: clear each flag first so a failing call is not retried.
            needsStop = false;
            batchManager.stop();
            needsDeallocate = false;
            recognizer.deallocate();
        } catch (IOException io) {
            logger.severe("I/O error during decoding: " + io.getMessage());
            throw io;
        } finally {
            // No-op after a regular shutdown; releases resources after a failure.
            releaseAfterFailure(needsStop, needsDeallocate);
        }
        logger.info("BatchDecoder: " + count + " files decoded");
    }

    /**
     * Best-effort cleanup used when decoding is aborted by an exception.
     *
     * @param stopBatch  whether the batch manager was started and must be stopped
     * @param deallocate whether the recognizer was allocated and must be released
     */
    private void releaseAfterFailure(boolean stopBatch, boolean deallocate) {
        if (stopBatch) {
            try {
                batchManager.stop();
            } catch (IOException ioe) {
                // Do not mask the exception that aborted decoding.
                logger.warning("Could not stop batch manager: " + ioe.getMessage());
            }
        }
        // Same guard as shell(): only deallocate a recognizer that is READY.
        if (deallocate && recognizer.getState() == State.READY) {
            recognizer.deallocate();
        }
    }

    /**
     * Sets the input stream to the given filename
     *
     * @param filename the filename to set the input stream to
     * @throws IOException if an error occurs
     */
    void setInputStream(String filename) throws IOException {
        for (DataProcessor dataSource : inputDataProcessors) {
            // Opening a stream for a processor that never consumes it would leak it.
            if (!acceptsInputStream(dataSource)) {
                continue;
            }

            InputStream is = openInputStream(filename);
            boolean handedOver = false;
            try {
                if (dataSource instanceof StreamDataSource) {
                    ((StreamDataSource) dataSource).setInputStream(is);
                } else if (dataSource instanceof StreamCepstrumSource) {
                    boolean isBigEndian = Utilities
                            .isCepstraFileBigEndian(filename);
                    StreamCepstrumSource cepstrumSource =
                            (StreamCepstrumSource) dataSource;
                    cepstrumSource.setInputStream(is, isBigEndian);
                    // TODO: christophe: should use an interface there !!
                } else {
                    StreamHTKCepstrum cepstrumSource = (StreamHTKCepstrum) dataSource;
                    cepstrumSource.setInputStream(is);
                }
                handedOver = true;
            } finally {
                // The stream is still ours if the hand-over failed.
                if (!handedOver) {
                    is.close();
                }
            }
        }
    }

    /**
     * Tells whether the given processor is one of the stream-fed sources
     * handled by {@link #setInputStream(String)}.
     */
    private static boolean acceptsInputStream(DataProcessor dataSource) {
        return dataSource instanceof StreamDataSource
                || dataSource instanceof StreamCepstrumSource
                || dataSource instanceof StreamHTKCepstrum;
    }

    /**
     * Opens the given file through Java Sound, falling back to a raw file
     * stream when Java Sound does not recognise the format.
     *
     * @param filename the file to open
     * @return a stream positioned at the start of the data
     * @throws IOException if the file cannot be opened or read
     */
    private InputStream openInputStream(String filename) throws IOException {
        try {
            File file = new File(filename);
            logger.info
                    (AudioSystem.getAudioFileFormat(file).toString());
            return AudioSystem.getAudioInputStream(file);
        } catch (UnsupportedAudioFileException uafe) {
            logger.info
                    ("Reading " + filename + " as raw audio file.");
            InputStream is = new FileInputStream(filename);
            boolean ready = false;
            try {
                // Total hack: NIST Sphere files aren't supported by
                // javax.sound, so skip their header
                if (filename.toLowerCase().endsWith(".sph")) {
                    logger.info("Skipping 1024-byte Sphere header.");
                    skipFully(is, 1024);
                }
                ready = true;
            } finally {
                if (!ready) {
                    is.close();
                }
            }
            return is;
        }
    }

    /**
     * Skips up to {@code byteCount} bytes. A single {@code skip} call may
     * skip fewer bytes than requested, so it is repeated until the count is
     * reached or the stream ends.
     */
    private static void skipFully(InputStream in, long byteCount) throws IOException {
        long remaining = byteCount;
        while (remaining > 0) {
            long skipped = in.skip(remaining);
            if (skipped > 0) {
                remaining -= skipped;
            } else if (in.read() >= 0) {
                // skip() made no progress but the stream is not exhausted.
                remaining--;
            } else {
                return; // stream ended before the requested count was reached
            }
        }
    }

    /**
     * Points the input sources at the given audio file and runs recognition.
     */
    private Result recognizeFile(String audioFile, String transcript) throws IOException {
        setInputStream(audioFile);
        return recognizer.recognize(transcript);
    }

    /**
     * Returns the text a shell command reports for a (possibly missing) result.
     */
    private static String bestResult(Result result) {
        return result != null ? result.getBestResultNoFiller() : "";
    }

    /**
     * Add commands to the given interpreter to support shell mode
     *
     * @param ci the interpreter
     */
    void addCommands(CommandInterpreter ci) {
        ci.add("ls", new CommandInterface() {

            public String execute(CommandInterpreter ci, String[] args) {
                if (args.length != 1) {
                    ci.putResponse("Usage: ls");
                } else {
                    for (String componentName : cm.getInstanceNames(Configurable.class)) {
                        ci.putResponse(componentName);
                    }
                }
                return "";
            }

            public String getHelp() {
                return "list active components";
            }
        });

        ci.add("show", new CommandInterface() {

            public String execute(CommandInterpreter ci, String[] args) {
                if (args.length < 2) {
                    ConfigurationManagerUtils.showConfig(cm);
                } else {
                    for (int i = 1; i < args.length; i++) {
                        String componentName = args[i];
                        ConfigurationManagerUtils.showConfig(cm, componentName);
                    }
                }
                return "";
            }

            public String getHelp() {
                return "show component configuration";
            }
        });

        ci.add("edit", new CommandInterface() {

            public String execute(CommandInterpreter ci, String[] args) {
                if (args.length != 2) {
                    ci.putResponse("Usage: edit component");
                } else {
                    try {
                        ConfigurationManagerUtils.editConfig(cm, args[1]);
                    } catch (Exception e) {
                        ci.putResponse("Failed to save " + e);
                    }
                }
                return "";
            }

            public String getHelp() {
                return "edit a component's configuration";
            }
        });

        ci.add("save", new CommandInterface() {

            public String execute(CommandInterpreter ci, String[] args) {
                if (args.length != 2) {
                    ci.putResponse("Usage: save filename.xml");
                } else {
                    ConfigurationManagerUtils.save(cm, new File(args[1]));
                }
                return "";
            }

            public String getHelp() {
                return "save configuration to a file";
            }
        });

        ci.add("set", new CommandInterface() {

            public String execute(CommandInterpreter ci, String[] args) {
                if (args.length != 4) {
                    ci.putResponse("Usage: set component property value");
                } else {
                    // NOTE(review): the usage string orders the arguments as
                    // "component property value", but args[3] is passed before
                    // args[2] here - confirm against setProperty's parameter order.
                    ConfigurationManagerUtils.setProperty(BatchModeRecognizer.this.cm, args[1], args[3], args[2]);
                }
                return "";
            }

            public String getHelp() {
                return "set component property to a given value";
            }
        });

        ci.add("recognize", new CommandInterface() {

            public String execute(CommandInterpreter ci, String[] args) {
                Result result = null;

                if (args.length < 2) {
                    ci.putResponse("Usage: recognize audio [transcript]");
                } else {
                    String audioFile = args[1];
                    String transcript = null;
                    if (args.length > 2) {
                        transcript = args[2];
                    }
                    try {
                        result = recognizeFile(audioFile, transcript);
                    } catch (IOException io) {
                        ci.putResponse("I/O error during decoding: " +
                                io.getMessage());
                    }
                }

                return bestResult(result);
            }

            public String getHelp() {
                return "perform recognition on the given audio";
            }
        });
        ci.addAlias("recognize", "rec");

        ci.add("statsReset", new CommandInterface() {

            public String execute(CommandInterpreter ci, String[] args) {
                if (args.length != 1) {
                    ci.putResponse("Usage: statsReset");
                } else {
                    recognizer.resetMonitors();
                }
                return "";
            }

            public String getHelp() {
                return "resets gathered statistics";
            }
        });

        ci.add("batchRecognize", new CommandInterface() {

            public String execute(CommandInterpreter ci, String[] args) {
                Result result = null;

                if (args.length != 1) {
                    ci.putResponse("Usage: batchRecognize");
                } else {
                    try {
                        if (curBatchItem == null) {
                            batchManager.start();
                            curBatchItem = batchManager.getNextItem();
                        }
                        if (curBatchItem == null) {
                            // An empty batch has no current item to recognize.
                            ci.putResponse("No items in the batch");
                        } else {
                            result = recognizeFile(curBatchItem.getFilename(),
                                    curBatchItem.getTranscript());
                        }
                    } catch (IOException io) {
                        ci.putResponse("I/O error during decoding: " +
                                io.getMessage());
                    }
                }
                return bestResult(result);
            }

            public String getHelp() {
                return "perform recognition on the current batch item";
            }
        });
        ci.addAlias("batchRecognize", "br");

        ci.add("batchNext", new CommandInterface() {

            public String execute(CommandInterpreter ci, String[] args) {
                Result result = null;

                if (args.length != 1 && args.length != 2) {
                    ci.putResponse("Usage: batchNext [norec]");
                } else {
                    try {
                        // if we don't have a batch item, start (or
                        // start over)
                        if (curBatchItem == null) {
                            batchManager.start();
                        }
                        curBatchItem = batchManager.getNextItem();

                        // if we reach the end, just loop back and
                        // start over.
                        if (curBatchItem == null) {
                            batchManager.start();
                            curBatchItem = batchManager.getNextItem();
                        }

                        if (curBatchItem == null) {
                            // Still nothing after restarting: the batch is empty.
                            ci.putResponse("No items in the batch");
                        } else {
                            String audioFile = curBatchItem.getFilename();
                            String transcript = curBatchItem.getTranscript();
                            if (args.length == 2) {
                                ci.putResponse("Skipping: " + transcript);
                            } else {
                                result = recognizeFile(audioFile, transcript);
                            }
                        }
                    } catch (IOException io) {
                        ci.putResponse("I/O error during decoding: " +
                                io.getMessage());
                    }
                }
                return bestResult(result);
            }

            public String getHelp() {
                return "advance the batch and perform recognition";
            }
        });
        ci.addAlias("batchNext", "bn");

        ci.add("batchAll", new CommandInterface() {

            public String execute(CommandInterpreter ci, String[] args) {
                Result result = null;

                if (args.length != 1) {
                    ci.putResponse("Usage: batchAll");
                } else {
                    try {
                        if (curBatchItem == null) {
                            batchManager.start();
                        }

                        while (true) {
                            curBatchItem = batchManager.getNextItem();
                            // if we reach the end bail out
                            if (curBatchItem == null) {
                                return "";
                            }
                            result = recognizeFile(curBatchItem.getFilename(),
                                    curBatchItem.getTranscript());
                        }
                    } catch (IOException io) {
                        ci.putResponse("I/O error during decoding: " +
                                io.getMessage());
                    }
                }
                // Only reached on a usage error or after an I/O failure.
                return bestResult(result);
            }

            public String getHelp() {
                return "recognize all of the remaining batch items";
            }
        });

        ci.add("batchReset", new CommandInterface() {

            public String execute(CommandInterpreter ci, String[] args) {
                if (args.length != 1) {
                    ci.putResponse("Usage: batchReset");
                } else {
                    try {
                        batchManager.start();
                    } catch (IOException ioe) {
                        ci.putResponse("trouble reseting batch");
                    }
                }
                return "";
            }

            public String getHelp() {
                return "reset the batch to the beginning";
            }
        });

        ci.add("batchLoad", new CommandInterface() {

            public String execute(CommandInterpreter ci, String[] args) {
                if (args.length != 2) {
                    ci.putResponse("Usage: batchLoad batchfile");
                } else {
                    try {
                        setBatchFile(args[1]);
                        // Forget the item of the previous batch so the next batch
                        // command starts the newly loaded batch manager.
                        curBatchItem = null;
                    } catch (IOException ioe) {
                        ci.putResponse("Can't load " + args[1] + ' ' + ioe);
                    }
                }
                return "";
            }

            public String getHelp() {
                return "load a new batch file";
            }
        });
    }

    /**
     * Runs an interactive command shell on top of the given batch file.
     *
     * @param batchfile the batch file made available to the batch commands
     * @throws IOException if the batch file cannot be loaded or the batch
     *                     manager fails
     */
    public void shell(String batchfile) throws IOException {
        try {
            CommandInterpreter ci = new CommandInterpreter();
            ci.setPrompt("s4> ");
            addCommands(ci);
            setBatchFile(batchfile);
            recognizer.allocate();
            try {
                ci.run();
                batchManager.stop();
            } finally {
                // Release the recognizer even if the shell or stop() failed.
                if (recognizer.getState() == State.READY) {
                    recognizer.deallocate();
                }
            }
        } catch (IOException io) {
            logger.severe("I/O error during decoding: " + io.getMessage());
            throw io;
        }
    }

    /**
     * Main method of this BatchDecoder.
     *
     * @param argv argv[0] : config.xml argv[1] : a file listing all the audio files to decode;
     *             an optional argv[2] of "-shell" starts the interactive shell instead
     */
    public static void main(String[] argv) {
        if (argv.length < 2) {
            System.out.println(
                    "Usage: BatchDecoder propertiesFile batchFile [-shell]");
            System.exit(1);
        }
        String cmFile = argv[0];
        String batchFile = argv[1];
        ConfigurationManager cm;
        BatchModeRecognizer bmr;

        try {
            URL url = new File(cmFile).toURI().toURL();
            cm = new ConfigurationManager(url);
            bmr = (BatchModeRecognizer) cm.lookup("batch");

            if (bmr == null) {
                System.err.println("Can't find batchModeRecognizer in "
                        + cmFile);
                return;
            }

            if (argv.length >= 3 && argv[2].equals("-shell")) {
                bmr.shell(batchFile);
            } else {
                bmr.decode(batchFile);
            }
            System.out.println("Tokens created: " + StatisticsVariable.getStatisticsVariable("totalTokensScored").value);
        } catch (Exception e) {
            System.err.println("Error during decoding: \n  ");
            e.printStackTrace();
        }
    }

    /**
     * Allocates the recognizer and starts the batch so that items can be
     * decoded one at a time with {@link #recognize()}.
     *
     * @param batchFile file with list of files to decode
     * @throws IOException if the batch file cannot be loaded or started
     */
    public void start(String batchFile) throws IOException {
        recognizer.allocate();
        setBatchFile(batchFile);

        batchManager.start();
        logger.info("BatchDecoder: decoding files in "
                + batchManager.getFilename());

        count = 0;
    }

    /**
     * Stops the batch manager and deallocates the recognizer.
     *
     * @throws IOException if the batch manager fails to stop
     */
    public void stop() throws IOException {
        batchManager.stop();
        recognizer.deallocate();
    }

    /**
     * Decodes the next item of the batch started with {@link #start(String)}.
     *
     * @return the recognition result, or <code>null</code> when the batch is
     *         exhausted or the configured item limit has been reached
     * @throws IOException if reading the batch or the audio fails
     */
    public Result recognize() throws IOException {
        Result result = null;
        BatchItem batchItem;
        if (count < utteranceId
                && (batchItem = batchManager.getNextItem()) != null) {
            setInputStream(batchItem.getFilename());
            result = recognizer.recognize(batchItem.getTranscript());
            logger.info("File  : " + batchItem.getFilename());
            logger.info("Result: " + result);
            count++;
        }
        logger.info("BatchDecoder: " + count + " files decoded");
        return result;
    }
}