/* * Copyright 1999-2002 Carnegie Mellon University. * Portions Copyright 2002 Sun Microsystems, Inc. * Portions Copyright 2002 Mitsubishi Electric Research Laboratories. * All Rights Reserved. Use is subject to license terms. * * See the file "license.terms" for information on usage and * redistribution of this file, and for a DISCLAIMER OF ALL * WARRANTIES. * */ package edu.cmu.sphinx.frontend.util; import edu.cmu.sphinx.util.BatchFile; import edu.cmu.sphinx.util.ReferenceSource; import edu.cmu.sphinx.util.props.*; import java.io.*; import java.util.Enumeration; import java.util.LinkedList; import java.util.List; import java.util.Random; /** * Concatenates a list raw headerless audio files as one continuous audio stream. A {@link * edu.cmu.sphinx.frontend.DataStartSignal DataStartSignal} will be placed before the start of the first file, and a * {@link edu.cmu.sphinx.frontend.DataEndSignal DataEndSignal} after the last file. No DataStartSignal or DataEndSignal * will be placed between them. Optionally, silence can be added in-between the audio files by setting the property: * <pre>edu.cmu.sphinx.frontend.util.ConcatFileDataSource.silenceFile</pre> * to a audio file for silence. By default, no silence is added. Moreover, one can also specify how many files to skip * for every file read. * <p> * You can also specify the name of a transcript file to write the transcription to. The transcription will be written * in HUB-4 style. A sample HUB-4 transcript looks like: * <pre> * bn99en_1 1 peter_jennings 0.806084 7.079850 <o,f4,male> Tonight this * Thursday big pressure on the Clinton administration to do something about * the latest killing in Yugoslavia * bn99en_1 1 peter_jennings 7.079850 14.007608 <o,fx,male> Airline passengers * and outrageous behavior at thirty thousand feet What can an airline do * ... * bn99en_1 1 inter_segment_gap 23.097000 28.647000 <o,fx,> * ... * </pre> * The format of each line is: * <pre> * test_set_name category speaker_name start_time_in_seconds * end_time_in_seconds <category,hub4_focus_conditions,speaker_sex> transcript * </pre> * In our example above, * <pre> * test_set_name is "bn99en_1" * category is "1" * speaker_name is "peter_jennings" * start_time_in_seconds is "0.806084" * end_time_in_seconds is "7.079850" * category is "o" for "Overall" * hub4_focus_conditions is: * "f0" for "Baseline//Broadcast//Speech" * "f1" for "Spontaneous//Broadcast//Speech" * "f2" for "Speech Over//Telephone//Channels" * "f3" for "Speech in the//Presence of//Background Music" * "f4" for "Speech Under//Degraded//Acoustic Conditions" * "f5" for "Speech from//Non-Native//Speakers" * "fx" for "All other speech" * speaker_sex is "male" * transcript is "Tonight this Thursday big pressure on the Clinton * administration to do something about the latest killing in Yugoslavia * </pre> * The ConcatFileDataSource will produce such a transcript if the name of the file to write to is supplied in the * constructor. This transcript file will be used in detected gap insertion errors, because it accurately describes the * "correct" sequence of speech and silences in the concatenated version of the audio files. */ public class ConcatFileDataSource extends StreamDataSource implements ReferenceSource { /** The property that specifies which file to start at. */ @S4Integer(defaultValue = 1) public static final String PROP_START_FILE = "startFile"; /** The property that specifies the number of files to skip for every file read. */ @S4Integer(defaultValue = 0) public static final String PROP_SKIP = "skip"; /** The property that specifies the total number of files to read. The default value should be no limit. */ @S4Integer(defaultValue = -1) public static final String PROP_TOTAL_FILES = "totalFiles"; /** * The property that specifies the silence audio file, if any. If this property is null, then no silences are * added in between files. */ @S4String public static final String PROP_SILENCE_FILE = "silenceFile"; /** The property that specifies whether to add random silence. */ @S4Boolean(defaultValue = false) public static final String PROP_ADD_RANDOM_SILENCE = "addRandomSilence"; /** * The property that specifies the maximum number of times the silence file is added between files. If * PROP_ADD_RANDOM_SILENCE is set to true, the number of times the silence file is added is between 1 and this * value. If PROP_ADD_RANDOM_SILENCE is set to false, this value will be the number of times the silence file is * added. So if PROP_MAX_SILENCE is set to 3, then the silence file will be added three times between files. */ @S4Integer(defaultValue = 3) public static final String PROP_MAX_SILENCE = "maxSilence"; /** * The property that specifies the name of the transcript file. If this property is set, a transcript file * will be created. No transcript file will be created if this property is not set. */ @S4String public static final String PROP_TRANSCRIPT_FILE = "transcriptFile"; /** The property for the file containing a list of audio files to read from. */ @S4String public static final String PROP_BATCH_FILE = "batchFile"; private static final String GAP_LABEL = "inter_segment_gap"; private boolean addRandomSilence; private int skip; private int maxSilence; private int silenceCount; private int bytesPerSecond; private long totalBytes; private long silenceFileLength; private String silenceFileName; private String nextFile; private String context; private String transcriptFile; private List<String> referenceList; private FileWriter transcript; private int startFile; private int totalFiles; private String batchFile; public ConcatFileDataSource( int sampleRate, int bytesPerRead, int bitsPerSample, boolean bigEndian, boolean signedData, boolean addRandomSilence, int maxSilence, int skip, String silenceFileName, int startFile, int totalFiles, String transcriptFile, String batchFile) { super(sampleRate,bytesPerRead,bitsPerSample,bigEndian,signedData ); this.bytesPerSecond = sampleRate * (bitsPerSample / 8); this.addRandomSilence = addRandomSilence; this.maxSilence = maxSilence; this.skip = skip; this.silenceFileName = silenceFileName; this.startFile = startFile; this.totalFiles = totalFiles; this.transcriptFile = transcriptFile; this.batchFile = batchFile; } public ConcatFileDataSource() { } /* * (non-Javadoc) * * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet) */ @Override public void newProperties(PropertySheet ps) throws PropertyException { super.newProperties(ps); bytesPerSecond = sampleRate * (bitsPerSample / 8); addRandomSilence = ps.getBoolean(PROP_ADD_RANDOM_SILENCE); maxSilence = ps.getInt(PROP_MAX_SILENCE); skip = ps.getInt(PROP_SKIP); silenceFileName = ps.getString(PROP_SILENCE_FILE); startFile = ps.getInt(PROP_START_FILE); totalFiles = ps.getInt(PROP_TOTAL_FILES); transcriptFile = ps.getString(PROP_TRANSCRIPT_FILE); batchFile = ps.getString(PROP_BATCH_FILE); } /** Initializes a ConcatFileDataSource. */ @Override public void initialize() { super.initialize(); try { File silenceFile = new File(silenceFileName); silenceFileLength = silenceFile.length(); if (transcriptFile != null) { transcript = new FileWriter(transcriptFile); } if (batchFile == null) { throw new Error("BatchFile cannot be null!"); } setInputStream (new SequenceInputStream (new InputStreamEnumeration (batchFile, startFile, totalFiles))); referenceList = new LinkedList<String>(); } catch (IOException e) { e.printStackTrace(); //TODO fix this } } /** * Returns a list of all reference text. Implements the getReferences() method of ReferenceSource. * * @return a list of all reference text */ public List<String> getReferences() { return referenceList; } /** * Returns the name of the transcript file. * * @return the name of the transcript file */ public String getTranscriptFile() { return transcriptFile; } /** * Returns the audio time in seconds represented by the given number of bytes. * * @param bytes the number of bytes * @return the audio time */ private float getSeconds(long bytes) { return ((float) bytes / bytesPerSecond); } /** * The work of the concatenating of the audio files are done here. The idea here is to turn the list of audio files * into an Enumeration, and then fed it to a SequenceInputStream, giving the illusion that the audio files are * concatenated, but only logically. */ class InputStreamEnumeration implements Enumeration<InputStream> { private final int totalFiles; private boolean inSilence; private Random silenceRandom; private BufferedReader reader; InputStreamEnumeration(String batchFile, int startFile, int totalFiles) throws IOException { this.totalFiles = totalFiles; reader = new BufferedReader(new FileReader(batchFile)); if (silenceFileName != null) { inSilence = true; silenceRandom = new Random(System.currentTimeMillis()); silenceCount = getSilenceCount(); } // go to the start file for (int i = 1; i < startFile; i++) { reader.readLine(); } } /** * Tests if this enumeration contains more elements. * * @return true if and only if this enumeration object contains at least one more element to provide; false * otherwise. */ public boolean hasMoreElements() { if (nextFile == null) { nextFile = readNext(); } return (nextFile != null); } /** * Returns the next element of this enumeration if this enumeration object has at least one more element to * provide. * * @return the next element of this enumeration. */ public InputStream nextElement() { InputStream stream = null; if (nextFile == null) { nextFile = readNext(); } if (nextFile != null) { try { stream = new FileInputStream(nextFile); // System.out.println(nextFile); nextFile = null; } catch (IOException ioe) { ioe.printStackTrace(); throw new Error("Cannot convert " + nextFile + " to a FileInputStream"); } } // close the transcript file no more files if (stream == null && transcript != null) { try { transcript.close(); } catch (IOException ioe) { ioe.printStackTrace(); } } return stream; } /** * Returns the name of next audio file, taking into account file skipping and the adding of silence. * * @return the name of the appropriate audio file */ public String readNext() { if (!inSilence) { return readNextDataFile(); } else { // return the silence file String next = null; if (silenceCount > 0) { next = silenceFileName; if (transcript != null) { writeSilenceToTranscript(); } silenceCount--; if (silenceCount <= 0) { inSilence = false; } } return next; } } /** * Returns the next audio file. * * @return the name of the next audio file */ private String readNextDataFile() { try { if (0 <= totalFiles && totalFiles <= referenceList.size()) { return null; } String next = reader.readLine(); if (next != null) { String reference = BatchFile.getReference(next); referenceList.add(reference); next = BatchFile.getFilename(next); for (int i = 1; i < skip; i++) { reader.readLine(); } if (silenceFileName != null && maxSilence > 0) { silenceCount = getSilenceCount(); inSilence = true; } if (transcript != null) { writeTranscript(next, reference); } } return next; } catch (IOException ioe) { ioe.printStackTrace(); throw new Error("Problem reading from batch file"); } } /** * Writes the transcript file. * * @param fileName the name of the decoded file * @param reference the reference text */ private void writeTranscript(String fileName, String reference) { try { File file = new File(fileName); float start = getSeconds(totalBytes); totalBytes += file.length(); float end = getSeconds(totalBytes); transcript.write(context + " 1 " + fileName + ' ' + start + ' ' + end + " " + reference + '\n'); transcript.flush(); } catch (IOException ioe) { ioe.printStackTrace(); } } /** Writes silence to the transcript file. */ private void writeSilenceToTranscript() { try { float start = getSeconds(totalBytes); totalBytes += silenceFileLength; float end = getSeconds(totalBytes); transcript.write(context + " 1 " + GAP_LABEL + ' ' + start + ' ' + end + " \n"); transcript.flush(); } catch (IOException ioe) { ioe.printStackTrace(); } } /** * Returns how many times the silence file should be added between utterances. * * @return the number of times the silence file should be added between utterances */ private int getSilenceCount() { if (addRandomSilence) { return silenceRandom.nextInt(maxSilence) + 1; } else { return maxSilence; } } } }