ConcatFileDataSource.java example

Explorer
Sphinx-master
- sphinx4-master
  - sphinx4-core
    - src
      - main
        java
        edu
        cmu
        sphinx
        alignment
        api
        decoder
        frontend
        AutoCepstrum.java
        BaseDataProcessor.java
        Data.java
        DataBlocker.java
        DataEndSignal.java
        DataProcessingException.java
        DataProcessor.java
        DataStartSignal.java
        DoubleData.java
        FloatData.java
        FrontEnd.java
        GainControlProcessor.java
        Signal.java
        SignalListener.java
        databranch
        denoise
        endpoint
        feature
        filter
        frequencywarp
        transform
        util
        AudioContinuityTester.java
        AudioFileDataSource.java
        AudioFileProcessListener.java
        ConcatAudioFileDataSource.java
        ConcatFileDataSource.java
        DataConverter.java
        DataDumper.java
        DataUtil.java
        EnergyPlotter.java
        FrontEndUtils.java
        Microphone.java
        StreamCepstrumSource.java
        StreamDataSource.java
        StreamHTKCepstrum.java
        Utterance.java
        VUMeter.java
        VUMeterMonitor.java
        VUMeterPanel.java
        WavWriter.java
        window
        fst
        instrumentation
        jsgf
        linguist
        recognizer
        result
        speakerid
        tools
        trainer
        util
      - test
  - sphinx4-samples
/*
* Copyright 1999-2002 Carnegie Mellon University.
* Portions Copyright 2002 Sun Microsystems, Inc.
* Portions Copyright 2002 Mitsubishi Electric Research Laboratories.
* All Rights Reserved.  Use is subject to license terms.
*
* See the file "license.terms" for information on usage and
* redistribution of this file, and for a DISCLAIMER OF ALL
* WARRANTIES.
*
*/
package edu.cmu.sphinx.frontend.util;

import edu.cmu.sphinx.util.BatchFile;
import edu.cmu.sphinx.util.ReferenceSource;
import edu.cmu.sphinx.util.props.*;

import java.io.*;
import java.util.Enumeration;
import java.util.LinkedList;
import java.util.List;
import java.util.Random;


/**
 * Concatenates a list of raw headerless audio files as one continuous audio stream. A {@link
 * edu.cmu.sphinx.frontend.DataStartSignal DataStartSignal} will be placed before the start of the first file, and a
 * {@link edu.cmu.sphinx.frontend.DataEndSignal DataEndSignal} after the last file. No DataStartSignal or DataEndSignal
 * will be placed between them. Optionally, silence can be added in-between the audio files by setting the property:
 * <pre>edu.cmu.sphinx.frontend.util.ConcatFileDataSource.silenceFile</pre>
 * to an audio file for silence. By default, no silence is added. Moreover, one can also specify how many files to skip
 * for every file read.
 * <p>
 * You can also specify the name of a transcript file to write the transcription to. The transcription will be written
 * in HUB-4 style. A sample HUB-4 transcript looks like:
 * <pre>
 * bn99en_1 1 peter_jennings 0.806084 7.079850 <o,f4,male> Tonight this
 * Thursday big pressure on the Clinton administration to do something about
 * the latest killing in Yugoslavia
 * bn99en_1 1 peter_jennings 7.079850 14.007608 <o,fx,male> Airline passengers
 * and outrageous behavior at thirty thousand feet What can an airline do
 * ...
 * bn99en_1 1 inter_segment_gap 23.097000 28.647000 <o,fx,>
 * ...
 * </pre>
 * The format of each line is:
 * <pre>
 * test_set_name category speaker_name start_time_in_seconds
 * end_time_in_seconds <category,hub4_focus_conditions,speaker_sex> transcript
 * </pre>
 * In our example above,
 * <pre>
 * test_set_name is "bn99en_1"
 * category is "1"
 * speaker_name is "peter_jennings"
 * start_time_in_seconds is "0.806084"
 * end_time_in_seconds is "7.079850"
 * category is "o" for "Overall"
 * hub4_focus_conditions is:
 *     "f0" for "Baseline//Broadcast//Speech"
 *     "f1" for "Spontaneous//Broadcast//Speech"
 *     "f2" for "Speech Over//Telephone//Channels"
 *     "f3" for "Speech in the//Presence of//Background Music"
 *     "f4" for "Speech Under//Degraded//Acoustic Conditions"
 *     "f5" for "Speech from//Non-Native//Speakers"
 *     "fx" for "All other speech"
 * speaker_sex is "male"
 * transcript is "Tonight this Thursday big pressure on the Clinton
 * administration to do something about the latest killing in Yugoslavia"
 * </pre>
 * The ConcatFileDataSource will produce such a transcript if the name of the file to write to is supplied in the
 * constructor. This transcript file will be used in detecting gap insertion errors, because it accurately describes the
 * "correct" sequence of speech and silences in the concatenated version of the audio files.
 */
public class ConcatFileDataSource extends StreamDataSource implements ReferenceSource {

    /** The property that specifies which file to start at. */
    @S4Integer(defaultValue = 1)
    public static final String PROP_START_FILE = "startFile";

    /** The property that specifies the number of files to skip for every file read. */
    @S4Integer(defaultValue = 0)
    public static final String PROP_SKIP = "skip";

    /** The property that specifies the total number of files to read. The default value should be no limit. */
    @S4Integer(defaultValue = -1)
    public static final String PROP_TOTAL_FILES = "totalFiles";

    /**
     * The property that specifies the silence audio file, if any. If this property is null, then no silences are
     * added in between files.
     */
    @S4String
    public static final String PROP_SILENCE_FILE = "silenceFile";

    /** The property that specifies whether to add random silence. */
    @S4Boolean(defaultValue = false)
    public static final String PROP_ADD_RANDOM_SILENCE = "addRandomSilence";

    /**
     * The property that specifies the maximum number of times the silence file is added  between files. If
     * PROP_ADD_RANDOM_SILENCE is set to true, the number of times the silence file is added is between 1 and this
     * value. If PROP_ADD_RANDOM_SILENCE is set to false, this value will be the number of times the silence file is
     * added. So if PROP_MAX_SILENCE is set to 3, then the silence file will be added three times between files.
     * A value of zero or less disables silence insertion.
     */
    @S4Integer(defaultValue = 3)
    public static final String PROP_MAX_SILENCE = "maxSilence";

    /**
     * The property that specifies the name of the transcript file. If this property is set, a transcript file
     * will be created. No transcript file will be created if this property is not set.
     */
    @S4String
    public static final String PROP_TRANSCRIPT_FILE = "transcriptFile";

    /** The property for the file containing a list of audio files to read from. */
    @S4String
    public static final String PROP_BATCH_FILE = "batchFile";


    /** Speaker label written to the transcript for every inserted silence segment. */
    private static final String GAP_LABEL = "inter_segment_gap";

    private boolean addRandomSilence;
    private int skip;
    private int maxSilence;
    /** Number of silence segments still to be emitted before the next data file. */
    private int silenceCount;
    private int bytesPerSecond;
    /** Running length, in bytes, of everything scheduled into the concatenated stream so far. */
    private long totalBytes;
    private long silenceFileLength;
    private String silenceFileName;
    /** Look-ahead slot shared by hasMoreElements() and nextElement() of the enumeration. */
    private String nextFile;
    // NOTE(review): 'context' is never assigned anywhere in this class, so transcript lines
    // start with the text "null". Left as-is because the intended value cannot be determined here.
    private String context;
    private String transcriptFile;
    private List<String> referenceList;
    private FileWriter transcript;
    private int startFile;
    private int totalFiles;
    private String batchFile;

    /**
     * Creates a data source configured programmatically rather than through a property sheet.
     *
     * @param sampleRate       sample rate of the audio, forwarded to the superclass
     * @param bytesPerRead     forwarded to the superclass
     * @param bitsPerSample    bits per sample, forwarded to the superclass; also used (divided by 8, integer
     *                         division) to derive the bytes-per-second figure for transcript timing
     * @param bigEndian        forwarded to the superclass
     * @param signedData       forwarded to the superclass
     * @param addRandomSilence whether the number of silence segments between files is randomized
     * @param maxSilence       maximum (or exact, if not random) number of silence segments between files
     * @param skip             see {@link #PROP_SKIP}
     * @param silenceFileName  the silence audio file, or null for no silence
     * @param startFile        1-based line of the batch file to start at
     * @param totalFiles       maximum number of data files to read; negative means no limit
     * @param transcriptFile   transcript file to write, or null for none
     * @param batchFile        file listing the audio files to concatenate
     */
    public ConcatFileDataSource( int sampleRate, int bytesPerRead, int bitsPerSample, boolean bigEndian, boolean signedData,
        boolean addRandomSilence,
        int maxSilence,
        int skip,
        String silenceFileName,
        int startFile,
        int totalFiles,
        String transcriptFile,
        String batchFile) {
        super(sampleRate,bytesPerRead,bitsPerSample,bigEndian,signedData );

        this.bytesPerSecond = sampleRate * (bitsPerSample / 8);
        this.addRandomSilence = addRandomSilence;
        this.maxSilence = maxSilence;
        this.skip = skip;
        this.silenceFileName = silenceFileName;
        this.startFile = startFile;
        this.totalFiles = totalFiles;
        this.transcriptFile = transcriptFile;
        this.batchFile = batchFile;
    }

    /** Creates an unconfigured instance; settings are supplied later through {@link #newProperties}. */
    public ConcatFileDataSource() {

    }

    /*
    * (non-Javadoc)
    *
    * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet)
    */
    @Override
    public void newProperties(PropertySheet ps) throws PropertyException {
        super.newProperties(ps);

        bytesPerSecond = sampleRate * (bitsPerSample / 8);
        addRandomSilence = ps.getBoolean(PROP_ADD_RANDOM_SILENCE);
        maxSilence = ps.getInt(PROP_MAX_SILENCE);
        skip = ps.getInt(PROP_SKIP);
        silenceFileName = ps.getString(PROP_SILENCE_FILE);
        startFile = ps.getInt(PROP_START_FILE);
        totalFiles = ps.getInt(PROP_TOTAL_FILES);
        transcriptFile = ps.getString(PROP_TRANSCRIPT_FILE);
        batchFile = ps.getString(PROP_BATCH_FILE);
    }


    /**
     * Initializes a ConcatFileDataSource: resets the per-run state, opens the transcript file (if one is
     * configured) and installs the concatenated input stream built from the batch file.
     *
     * @throws Error if no batch file is configured, or if the batch file or transcript file cannot be opened
     */
    @Override
    public void initialize() {
        super.initialize();

        if (batchFile == null) {
            throw new Error("BatchFile cannot be null!");
        }

        // Per-run state must be in place BEFORE the stream is built: SequenceInputStream may query
        // the enumeration as soon as it is constructed, which reads a batch line and records its reference.
        referenceList = new LinkedList<String>();
        nextFile = null;
        totalBytes = 0;

        // A null silence file is the documented "no silence" configuration.
        silenceFileLength = (silenceFileName == null) ? 0 : new File(silenceFileName).length();

        try {
            closeTranscript(); // release a transcript left open by an earlier initialize()
            if (transcriptFile != null) {
                transcript = new FileWriter(transcriptFile);
            }
            setInputStream
                    (new SequenceInputStream
                            (new InputStreamEnumeration
                                    (batchFile, startFile, totalFiles)));
        } catch (IOException e) {
            closeTranscript();
            throw new Error("Cannot initialize ConcatFileDataSource from batch file " + batchFile, e);
        }
    }


    /**
     * Returns a list of all reference text. Implements the getReferences() method of ReferenceSource.
     *
     * @return a list of all reference text
     */
    public List<String> getReferences() {
        return referenceList;
    }


    /**
     * Returns the name of the transcript file.
     *
     * @return the name of the transcript file
     */
    public String getTranscriptFile() {
        return transcriptFile;
    }


    /**
     * Returns the audio time in seconds represented by the given number of bytes.
     *
     * @param bytes the number of bytes
     * @return the audio time
     */
    private float getSeconds(long bytes) {
        return ((float) bytes / bytesPerSecond);
    }


    /** Closes the transcript writer, if open. Failures are reported but not propagated (best effort). */
    private void closeTranscript() {
        if (transcript != null) {
            try {
                transcript.close();
            } catch (IOException ioe) {
                ioe.printStackTrace();
            }
            transcript = null;
        }
    }


    /**
     * The work of the concatenating of the audio files are done here. The idea here is to turn the list of audio files
     * into an Enumeration, and then fed it to a SequenceInputStream, giving the illusion that the audio files are
     * concatenated, but only logically.
     */
    class InputStreamEnumeration implements Enumeration<InputStream> {

        private final int totalFiles;
        /** True while silence segments are pending before the next data file. */
        private boolean inSilence;
        /** Set once the batch file is used up; the reader and transcript are closed at that point. */
        private boolean exhausted;
        private Random silenceRandom;
        private BufferedReader reader;


        /**
         * Opens the batch file and positions it at the requested start line.
         *
         * @param batchFile  the file listing the audio files
         * @param startFile  1-based line to start at; lines before it are discarded
         * @param totalFiles maximum number of data files to return; negative means no limit
         * @throws IOException if the batch file cannot be opened or read
         */
        InputStreamEnumeration(String batchFile, int startFile,
                               int totalFiles)
                throws IOException {
            this.totalFiles = totalFiles;
            reader = new BufferedReader(new FileReader(batchFile));
            if (silenceFileName != null) {
                silenceRandom = new Random(System.currentTimeMillis());
                // silence precedes the first data file as well
                scheduleSilence();
            }
            try {
                // go to the start file
                for (int i = 1; i < startFile; i++) {
                    reader.readLine();
                }
            } catch (IOException ioe) {
                closeReader();
                throw ioe;
            }
        }


        /**
         * Tests if this enumeration contains more elements.
         *
         * @return true if and only if this enumeration object contains at least one more element to provide; false
         *         otherwise.
         */
        public boolean hasMoreElements() {
            if (nextFile == null) {
                nextFile = readNext();
            }
            return (nextFile != null);
        }


        /**
         * Returns the next element of this enumeration if this enumeration object has at least one more element to
         * provide.
         *
         * @return the next element of this enumeration, or null if there are no more files
         * @throws Error if the next file cannot be opened
         */
        public InputStream nextElement() {
            if (!hasMoreElements()) {
                // reader and transcript were already released when the end was detected
                return null;
            }
            try {
                InputStream stream = new FileInputStream(nextFile);
                nextFile = null;
                return stream;
            } catch (IOException ioe) {
                throw new Error("Cannot convert " + nextFile +
                        " to a FileInputStream", ioe);
            }
        }


        /**
         * Returns the name of next audio file, taking into account file skipping and the adding of silence.
         * When the batch file is used up, the batch reader and the transcript are closed.
         *
         * @return the name of the appropriate audio file, or null if there are no more
         */
        public String readNext() {
            if (exhausted) {
                return null;
            }
            String next;
            if (inSilence && silenceCount > 0) {
                // return the silence file
                next = silenceFileName;
                if (transcript != null) {
                    writeSilenceToTranscript();
                }
                silenceCount--;
                if (silenceCount <= 0) {
                    inSilence = false;
                }
            } else {
                inSilence = false;
                next = readNextDataFile();
            }
            if (next == null) {
                finish();
            }
            return next;
        }


        /**
         * Returns the next audio file, recording its reference text and scheduling the silence that follows it.
         *
         * @return the name of the next audio file, or null if the file limit or the end of the batch is reached
         * @throws Error if the batch file cannot be read
         */
        private String readNextDataFile() {
            if (0 <= totalFiles &&
                    totalFiles <= referenceList.size()) {
                return null;
            }
            try {
                String line = reader.readLine();
                if (line == null) {
                    return null;
                }
                String reference = BatchFile.getReference(line);
                referenceList.add(reference);
                String fileName = BatchFile.getFilename(line);
                // NOTE(review): this discards (skip - 1) lines, not 'skip' lines as the property
                // documentation suggests; behavior kept unchanged - confirm which is intended.
                for (int i = 1; i < skip; i++) {
                    reader.readLine();
                }
                if (silenceFileName != null) {
                    scheduleSilence();
                }
                if (transcript != null) {
                    writeTranscript(fileName, reference);
                }
                return fileName;
            } catch (IOException ioe) {
                throw new Error("Problem reading from batch file", ioe);
            }
        }


        /**
         * Writes the transcript file.
         *
         * @param fileName  the name of the decoded file
         * @param reference the reference text
         */
        private void writeTranscript(String fileName, String reference) {
            try {
                File file = new File(fileName);
                float start = getSeconds(totalBytes);
                totalBytes += file.length();
                float end = getSeconds(totalBytes);
                transcript.write(context + " 1 " + fileName + ' ' + start +
                    ' ' + end + "  " + reference + '\n');
                transcript.flush();
            } catch (IOException ioe) {
                ioe.printStackTrace();
            }
        }


        /** Writes silence to the transcript file. */
        private void writeSilenceToTranscript() {
            try {
                float start = getSeconds(totalBytes);
                totalBytes += silenceFileLength;
                float end = getSeconds(totalBytes);
                transcript.write(context + " 1 " + GAP_LABEL + ' ' +
                        start + ' ' + end + " \n");
                transcript.flush();
            } catch (IOException ioe) {
                ioe.printStackTrace();
            }
        }


        /**
         * Picks how many silence segments come next and enters silence mode only if that number is positive,
         * so that a non-positive maxSilence can never stall the enumeration.
         */
        private void scheduleSilence() {
            silenceCount = getSilenceCount();
            inSilence = silenceCount > 0;
        }


        /**
         * Returns how many times the silence file should be added between utterances.
         *
         * @return the number of times the silence file should be added between utterances; zero if maxSilence
         *         is not positive
         */
        private int getSilenceCount() {
            if (maxSilence <= 0) {
                // Random.nextInt rejects a non-positive bound
                return 0;
            }
            if (addRandomSilence) {
                return silenceRandom.nextInt(maxSilence) + 1;
            } else {
                return maxSilence;
            }
        }


        /** Marks the enumeration as used up and releases the batch reader and the transcript. */
        private void finish() {
            exhausted = true;
            closeReader();
            closeTranscript();
        }


        /** Closes the batch-file reader. Failures are reported but not propagated (best effort). */
        private void closeReader() {
            try {
                reader.close();
            } catch (IOException ioe) {
                ioe.printStackTrace();
            }
        }
    }
}