/* * Copyright 1999-2002 Carnegie Mellon University. * Portions Copyright 2002 Sun Microsystems, Inc. * Portions Copyright 2002 Mitsubishi Electric Research Laboratories. * All Rights Reserved. Use is subject to license terms. * * See the file "license.terms" for information on usage and * redistribution of this file, and for a DISCLAIMER OF ALL * WARRANTIES. * */ package edu.cmu.sphinx.frontend.feature; import edu.cmu.sphinx.frontend.*; import edu.cmu.sphinx.frontend.endpoint.*; import edu.cmu.sphinx.util.props.PropertyException; import edu.cmu.sphinx.util.props.PropertySheet; import java.text.DecimalFormat; import java.text.DecimalFormatSymbols; import java.util.*; /** * Applies cepstral mean normalization (CMN), sometimes called channel mean normalization, to incoming cepstral data. * * Its goal is to reduce the distortion caused by the transmission channel. The output is mean normalized cepstral * data. * <p> * The CMN processing subtracts the mean from all the {@link Data} objects between a {@link * edu.cmu.sphinx.frontend.DataStartSignal} and a {@link DataEndSignal} or between a {@link * edu.cmu.sphinx.frontend.endpoint.SpeechStartSignal} and a {@link SpeechEndSignal}. BatchCMN will read in all the {@link Data} * objects, calculate the mean, and subtract this mean from all the {@link Data} objects. For a given utterance, it will * only produce an output after reading all the incoming data for the utterance. As a result, this process can introduce * a significant processing delay, which is acceptable for batch processing, but not for live mode. In the latter case, * one should use the {@link LiveCMN}. * <p> * CMN is a technique used to reduce distortions that are introduced by the transfer function of the transmission * channel (e.g., the microphone). Using a transmission channel to transmit the input speech translates to multiplying * the spectrum of the input speech with the transfer function of the channel (the distortion). Since the cepstrum is * the Fourier Transform of the log spectrum, the logarithm turns the multiplication into a summation. Averaging over * time, the mean is an estimate of the channel, which remains roughly constant. The channel is thus removed from the * cepstrum by subtracting the mean cepstral vector. Intuitively, the mean cepstral vector approximately describes the * spectral characteristics of the transmission channel (e.g., microphone). * * @see LiveCMN */ public class BatchCMN extends BaseDataProcessor { private double[] sums; // array of current sums private List<Data> cepstraList; private int numberDataCepstra; private DecimalFormat formatter = new DecimalFormat("0.00;-0.00", new DecimalFormatSymbols(Locale.US));; public BatchCMN() { initLogger(); } /* (non-Javadoc) * @see edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util.props.PropertySheet) */ @Override public void newProperties(PropertySheet ps) throws PropertyException { super.newProperties(ps); } /** Initializes this BatchCMN. */ @Override public void initialize() { super.initialize(); sums = null; cepstraList = new LinkedList<Data>(); } /** Initializes the sums array and clears the cepstra list. */ private void reset() { sums = null; // clears the sums array cepstraList.clear(); numberDataCepstra = 0; } /** * Returns the next Data object, which is a normalized cepstrum. Signal objects are returned unmodified. * * @return the next available Data object, returns null if no Data object is available * @throws DataProcessingException if there is an error processing data */ @Override public Data getData() throws DataProcessingException { Data output = null; if (!cepstraList.isEmpty()) { output = cepstraList.remove(0); } else { reset(); // read the cepstra of the entire utterance, calculate // and apply the cepstral mean if (readUtterance() > 0) { normalizeList(); output = cepstraList.remove(0);//getData(); } } return output; } /** * Reads the cepstra of the entire Utterance into the cepstraList. * * @return the number cepstra (with Data) read * @throws DataProcessingException if an error occurred reading the Data */ private int readUtterance() throws DataProcessingException { Data input = null; do { input = getPredecessor().getData(); if (input != null) { if (input instanceof DoubleData) { double[] cepstrumData = ((DoubleData) input).getValues(); if (sums == null) { sums = new double[cepstrumData.length]; } else { if (sums.length != cepstrumData.length) { throw new Error ("Inconsistent cepstrum lengths: sums: " + sums.length + ", cepstrum: " + cepstrumData.length); } } if (cepstrumData[0] >= 0) { // add the cepstrum data to the sums for (int j = 0; j < cepstrumData.length; j++) { sums[j] += cepstrumData[j]; } numberDataCepstra++; } cepstraList.add(input); } else if (input instanceof DataEndSignal || input instanceof SpeechEndSignal) { cepstraList.add(input); break; } else { // DataStartSignal or other Signal cepstraList.add(input); } } } while (input != null); return numberDataCepstra; } /** Normalizes the list of Data. */ private void normalizeList() { StringBuilder cmn = new StringBuilder(); // calculate the mean first for (int i = 0; i < sums.length; i++) { sums[i] /= numberDataCepstra; cmn.append (formatter.format(sums[i])); cmn.append(' '); } logger.info(cmn.toString()); for (Data data : cepstraList) { if (data instanceof DoubleData) { double[] cepstrum = ((DoubleData)data).getValues(); for (int j = 0; j < cepstrum.length; j++) { cepstrum[j] -= sums[j]; // sums[] is now the means[] } } } } }