/* * Copyright 2013 Carnegie Mellon University. * All Rights Reserved. Use is subject to license terms. * * See the file "license.terms" for information on usage and * redistribution of this file, and for a DISCLAIMER OF ALL * WARRANTIES. * */ package edu.cmu.sphinx.frontend.denoise; import java.util.Arrays; import edu.cmu.sphinx.frontend.BaseDataProcessor; import edu.cmu.sphinx.frontend.Data; import edu.cmu.sphinx.frontend.DataProcessingException; import edu.cmu.sphinx.frontend.DataStartSignal; import edu.cmu.sphinx.frontend.DoubleData; import edu.cmu.sphinx.util.props.PropertyException; import edu.cmu.sphinx.util.props.PropertySheet; import edu.cmu.sphinx.util.props.S4Double; import edu.cmu.sphinx.util.props.S4Integer; /** * The noise filter, same as implemented in sphinxbase/sphinxtrain/pocketsphinx. * * Noise removal algorithm is inspired by the following papers: Computationally * Efficient Speech Enhancement by Spectral Minima Tracking by G. Doblinger * * Power-Normalized Cepstral Coefficients (PNCC) for Robust Speech Recognition * by C. Kim. 
* * For the recent research and state of art see papers about IMRCA and A * Minimum-Mean-Square-Error Noise Reduction Algorithm On Mel-Frequency Cepstra * For Robust Speech Recognition by Dong Yu and others * */ public class Denoise extends BaseDataProcessor { double[] power; double[] noise; double[] floor; double[] peak; @S4Double(defaultValue = 0.7) public final static String LAMBDA_POWER = "lambdaPower"; double lambdaPower; @S4Double(defaultValue = 0.995) public final static String LAMBDA_A = "lambdaA"; double lambdaA; @S4Double(defaultValue = 0.5) public final static String LAMBDA_B = "lambdaB"; double lambdaB; @S4Double(defaultValue = 0.85) public final static String LAMBDA_T = "lambdaT"; double lambdaT; @S4Double(defaultValue = 0.2) public final static String MU_T = "muT"; double muT; @S4Double(defaultValue = 20.0) public final static String MAX_GAIN = "maxGain"; double maxGain; @S4Integer(defaultValue = 4) public final static String SMOOTH_WINDOW = "smoothWindow"; int smoothWindow; final static double EPS = 1e-10; public Denoise(double lambdaPower, double lambdaA, double lambdaB, double lambdaT, double muT, double maxGain, int smoothWindow) { this.lambdaPower = lambdaPower; this.lambdaA = lambdaA; this.lambdaB = lambdaB; this.lambdaT = lambdaT; this.muT = muT; this.maxGain = maxGain; this.smoothWindow = smoothWindow; } public Denoise() { } /* * (non-Javadoc) * * @see * edu.cmu.sphinx.util.props.Configurable#newProperties(edu.cmu.sphinx.util * .props.PropertySheet) */ @Override public void newProperties(PropertySheet ps) throws PropertyException { super.newProperties(ps); lambdaPower = ps.getDouble(LAMBDA_POWER); lambdaA = ps.getDouble(LAMBDA_A); lambdaB = ps.getDouble(LAMBDA_B); lambdaT = ps.getDouble(LAMBDA_T); muT = ps.getDouble(MU_T); maxGain = ps.getDouble(MAX_GAIN); smoothWindow = ps.getInt(SMOOTH_WINDOW); } @Override public Data getData() throws DataProcessingException { Data inputData = getPredecessor().getData(); int i; if (inputData instanceof 
DataStartSignal) { power = null; noise = null; floor = null; peak = null; return inputData; } if (!(inputData instanceof DoubleData)) { return inputData; } DoubleData inputDoubleData = (DoubleData) inputData; double[] input = inputDoubleData.getValues(); int length = input.length; if (power == null) initStatistics(input, length); updatePower(input); estimateEnvelope(power, noise); double[] signal = new double[length]; for (i = 0; i < length; i++) { signal[i] = Math.max(power[i] - noise[i], 0.0); } estimateEnvelope(signal, floor); tempMasking(signal); powerBoosting(signal); double[] gain = new double[length]; for (i = 0; i < length; i++) { gain[i] = signal[i] / (power[i] + EPS); gain[i] = Math.min(Math.max(gain[i], 1.0 / maxGain), maxGain); } double[] smoothGain = smooth(gain); for (i = 0; i < length; i++) { input[i] *= smoothGain[i]; } return inputData; } private double[] smooth(double[] gain) { double[] result = new double[gain.length]; for (int i = 0; i < gain.length; i++) { int start = Math.max(i - smoothWindow, 0); int end = Math.min(i + smoothWindow + 1, gain.length); double sum = 0.0; for (int j = start; j < end; j++) { sum += gain[j]; } result[i] = sum / (end - start); } return result; } private void powerBoosting(double[] signal) { for (int i = 0; i < signal.length; i++) { if (signal[i] < floor[i]) signal[i] = floor[i]; } } private void tempMasking(double[] signal) { for (int i = 0; i < signal.length; i++) { double in = signal[i]; peak[i] *= lambdaT; if (signal[i] < lambdaT * peak[i]) signal[i] = peak[i] * muT; if (in > peak[i]) peak[i] = in; } } private void updatePower(double[] input) { for (int i = 0; i < input.length; i++) { power[i] = lambdaPower * power[i] + (1 - lambdaPower) * input[i]; } } private void estimateEnvelope(double[] signal, double[] envelope) { for (int i = 0; i < signal.length; i++) { if (signal[i] > envelope[i]) envelope[i] = lambdaA * envelope[i] + (1 - lambdaA) * signal[i]; else envelope[i] = lambdaB * envelope[i] + (1 - lambdaB) * 
signal[i]; } } private void initStatistics(double[] input, int length) { /* no previous data, initialize the statistics */ power = Arrays.copyOf(input, length); noise = Arrays.copyOf(input, length); floor = new double[length]; peak = new double[length]; for (int i = 0; i < length; i++) { floor[i] = input[i] / maxGain; } } }