/** * Copyright 2007 DFKI GmbH. * All Rights Reserved. Use is subject to license terms. * * This file is part of MARY TTS. * * MARY TTS is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, version 3 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * */ package marytts.signalproc.analysis; import marytts.util.math.ArrayUtils; import marytts.util.math.MathUtils; import marytts.util.signal.SignalProcUtils; //Note that RegularizedPreWarpedCepstrumEstimator or RegularizedPostWarpedCepstrumEstimator works much better public class RegularizedCepstrumEstimator { public static final double DEFAULT_LAMBDA = 5e-4; // Regularization parameter // Warping for better estimation can be performed in two ways: // either pre-warping of linear frequency values in Bark-scale // or post-warping by converting cepstral coefficients computed from linear frequency values to mel scale public static final int REGULARIZED_CEPSTRUM_WITH_PRE_BARK_WARPING = 1; public static final int REGULARIZED_CEPSTRUM_WITH_POST_MEL_WARPING = 2; // // lambda: regularization term (typically on the order of 0.0001 // Note that cepstrum is always computed using log amps, therefore the fitted spectrum computed from these cepstrum coeffs // will be in log amp domain protected static float[] freqsLinearAmps2cepstrum(double[] linearAmps, double[] freqsInHz, int samplingRateInHz, int cepsOrder, boolean isPreWarping, double[] weights, double lambda) { assert linearAmps.length == freqsInHz.length; double[] a = MathUtils.amp2db(linearAmps); int L = linearAmps.length; int p = cepsOrder; double[][] M = new double[L][p + 1]; int i, j; double f; double denum = 1.0; if (isPreWarping) denum = (2.0 * SignalProcUtils.freq2barkNew(0.5 * samplingRateInHz)); for (i = 0; i < L; i++) { M[i][0] = 1.0; if (isPreWarping) { f = SignalProcUtils.freq2barkNew(freqsInHz[i]) / denum; for (j = 1; j < p + 1; j++) M[i][j] = 2.0 * Math.cos(MathUtils.TWOPI * f * j); } else { f = SignalProcUtils.hz2radian(freqsInHz[i], samplingRateInHz); for (j = 1; j < p + 1; j++) M[i][j] = 2.0 * Math.cos(f * j); } } double[] diagR = new double[p + 1]; double tmp = 8.0 * (Math.PI) * (Math.PI); for (i = 0; i < p + 1; i++) diagR[i] = tmp * i * i; double[][] R = MathUtils.toDiagonalMatrix(diagR); double[] cepsDouble = null; if (weights != null) { double[][] W = MathUtils.toDiagonalMatrix(weights); double[][] MTrans = MathUtils.transpoze(M); double[][] MTransW = MathUtils.matrixProduct(MTrans, W); double[][] MTransWM = MathUtils.matrixProduct(MTransW, M); double[][] lambdaR = MathUtils.multiply(lambda, R); double[] MTransWa = MathUtils.matrixProduct(MTransW, a); double[][] inverted = MathUtils.inverse(MathUtils.add(MTransWM, lambdaR)); cepsDouble = MathUtils.matrixProduct(inverted, MTransWa); } else // No weights given { double[][] MTrans = MathUtils.transpoze(M); double[][] MTransM = MathUtils.matrixProduct(MTrans, M); double[][] lambdaR = MathUtils.multiply(lambda, R); double[] MTransa = MathUtils.matrixProduct(MTrans, a); double[][] inverted = MathUtils.inverse(MathUtils.add(MTransM, lambdaR)); cepsDouble = MathUtils.matrixProduct(inverted, MTransa); } float[] ceps = ArrayUtils.copyDouble2Float(cepsDouble); return ceps; } protected static double[][] precomputeM(double[] freqsInHz, int samplingRateInHz, int cepsOrder, boolean isPreWarping) { int L = freqsInHz.length; int p = cepsOrder; double[][] M = new double[L][p + 1]; int i, j; double f; double denum = 1.0; if (isPreWarping) denum = (2.0 * SignalProcUtils.freq2barkNew(0.5 * samplingRateInHz)); for (i = 0; i < L; i++) { M[i][0] = 1.0; if (isPreWarping) { f = SignalProcUtils.freq2barkNew(freqsInHz[i]) / denum; for (j = 1; j < p + 1; j++) M[i][j] = 2.0 * Math.cos(MathUtils.TWOPI * f * j); } else { f = SignalProcUtils.hz2radian(freqsInHz[i], samplingRateInHz); for (j = 1; j < p + 1; j++) M[i][j] = 2.0 * Math.cos(f * j); } } return M; } public static double[][] precomputeMTransW(double[][] M, double[] weights) { double[][] MTransW = null; if (weights != null) { double[][] W = MathUtils.toDiagonalMatrix(weights); double[][] MTrans = MathUtils.transpoze(M); MTransW = MathUtils.matrixProduct(MTrans, W); } else // No weights given MTransW = MathUtils.transpoze(M); return MTransW; } public static double[][] precomputeMTransWM(double[][] MTransW, double[][] M) { double[][] MTransWM = MathUtils.matrixProduct(MTransW, M); return MTransWM; } public static double[][] precomputeLambdaR(double lambda, int cepsOrder) { int p = cepsOrder; double[] diagR = new double[p + 1]; double tmp = 8.0 * (Math.PI) * (Math.PI); for (int i = 0; i < p + 1; i++) diagR[i] = tmp * i * i; double[][] R = MathUtils.toDiagonalMatrix(diagR); double[][] lambdaR = MathUtils.multiply(lambda, R); return lambdaR; } public static double[][] precomputeInverted(double[][] MTransWM, double[][] lambdaR) { double[][] inverted = MathUtils.inverse(MathUtils.add(MTransWM, lambdaR)); return inverted; } // Another version when frequencies are fixed and all precomputations were done by calling precomputeForCepstrum with these // fixed values // Note that cepstrum is always computed using log amps, therefore the fitted spectrum computed from these cepstrum coeffs // will be in log amp domain public static float[] freqsLinearAmps2cepstrum(double[] linearAmps, double[][] MTransW, double[][] inverted) { double[] logAmps = MathUtils.log10(linearAmps); double[] a = MathUtils.multiply(logAmps, 20.0); double[] cepsDouble = null; double[] MTransWa = MathUtils.matrixProduct(MTransW, a); cepsDouble = MathUtils.matrixProduct(inverted, MTransWa); float[] ceps = ArrayUtils.copyDouble2Float(cepsDouble); return ceps; } protected static double[] cepstrum2logAmpHalfSpectrum(float[] ceps, int fftSize, int samplingRateInHz, boolean isPreWarping) { int maxFreq = SignalProcUtils.halfSpectrumSize(fftSize); double[] halfAbsSpectrum = new double[maxFreq]; int p = ceps.length - 1; int i, k; double f; double denum = samplingRateInHz; if (isPreWarping) denum = (2.0 * SignalProcUtils.freq2barkNew(0.5 * samplingRateInHz)); for (k = 0; k < maxFreq; k++) { halfAbsSpectrum[k] = ceps[0]; if (isPreWarping) { f = SignalProcUtils.freq2barkNew(((double) k / (maxFreq - 1.0)) * 0.5 * samplingRateInHz) / denum; for (i = 1; i <= p; i++) halfAbsSpectrum[k] += 2.0 * ceps[i] * Math.cos(MathUtils.TWOPI * f * i); } else { f = SignalProcUtils.index2freq(k, samplingRateInHz, maxFreq - 1); f = SignalProcUtils.hz2radian(f, samplingRateInHz); for (i = 1; i <= p; i++) halfAbsSpectrum[k] += 2.0 * ceps[i] * Math.cos(f * i); } } return halfAbsSpectrum; } }