RegularizedCepstrumEstimator.java example

Explorer
marytts-master
/**
 * Copyright 2007 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.signalproc.analysis;

import marytts.util.math.ArrayUtils;
import marytts.util.math.MathUtils;
import marytts.util.signal.SignalProcUtils;

//Note that RegularizedPreWarpedCepstrumEstimator or RegularizedPostWarpedCepstrumEstimator works much better
public class RegularizedCepstrumEstimator {
	public static final double DEFAULT_LAMBDA = 5e-4; // Regularization parameter

	// Warping for better estimation can be performed in two ways:
	// either pre-warping of linear frequency values in Bark-scale
	// or post-warping by converting cepstral coefficients computed from linear frequency values to mel scale
	public static final int REGULARIZED_CEPSTRUM_WITH_PRE_BARK_WARPING = 1;
	public static final int REGULARIZED_CEPSTRUM_WITH_POST_MEL_WARPING = 2;

	//

	// lambda: regularization term (typically on the order of 0.0001
	// Note that cepstrum is always computed using log amps, therefore the fitted spectrum computed from these cepstrum coeffs
	// will be in log amp domain
	protected static float[] freqsLinearAmps2cepstrum(double[] linearAmps, double[] freqsInHz, int samplingRateInHz,
			int cepsOrder, boolean isPreWarping, double[] weights, double lambda) {
		assert linearAmps.length == freqsInHz.length;

		double[] a = MathUtils.amp2db(linearAmps);
		int L = linearAmps.length;
		int p = cepsOrder;
		double[][] M = new double[L][p + 1];
		int i, j;
		double f;
		double denum = 1.0;

		if (isPreWarping)
			denum = (2.0 * SignalProcUtils.freq2barkNew(0.5 * samplingRateInHz));

		for (i = 0; i < L; i++) {
			M[i][0] = 1.0;
			if (isPreWarping) {
				f = SignalProcUtils.freq2barkNew(freqsInHz[i]) / denum;
				for (j = 1; j < p + 1; j++)
					M[i][j] = 2.0 * Math.cos(MathUtils.TWOPI * f * j);
			} else {
				f = SignalProcUtils.hz2radian(freqsInHz[i], samplingRateInHz);
				for (j = 1; j < p + 1; j++)
					M[i][j] = 2.0 * Math.cos(f * j);
			}
		}

		double[] diagR = new double[p + 1];
		double tmp = 8.0 * (Math.PI) * (Math.PI);
		for (i = 0; i < p + 1; i++)
			diagR[i] = tmp * i * i;
		double[][] R = MathUtils.toDiagonalMatrix(diagR);

		double[] cepsDouble = null;
		if (weights != null) {
			double[][] W = MathUtils.toDiagonalMatrix(weights);
			double[][] MTrans = MathUtils.transpoze(M);
			double[][] MTransW = MathUtils.matrixProduct(MTrans, W);
			double[][] MTransWM = MathUtils.matrixProduct(MTransW, M);
			double[][] lambdaR = MathUtils.multiply(lambda, R);
			double[] MTransWa = MathUtils.matrixProduct(MTransW, a);
			double[][] inverted = MathUtils.inverse(MathUtils.add(MTransWM, lambdaR));
			cepsDouble = MathUtils.matrixProduct(inverted, MTransWa);
		} else // No weights given
		{
			double[][] MTrans = MathUtils.transpoze(M);
			double[][] MTransM = MathUtils.matrixProduct(MTrans, M);
			double[][] lambdaR = MathUtils.multiply(lambda, R);
			double[] MTransa = MathUtils.matrixProduct(MTrans, a);
			double[][] inverted = MathUtils.inverse(MathUtils.add(MTransM, lambdaR));
			cepsDouble = MathUtils.matrixProduct(inverted, MTransa);
		}

		float[] ceps = ArrayUtils.copyDouble2Float(cepsDouble);

		return ceps;
	}

	protected static double[][] precomputeM(double[] freqsInHz, int samplingRateInHz, int cepsOrder, boolean isPreWarping) {
		int L = freqsInHz.length;
		int p = cepsOrder;
		double[][] M = new double[L][p + 1];
		int i, j;
		double f;
		double denum = 1.0;

		if (isPreWarping)
			denum = (2.0 * SignalProcUtils.freq2barkNew(0.5 * samplingRateInHz));

		for (i = 0; i < L; i++) {
			M[i][0] = 1.0;
			if (isPreWarping) {
				f = SignalProcUtils.freq2barkNew(freqsInHz[i]) / denum;
				for (j = 1; j < p + 1; j++)
					M[i][j] = 2.0 * Math.cos(MathUtils.TWOPI * f * j);
			} else {
				f = SignalProcUtils.hz2radian(freqsInHz[i], samplingRateInHz);
				for (j = 1; j < p + 1; j++)
					M[i][j] = 2.0 * Math.cos(f * j);
			}
		}

		return M;
	}

	public static double[][] precomputeMTransW(double[][] M, double[] weights) {
		double[][] MTransW = null;
		if (weights != null) {
			double[][] W = MathUtils.toDiagonalMatrix(weights);
			double[][] MTrans = MathUtils.transpoze(M);
			MTransW = MathUtils.matrixProduct(MTrans, W);
		} else
			// No weights given
			MTransW = MathUtils.transpoze(M);

		return MTransW;
	}

	public static double[][] precomputeMTransWM(double[][] MTransW, double[][] M) {
		double[][] MTransWM = MathUtils.matrixProduct(MTransW, M);

		return MTransWM;
	}

	public static double[][] precomputeLambdaR(double lambda, int cepsOrder) {
		int p = cepsOrder;
		double[] diagR = new double[p + 1];
		double tmp = 8.0 * (Math.PI) * (Math.PI);
		for (int i = 0; i < p + 1; i++)
			diagR[i] = tmp * i * i;
		double[][] R = MathUtils.toDiagonalMatrix(diagR);

		double[][] lambdaR = MathUtils.multiply(lambda, R);

		return lambdaR;
	}

	public static double[][] precomputeInverted(double[][] MTransWM, double[][] lambdaR) {
		double[][] inverted = MathUtils.inverse(MathUtils.add(MTransWM, lambdaR));

		return inverted;
	}

	// Another version when frequencies are fixed and all precomputations were done by calling precomputeForCepstrum with these
	// fixed values
	// Note that cepstrum is always computed using log amps, therefore the fitted spectrum computed from these cepstrum coeffs
	// will be in log amp domain
	public static float[] freqsLinearAmps2cepstrum(double[] linearAmps, double[][] MTransW, double[][] inverted) {
		double[] logAmps = MathUtils.log10(linearAmps);
		double[] a = MathUtils.multiply(logAmps, 20.0);

		double[] cepsDouble = null;

		double[] MTransWa = MathUtils.matrixProduct(MTransW, a);
		cepsDouble = MathUtils.matrixProduct(inverted, MTransWa);

		float[] ceps = ArrayUtils.copyDouble2Float(cepsDouble);

		return ceps;
	}

	protected static double[] cepstrum2logAmpHalfSpectrum(float[] ceps, int fftSize, int samplingRateInHz, boolean isPreWarping) {
		int maxFreq = SignalProcUtils.halfSpectrumSize(fftSize);
		double[] halfAbsSpectrum = new double[maxFreq];
		int p = ceps.length - 1;
		int i, k;

		double f;
		double denum = samplingRateInHz;

		if (isPreWarping)
			denum = (2.0 * SignalProcUtils.freq2barkNew(0.5 * samplingRateInHz));

		for (k = 0; k < maxFreq; k++) {
			halfAbsSpectrum[k] = ceps[0];
			if (isPreWarping) {
				f = SignalProcUtils.freq2barkNew(((double) k / (maxFreq - 1.0)) * 0.5 * samplingRateInHz) / denum;
				for (i = 1; i <= p; i++)
					halfAbsSpectrum[k] += 2.0 * ceps[i] * Math.cos(MathUtils.TWOPI * f * i);
			} else {
				f = SignalProcUtils.index2freq(k, samplingRateInHz, maxFreq - 1);
				f = SignalProcUtils.hz2radian(f, samplingRateInHz);
				for (i = 1; i <= p; i++)
					halfAbsSpectrum[k] += 2.0 * ceps[i] * Math.cos(f * i);
			}
		}

		return halfAbsSpectrum;
	}
}