// NaiveVocoder.java example
//
// Explorer
// marytts-master
/**
 * Copyright 2004-2006 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.signalproc.process;

import java.io.File;

import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;

import marytts.signalproc.display.FunctionGraph;
import marytts.signalproc.display.SignalGraph;
import marytts.signalproc.window.Window;
import marytts.util.data.BufferedDoubleDataSource;
import marytts.util.data.DoubleDataSource;
import marytts.util.data.audio.AudioDoubleDataSource;
import marytts.util.math.MathUtils;

/**
 * A naive overlap-add time stretching algorithm without any phase correction; used only for demonstrating the artefacts that
 * arise from not correcting phase.
 * 
 * @author Marc Schröder
 * 
 */
public class NaiveVocoder extends FrameOverlapAddSource {
	/** Frame length handed to <code>initialise</code> by the constructor (presumably counted in samples). */
	public static final int DEFAULT_FRAMELENGTH = 2048;

	/**
	 * The rate change factor in effect. It starts out as the value requested in the constructor and is overwritten by
	 * {@link #getInputFrameshift(int)} with the factor that can actually be realised with whole-sample frame shifts.
	 */
	protected double rateChangeFactor;

	/**
	 * Set up a naive overlap-add time stretcher reading from the given source, using a Hanning window and frames of
	 * {@link #DEFAULT_FRAMELENGTH}.
	 * 
	 * @param inputSource
	 *            input source
	 * @param samplingRate
	 *            sampling rate
	 * @param rateChangeFactor
	 *            the factor by which to speed up or slow down the source. Values greater than one will speed up, values smaller
	 *            than one will slow down the original.
	 * @throws IllegalArgumentException
	 *             if rateChangeFactor is not a finite number greater than zero
	 */
	public NaiveVocoder(DoubleDataSource inputSource, int samplingRate, double rateChangeFactor) {
		// A NaN, infinite, zero or negative factor cannot be turned into a usable (positive) input frameshift.
		if (Double.isNaN(rateChangeFactor) || Double.isInfinite(rateChangeFactor) || rateChangeFactor <= 0) {
			throw new IllegalArgumentException("Rate change factor must be a finite number greater than zero, got "
					+ rateChangeFactor);
		}
		// Store the factor before initialise() so that it is already available should the superclass query
		// getInputFrameshift() during set-up (presumably it does -- confirm against FrameOverlapAddSource).
		this.rateChangeFactor = rateChangeFactor;
		initialise(inputSource, Window.HANNING, true, DEFAULT_FRAMELENGTH, samplingRate, null);
	}

	/**
	 * Derive the input frameshift that corresponds to the given output frameshift under the current rate change factor.
	 * 
	 * The product of output frameshift and rate change factor is truncated to a whole number of samples. If that makes the
	 * realised factor differ from the requested one, a message is printed to stderr and {@link #rateChangeFactor} is updated
	 * to the realised value. For a positive output frameshift, the result is never smaller than one sample, because a
	 * frameshift of zero would never advance through the input.
	 * 
	 * @param outputFrameshift
	 *            the output frameshift, in samples
	 * @return the input frameshift, in samples
	 */
	protected int getInputFrameshift(int outputFrameshift) {
		int inputFrameshift = (int) (outputFrameshift * rateChangeFactor);
		if (outputFrameshift > 0 && inputFrameshift < 1) {
			// Very small factors truncate to 0; use the smallest shift that still moves forward.
			// The adjustment below then reports the factor that is really applied.
			inputFrameshift = 1;
		}
		double actualFactor = (double) inputFrameshift / outputFrameshift;
		if (rateChangeFactor != actualFactor) {
			System.err.println("With output frameshift " + outputFrameshift + ", need to adjust rate change factor to "
					+ actualFactor);
			rateChangeFactor = actualFactor;
		}
		return inputFrameshift;
	}

	/**
	 * Based on the given rate change factor, compute the exact length change factor for a given signal length, based on the
	 * current frame length and input/output frame shifts.
	 * 
	 * From the illustrations in {@link FrameOverlapAddSource}, it can be seen that for a given frame length f and frame shift
	 * s, the length of a signal can be described as <code>l(n) = f + n*s - delta</code>.
	 * 
	 * f is fixed; s is si for input frameshift, so for output frameshift. For a given input length, one can compute n and
	 * delta from f and si, and then obtain the output length by inserting the same n and delta together with so.
	 * 
	 * @param inputLengthInSamples
	 *            length of the input signal, in samples
	 * @return the output length, in samples
	 */
	public int computeOutputLength(int inputLengthInSamples) {
		int f = frameProvider.getFrameLengthSamples();
		int so = blockSize; // output frameshift
		int si = frameProvider.getFrameShiftSamples(); // input frameshift
		// Sanity check (only evaluated with -ea): the frame provider's shift must match the one derived from the current factor.
		assert si == getInputFrameshift(so);
		// n: smallest number of input frame shifts such that f + n*si reaches the end of the input.
		int n = (int) Math.ceil(((double) inputLengthInSamples - f) / si);
		// delta: number of samples by which f + n*si overshoots the input length.
		int delta = f + n * si - inputLengthInSamples;
		assert delta < si;
		// Same n and delta, but advancing by the output frameshift.
		return f + n * so - delta;
	}

	/**
	 * Command-line demo: time-stretch each given audio file, display the original and the result, and print length and
	 * energy statistics to stderr.
	 * 
	 * @param args
	 *            args[0] is the rate change factor; args[1] and following are the audio files to process
	 * @throws Exception
	 *             if the rate change factor cannot be parsed, or an audio file cannot be read
	 */
	public static void main(String[] args) throws Exception {
		if (args.length < 2) {
			System.err.println("Usage: java " + NaiveVocoder.class.getName()
					+ " <rateChangeFactor> <audiofile> [<audiofile> ...]");
			return;
		}
		// The factor is the same for every file, so parse it once up front.
		double rateFactor = Double.parseDouble(args[0]);
		for (int i = 1; i < args.length; i++) {
			AudioInputStream inputAudio = AudioSystem.getAudioInputStream(new File(args[i]));
			int samplingRate;
			double[] signal;
			try {
				samplingRate = (int) inputAudio.getFormat().getSampleRate();
				signal = new AudioDoubleDataSource(inputAudio).getAllData();
			} finally {
				// All samples are in memory now; release the underlying file handle.
				inputAudio.close();
			}
			FunctionGraph signalGraph = new SignalGraph(signal, samplingRate);
			signalGraph.showInJFrame("signal", true, true);
			// SignalSpectrum signalSpectrum = new SignalSpectrum(signal, samplingRate);
			// signalSpectrum.showInJFrame("signal", true, true);
			NaiveVocoder pv = new NaiveVocoder(new BufferedDoubleDataSource(signal), samplingRate, rateFactor);
			double[] result = pv.getAllData();
			FunctionGraph resultGraph = new SignalGraph(result, samplingRate);
			resultGraph.showInJFrame("result", true, true);
			// Spectrogram resultSpectrogram = new Spectrogram(result, samplingRate);
			// resultSpectrogram.showInJFrame("result", true, true);
			// SignalSpectrum resultSpectrum = new SignalSpectrum(result, samplingRate);
			// resultSpectrum.showInJFrame("result", true, true);
			System.err.println("Signal has length " + signal.length + ", result " + result.length);
			if (signal.length == result.length) {
				// Only meaningful when no length change happened, i.e. sample-by-sample comparison is possible.
				double err = MathUtils.sumSquaredError(signal, result);
				System.err.println("Sum squared error: " + err);
				// double[] difference = MathUtils.substract(signal, result);
				// FunctionGraph diffGraph = new SignalGraph(difference, samplingRate);
				// diffGraph.showInJFrame("difference", true, true);
			}
			System.err.println("Expected result length: " + pv.computeOutputLength(signal.length) + ", found: " + result.length);
			double meanSignalEnergy = MathUtils.mean(MathUtils.multiply(signal, signal));
			double meanResultEnergy = MathUtils.mean(MathUtils.multiply(result, result));
			System.err.println("Mean result energy: " + (meanResultEnergy / meanSignalEnergy * 100) + "% of mean signal energy");

			// DDSAudioInputStream outputAudio = new DDSAudioInputStream(new BufferedDoubleDataSource(pv),
			// inputAudio.getFormat());
			// String outFileName = args[i].substring(0, args[i].length()-4) + "_copy.wav";
			// AudioSystem.write(outputAudio, AudioFileFormat.Type.WAVE, new File(outFileName));
		}

	}

}