PhaseVocoder.java example

Explorer
marytts-master
/**
 * Copyright 2004-2006 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.signalproc.process;

import java.io.File;

import javax.sound.sampled.AudioFileFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;

import marytts.signalproc.display.FunctionGraph;
import marytts.signalproc.display.SignalGraph;
import marytts.signalproc.display.SignalSpectrum;
import marytts.signalproc.display.Spectrogram;
import marytts.signalproc.window.Window;
import marytts.util.data.BufferedDoubleDataSource;
import marytts.util.data.DoubleDataSource;
import marytts.util.data.audio.AudioDoubleDataSource;
import marytts.util.data.audio.DDSAudioInputStream;
import marytts.util.math.MathUtils;

/**
 * A phase vocoder implementation for time stretching. Phase unwrapping is performed.
 * 
 * @author Marc Schröder
 * 
 */
public class PhaseVocoder extends FrameOverlapAddSource {
	public static final int DEFAULT_FRAMELENGTH = 2048;
	protected double rateChangeFactor;

	/**
	 * @param inputSource
	 *            input source
	 * @param samplingRate
	 *            sampling rate
	 * @param rateChangeFactor
	 *            the factor by which to speed up or slow down the source. Values greater than one will speed up, values smaller
	 *            than one will slow down the original.
	 */
	public PhaseVocoder(DoubleDataSource inputSource, int samplingRate, double rateChangeFactor) {
		this.rateChangeFactor = rateChangeFactor;
		// Optimal framelength depends on samplingrate:
		// DEFAULT_FRAMELENGTH is considered optimal for 44100
		int frameLength = DEFAULT_FRAMELENGTH;
		int s = samplingRate;
		while ((s *= 2) <= 44100)
			frameLength /= 2;
		// System.err.println("PhaseVocoder: for samplingRate "+samplingRate+", using framelength "+frameLength);
		initialise(inputSource, Window.HANNING, true, frameLength, samplingRate, null);
		// TODO: The phrase unwrapper causes marytts.signalproc.tests.PhaseVocoderTests to fail. Should investigate at some stage.
		// processor = new PhaseUnwrapper(frameLength);
	}

	protected int getInputFrameshift(int outputFrameshift) {
		int inputFrameshift = (int) (outputFrameshift * rateChangeFactor);
		double actualFactor = (double) inputFrameshift / outputFrameshift;
		if (rateChangeFactor != actualFactor) {
			System.err.println("With output frameshift " + outputFrameshift + ", need to adjust rate change factor to "
					+ actualFactor);
			rateChangeFactor = actualFactor;
		}
		return inputFrameshift;
	}

	/**
	 * Based on the given rate change factor, compute the exact length change factor for a given signal length, based on the
	 * current frame length and input/output frame shifts.
	 * 
	 * From the illustrations in @see{FrameOverlapAddSource}, it can be seen that for a given frame length f and frame shift s,
	 * the length of a signal can be described as <code>l(n) = f + n*s - delta</code>.
	 * 
	 * f is fixed; s is si for input frameshift, so for output frameshift. For a given input length, one can compute n and rest
	 * and thus compute the output length.
	 * 
	 * @param inputLengthInSamples
	 *            inputLengthInSamples
	 * @return the output length
	 */
	public int computeOutputLength(int inputLengthInSamples) {
		int f = frameProvider.getFrameLengthSamples();
		int so = blockSize; // output frameshift
		int si = frameProvider.getFrameShiftSamples(); // input frameshift
		assert si == getInputFrameshift(so);
		int n = (int) Math.ceil(((double) inputLengthInSamples - f) / si);
		int delta = f + n * si - inputLengthInSamples;
		// System.err.println("li="+inputLengthInSamples+", f="+f+", si="+si+", n="+n+", delta="+delta+", => f+n*si-delta="+(f+n*si-delta));
		assert delta < si;
		int lo = f + n * so - delta;
		// System.err.println("so="+so+", => lo="+lo);
		return lo;
	}

	public class PhaseUnwrapper extends PolarFrequencyProcessor {
		/**
		 * The frequency-specific, frame-independent angle
		 */
		protected double[] omega;
		/**
		 * The wrapped input phase vector for the previous input frame.
		 */
		protected double[] prevPhi;
		/**
		 * Unwrapped phase difference vector for the difference between prevPhi and phi.
		 */
		protected double[] deltaPhi;
		/**
		 * The unwrapped output phase vector for the current frame.
		 */
		protected double[] psi;

		public PhaseUnwrapper(int fftSize) {
			super(fftSize);
			omega = new double[fftSize];
			int ifs = getInputFrameshift(blockSize);
			for (int i = 0; i < fftSize; i++) {
				omega[i] = MathUtils.TWOPI * ifs * i / fftSize;
			}
			prevPhi = new double[fftSize];
			deltaPhi = new double[fftSize];
			psi = new double[fftSize];
		}

		/**
		 * Perform the phase unwrapping of phi.
		 * 
		 * @param r
		 *            r
		 * @param phi
		 *            phi
		 */
		protected void processPolar(double[] r, double[] phi) {
			assert phi.length == prevPhi.length;
			for (int i = 0; i < phi.length; i++) {
				deltaPhi[i] = omega[i] + MathUtils.angleToDefaultAngle(phi[i] - prevPhi[i] - omega[i]);
				if (i == 123)
					System.err.println("i=" + i + ": phi=" + phi[i] + " prevPhi=" + prevPhi[i] + " diff in defaultrange="
							+ MathUtils.angleToDefaultAngle(phi[i] - prevPhi[i] - omega[i]) + " omega=" + omega[i] + " deltaPhi="
							+ deltaPhi[i]);
			}
			System.arraycopy(phi, 0, prevPhi, 0, phi.length);
			for (int i = 0; i < phi.length; i++) {
				psi[i] = MathUtils.angleToDefaultAngle(psi[i] + deltaPhi[i] / rateChangeFactor);
			}
			// And output the result in the input array
			System.arraycopy(psi, 0, phi, 0, phi.length);
		}
	}

	public static void main(String[] args) throws Exception {
		for (int i = 1; i < args.length; i++) {
			AudioInputStream inputAudio = AudioSystem.getAudioInputStream(new File(args[i]));
			int samplingRate = (int) inputAudio.getFormat().getSampleRate();
			DoubleDataSource signal = new AudioDoubleDataSource(inputAudio);
			double[] signalData = new AudioDoubleDataSource(inputAudio).getAllData();
			FunctionGraph signalGraph = new SignalGraph(signalData, samplingRate);
			signalGraph.showInJFrame("signal", true, true);
			SignalSpectrum signalSpectrum = new SignalSpectrum(signalData, samplingRate);
			signalSpectrum.showInJFrame("signal", true, true);
			signal = new BufferedDoubleDataSource(signalData);
			PhaseVocoder pv = new PhaseVocoder(signal, samplingRate, Double.parseDouble(args[0]));
			double[] result = pv.getAllData();
			FunctionGraph resultGraph = new SignalGraph(result, samplingRate);
			resultGraph.showInJFrame("result", true, true);
			Spectrogram resultSpectrogram = new Spectrogram(result, samplingRate);
			resultSpectrogram.showInJFrame("result", true, true);
			SignalSpectrum resultSpectrum = new SignalSpectrum(result, samplingRate);
			resultSpectrum.showInJFrame("result", true, true);
			System.err.println("Signal has length " + signalData.length + ", result " + result.length);
			if (signalData.length == result.length) {
				double err = MathUtils.sumSquaredError(signalData, result);
				System.err.println("Sum squared error: " + err);
				double[] difference = MathUtils.subtract(signalData, result);
				FunctionGraph diffGraph = new SignalGraph(difference, samplingRate);
				diffGraph.showInJFrame("difference", true, true);
			}
			double meanSignalEnergy = MathUtils.mean(MathUtils.multiply(signalData, signalData));
			double meanResultEnergy = MathUtils.mean(MathUtils.multiply(result, result));
			System.err.println("Mean result energy: " + (meanResultEnergy / meanSignalEnergy * 100) + "% of mean signal energy");

			DDSAudioInputStream outputAudio = new DDSAudioInputStream(new BufferedDoubleDataSource(pv), inputAudio.getFormat());
			String outFileName = args[i].substring(0, args[i].length() - 4) + "_stretched_by_" + args[0] + ".wav";
			AudioSystem.write(outputAudio, AudioFileFormat.Type.WAVE, new File(outFileName));
		}

	}

}