/**
* Copyright 2004-2006 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.signalproc.process;
import java.io.File;
import javax.sound.sampled.AudioFileFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import marytts.signalproc.display.FunctionGraph;
import marytts.signalproc.display.SignalGraph;
import marytts.signalproc.display.SignalSpectrum;
import marytts.signalproc.display.Spectrogram;
import marytts.signalproc.window.Window;
import marytts.util.data.BufferedDoubleDataSource;
import marytts.util.data.DoubleDataSource;
import marytts.util.data.audio.AudioDoubleDataSource;
import marytts.util.data.audio.DDSAudioInputStream;
import marytts.util.math.MathUtils;
/**
 * A phase vocoder implementation for time stretching.
 * <p>
 * The rate change is obtained by using an input frame shift that differs from the output frame shift by the rate change
 * factor (see {@link #getInputFrameshift(int)}). The {@link PhaseUnwrapper} processor defined in this class implements
 * the phase correction, but it is currently <em>not</em> installed by the constructor (see the TODO there).
 *
 * @author Marc Schröder
 *
 */
public class PhaseVocoder extends FrameOverlapAddSource {
    public static final int DEFAULT_FRAMELENGTH = 2048;

    /** Sampling rate (in Hz) for which {@link #DEFAULT_FRAMELENGTH} is considered optimal. */
    private static final int REFERENCE_SAMPLING_RATE = 44100;

    /**
     * The effective rate change factor. It may be adjusted by {@link #getInputFrameshift(int)} so that it matches the
     * integer input/output frame shifts actually used.
     */
    protected double rateChangeFactor;

    /**
     * @param inputSource
     *            input source
     * @param samplingRate
     *            sampling rate in Hz; must be positive
     * @param rateChangeFactor
     *            the factor by which to speed up or slow down the source. Values greater than one will speed up, values
     *            smaller than one will slow down the original. Must be a positive, finite number.
     * @throws IllegalArgumentException
     *             if samplingRate is not positive, or if rateChangeFactor is not a positive finite number
     */
    public PhaseVocoder(DoubleDataSource inputSource, int samplingRate, double rateChangeFactor) {
        // Without this check, the frame length loop below would never terminate for samplingRate <= 0.
        if (samplingRate <= 0) {
            throw new IllegalArgumentException("Sampling rate must be positive, got " + samplingRate);
        }
        // The negated comparison also rejects NaN.
        if (!(rateChangeFactor > 0) || Double.isInfinite(rateChangeFactor)) {
            throw new IllegalArgumentException("Rate change factor must be a positive finite number, got "
                    + rateChangeFactor);
        }
        // Must be set before initialise(), which presumably queries getInputFrameshift().
        this.rateChangeFactor = rateChangeFactor;
        // Optimal framelength depends on samplingrate:
        // DEFAULT_FRAMELENGTH is considered optimal for 44100
        int frameLength = DEFAULT_FRAMELENGTH;
        int s = samplingRate;
        // Halve the frame length for every doubling of the sampling rate that still fits into the reference rate.
        // Comparing before doubling keeps s from overflowing for very large sampling rates.
        while (s <= REFERENCE_SAMPLING_RATE / 2) {
            s *= 2;
            frameLength /= 2;
        }
        // System.err.println("PhaseVocoder: for samplingRate "+samplingRate+", using framelength "+frameLength);
        initialise(inputSource, Window.HANNING, true, frameLength, samplingRate, null);
        // TODO: The phase unwrapper causes marytts.signalproc.tests.PhaseVocoderTests to fail. Should investigate at some stage.
        // processor = new PhaseUnwrapper(frameLength);
    }

    /**
     * Compute the input frame shift that corresponds to the given output frame shift under the current rate change
     * factor. Because frame shifts are whole numbers of samples, the requested factor can usually only be approximated;
     * in that case a message is written to stderr and {@link #rateChangeFactor} is replaced by the factor that is
     * actually realised.
     *
     * @param outputFrameshift
     *            output frame shift in samples
     * @return the input frame shift in samples, at least 1
     */
    protected int getInputFrameshift(int outputFrameshift) {
        int inputFrameshift = truncatedInputFrameshift(outputFrameshift);
        double actualFactor = (double) inputFrameshift / outputFrameshift;
        if (rateChangeFactor != actualFactor) {
            System.err.println("With output frameshift " + outputFrameshift + ", need to adjust rate change factor to "
                    + actualFactor);
            rateChangeFactor = actualFactor;
        }
        return inputFrameshift;
    }

    /**
     * Side-effect-free computation of the input frame shift for the given output frame shift, using the current rate
     * change factor. The result is truncated to an integer and never smaller than 1, because a frame shift of zero
     * would mean that the position in the input never advances.
     */
    private int truncatedInputFrameshift(int outputFrameshift) {
        return Math.max(1, (int) (outputFrameshift * rateChangeFactor));
    }

    /**
     * Based on the given rate change factor, compute the exact length change factor for a given signal length, based on the
     * current frame length and input/output frame shifts.
     *
     * From the illustrations in {@link FrameOverlapAddSource}, it can be seen that for a given frame length f and frame
     * shift s, the length of a signal can be described as <code>l(n) = f + n*s - delta</code>.
     *
     * f is fixed; s is si for the input frameshift and so for the output frameshift. For a given input length, one can
     * compute n and the rest delta, and thus compute the output length.
     *
     * @param inputLengthInSamples
     *            length of the input signal, in samples
     * @return the output length, in samples
     */
    public int computeOutputLength(int inputLengthInSamples) {
        int f = frameProvider.getFrameLengthSamples();
        int so = blockSize; // output frameshift
        int si = frameProvider.getFrameShiftSamples(); // input frameshift
        // Use the pure helper here: getInputFrameshift() may print and modify rateChangeFactor, and an assertion must
        // not change program state depending on whether assertions are enabled.
        assert si == truncatedInputFrameshift(so);
        // Number of frame shifts needed beyond the first frame to cover the input
        int n = (int) Math.ceil(((double) inputLengthInSamples - f) / si);
        // Number of samples by which the last frame extends past the end of the input
        int delta = f + n * si - inputLengthInSamples;
        // System.err.println("li="+inputLengthInSamples+", f="+f+", si="+si+", n="+n+", delta="+delta+", => f+n*si-delta="+(f+n*si-delta));
        assert delta < si;
        int lo = f + n * so - delta;
        // System.err.println("so="+so+", => lo="+lo);
        return lo;
    }

    /**
     * A polar-domain processor which replaces the phase of each frame with a phase that has been accumulated from the
     * unwrapped frame-to-frame phase differences, scaled by the rate change factor.
     */
    public class PhaseUnwrapper extends PolarFrequencyProcessor {
        /**
         * The frequency-specific, frame-independent angle
         */
        protected double[] omega;
        /**
         * The wrapped input phase vector for the previous input frame.
         */
        protected double[] prevPhi;
        /**
         * Unwrapped phase difference vector for the difference between prevPhi and phi.
         */
        protected double[] deltaPhi;
        /**
         * The unwrapped output phase vector for the current frame.
         */
        protected double[] psi;

        /**
         * @param fftSize
         *            the FFT size, which is also the length of all phase vectors kept by this processor
         */
        public PhaseUnwrapper(int fftSize) {
            super(fftSize);
            omega = new double[fftSize];
            int ifs = getInputFrameshift(blockSize);
            for (int i = 0; i < fftSize; i++) {
                // Phase advance of frequency bin i over one input frame shift
                omega[i] = MathUtils.TWOPI * ifs * i / fftSize;
            }
            prevPhi = new double[fftSize];
            deltaPhi = new double[fftSize];
            psi = new double[fftSize];
        }

        /**
         * Perform the phase unwrapping of phi. The result is written back into phi; r is not used or modified.
         *
         * @param r
         *            the magnitudes of the current frame
         * @param phi
         *            the phases of the current frame; overwritten with the output phases
         */
        protected void processPolar(double[] r, double[] phi) {
            assert phi.length == prevPhi.length;
            for (int i = 0; i < phi.length; i++) {
                // Expected phase advance plus the deviation from it, mapped into the default angle range
                deltaPhi[i] = omega[i] + MathUtils.angleToDefaultAngle(phi[i] - prevPhi[i] - omega[i]);
                // Remember the input phase for the next frame
                prevPhi[i] = phi[i];
                // Accumulate the output phase, scaling the phase advance by the rate change
                psi[i] = MathUtils.angleToDefaultAngle(psi[i] + deltaPhi[i] / rateChangeFactor);
                // And output the result in the input array
                phi[i] = psi[i];
            }
        }
    }

    /**
     * Return the given file name without its extension. The name is returned unchanged if its last path component
     * contains no dot, or only a leading dot.
     */
    private static String stripExtension(String fileName) {
        int dot = fileName.lastIndexOf('.');
        int separator = Math.max(fileName.lastIndexOf('/'), fileName.lastIndexOf('\\'));
        return dot > separator + 1 ? fileName.substring(0, dot) : fileName;
    }

    /**
     * Command line demo: time-stretch each given audio file by the given factor, display signal, spectrum and
     * spectrogram graphs, and write the result next to the input as <code>NAME_stretched_by_FACTOR.wav</code>.
     *
     * @param args
     *            args[0] is the rate change factor, args[1..] are the audio files to process
     * @throws Exception
     *             if the factor cannot be parsed, or an audio file cannot be read or written
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: java " + PhaseVocoder.class.getName()
                    + " <rateChangeFactor> <audiofile> [<audiofile> ...]");
            return;
        }
        double factor = Double.parseDouble(args[0]);
        for (int i = 1; i < args.length; i++) {
            AudioInputStream inputAudio = AudioSystem.getAudioInputStream(new File(args[i]));
            int samplingRate = (int) inputAudio.getFormat().getSampleRate();
            double[] signalData;
            try {
                signalData = new AudioDoubleDataSource(inputAudio).getAllData();
            } finally {
                // All samples are in memory now (or reading failed); release the file handle either way.
                inputAudio.close();
            }
            FunctionGraph signalGraph = new SignalGraph(signalData, samplingRate);
            signalGraph.showInJFrame("signal", true, true);
            SignalSpectrum signalSpectrum = new SignalSpectrum(signalData, samplingRate);
            signalSpectrum.showInJFrame("signal", true, true);
            DoubleDataSource signal = new BufferedDoubleDataSource(signalData);
            PhaseVocoder pv = new PhaseVocoder(signal, samplingRate, factor);
            double[] result = pv.getAllData();
            FunctionGraph resultGraph = new SignalGraph(result, samplingRate);
            resultGraph.showInJFrame("result", true, true);
            Spectrogram resultSpectrogram = new Spectrogram(result, samplingRate);
            resultSpectrogram.showInJFrame("result", true, true);
            SignalSpectrum resultSpectrum = new SignalSpectrum(result, samplingRate);
            resultSpectrum.showInJFrame("result", true, true);
            System.err.println("Signal has length " + signalData.length + ", result " + result.length);
            if (signalData.length == result.length) {
                double err = MathUtils.sumSquaredError(signalData, result);
                System.err.println("Sum squared error: " + err);
                double[] difference = MathUtils.subtract(signalData, result);
                FunctionGraph diffGraph = new SignalGraph(difference, samplingRate);
                diffGraph.showInJFrame("difference", true, true);
            }
            double meanSignalEnergy = MathUtils.mean(MathUtils.multiply(signalData, signalData));
            double meanResultEnergy = MathUtils.mean(MathUtils.multiply(result, result));
            System.err.println("Mean result energy: " + (meanResultEnergy / meanSignalEnergy * 100) + "% of mean signal energy");
            // Write the samples collected above: pv itself has already been read to its end by getAllData().
            DDSAudioInputStream outputAudio = new DDSAudioInputStream(new BufferedDoubleDataSource(result),
                    inputAudio.getFormat());
            String outFileName = stripExtension(args[i]) + "_stretched_by_" + args[0] + ".wav";
            AudioSystem.write(outputAudio, AudioFileFormat.Type.WAVE, new File(outFileName));
        }
    }
}