/**
* Copyright 2004-2006 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.signalproc.process;
import java.io.File;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import marytts.signalproc.display.FunctionGraph;
import marytts.signalproc.display.SignalGraph;
import marytts.signalproc.window.Window;
import marytts.util.data.BufferedDoubleDataSource;
import marytts.util.data.DoubleDataSource;
import marytts.util.data.audio.AudioDoubleDataSource;
import marytts.util.math.MathUtils;
/**
* A naive overlap-add time stretching algorithm without any phase correction; used only for demonstrating the artefacts that
* arise from not correcting phase.
*
* @author Marc Schröder
*
*/
public class NaiveVocoder extends FrameOverlapAddSource {
    /** Frame length, in samples, that the constructor passes to {@code initialise}. */
    public static final int DEFAULT_FRAMELENGTH = 2048;

    /**
     * Ratio of input frameshift to output frameshift. {@link #getInputFrameshift(int)} overwrites the requested value with
     * the ratio that is actually realised by the integer frameshifts.
     */
    protected double rateChangeFactor;

    /**
     * Set up a naive vocoder using a Hanning window and a frame length of {@link #DEFAULT_FRAMELENGTH} samples.
     *
     * @param inputSource
     *            input source
     * @param samplingRate
     *            sampling rate
     * @param rateChangeFactor
     *            the factor by which to speed up or slow down the source. Values greater than one will speed up, values smaller
     *            than one will slow down the original. Must be a positive, finite number.
     * @throws IllegalArgumentException
     *             if rateChangeFactor is NaN, infinite, zero or negative
     */
    public NaiveVocoder(DoubleDataSource inputSource, int samplingRate, double rateChangeFactor) {
        // Reject values for which no meaningful input frameshift exists; without this check they would only
        // surface later as a zero/negative frameshift.
        if (Double.isNaN(rateChangeFactor) || Double.isInfinite(rateChangeFactor) || rateChangeFactor <= 0) {
            throw new IllegalArgumentException("Rate change factor must be a positive finite number, but is "
                    + rateChangeFactor);
        }
        // Assigned before initialise() on purpose: presumably the superclass queries getInputFrameshift()
        // while initialising -- NOTE(review): confirm against FrameOverlapAddSource.
        this.rateChangeFactor = rateChangeFactor;
        initialise(inputSource, Window.HANNING, true, DEFAULT_FRAMELENGTH, samplingRate, null);
    }

    /**
     * Side-effect-free computation of the input frameshift: the product is truncated towards zero, but never allowed to drop
     * below one sample, because a frameshift of zero would mean the input is never advanced.
     *
     * @param outputFrameshift
     *            output frameshift in samples
     * @param factor
     *            requested ratio of input frameshift to output frameshift
     * @return the input frameshift in samples, at least 1
     */
    private static int toInputFrameshift(int outputFrameshift, double factor) {
        return Math.max(1, (int) (outputFrameshift * factor));
    }

    /**
     * Derive the input frameshift from the given output frameshift and the current rate change factor. Because frameshifts
     * are whole numbers of samples, the requested factor can usually only be approximated; when that happens, the realised
     * factor is reported on stderr and stored in {@link #rateChangeFactor}.
     *
     * @param outputFrameshift
     *            output frameshift in samples
     * @return the input frameshift in samples, at least 1
     */
    protected int getInputFrameshift(int outputFrameshift) {
        int inputFrameshift = toInputFrameshift(outputFrameshift, rateChangeFactor);
        double actualFactor = (double) inputFrameshift / outputFrameshift;
        if (rateChangeFactor != actualFactor) {
            System.err.println("With output frameshift " + outputFrameshift + ", need to adjust rate change factor to "
                    + actualFactor);
            rateChangeFactor = actualFactor;
        }
        return inputFrameshift;
    }

    /**
     * Based on the given rate change factor, compute the exact length change factor for a given signal length, based on the
     * current frame length and input/output frame shifts.
     *
     * From the illustrations in {@link FrameOverlapAddSource}, it can be seen that for a given frame length f and frame shift
     * s, the length of a signal can be described as <code>l(n) = f + n*s - delta</code>.
     *
     * f is fixed; s is si for input frameshift, so for output frameshift. For a given input length, one can compute n and
     * delta on the input side and re-use them with so to obtain the output length.
     *
     * @param inputLengthInSamples
     *            length of the input signal, in samples
     * @return the output length, in samples
     */
    public int computeOutputLength(int inputLengthInSamples) {
        int f = frameProvider.getFrameLengthSamples();
        int so = blockSize; // output frameshift
        int si = frameProvider.getFrameShiftSamples(); // input frameshift
        // Use the pure helper here: an assert must not print or modify rateChangeFactor, otherwise the
        // program behaves differently depending on whether assertions are enabled.
        assert si == toInputFrameshift(so, rateChangeFactor);
        // n: number of input frameshifts needed beyond the first frame to reach the end of the input
        int n = (int) Math.ceil(((double) inputLengthInSamples - f) / si);
        // delta: number of samples by which f + n*si overshoots the actual input length
        int delta = f + n * si - inputLengthInSamples;
        assert delta < si;
        // same n and delta, but advancing by the output frameshift
        return f + n * so - delta;
    }

    /**
     * Demo entry point: time-stretch each given audio file by the given factor, display the original and the result as
     * signal graphs, and print length and energy statistics to stderr.
     *
     * @param args
     *            args[0] is the rate change factor; args[1] and following are audio file names
     * @throws Exception
     *             if the rate change factor cannot be parsed or an audio file cannot be read
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 2) {
            System.err.println("Usage: java " + NaiveVocoder.class.getName()
                    + " <rateChangeFactor> <audiofile> [<audiofile> ...]");
            return;
        }
        // The factor is the same for all files, so parse (and fail on bad input) once, before any window is opened.
        double rateFactor = Double.parseDouble(args[0]);
        for (int i = 1; i < args.length; i++) {
            AudioInputStream inputAudio = AudioSystem.getAudioInputStream(new File(args[i]));
            int samplingRate;
            double[] signal;
            try {
                samplingRate = (int) inputAudio.getFormat().getSampleRate();
                signal = new AudioDoubleDataSource(inputAudio).getAllData();
            } finally {
                // All samples are in memory now (or reading failed); release the file handle either way.
                inputAudio.close();
            }
            FunctionGraph signalGraph = new SignalGraph(signal, samplingRate);
            signalGraph.showInJFrame("signal", true, true);

            NaiveVocoder pv = new NaiveVocoder(new BufferedDoubleDataSource(signal), samplingRate, rateFactor);
            double[] result = pv.getAllData();
            FunctionGraph resultGraph = new SignalGraph(result, samplingRate);
            resultGraph.showInJFrame("result", true, true);

            System.err.println("Signal has length " + signal.length + ", result " + result.length);
            // A sample-wise comparison is only possible when the length is unchanged.
            if (signal.length == result.length) {
                double err = MathUtils.sumSquaredError(signal, result);
                System.err.println("Sum squared error: " + err);
            }
            System.err.println("Expected result length: " + pv.computeOutputLength(signal.length) + ", found: " + result.length);
            double meanSignalEnergy = MathUtils.mean(MathUtils.multiply(signal, signal));
            double meanResultEnergy = MathUtils.mean(MathUtils.multiply(result, result));
            System.err.println("Mean result energy: " + (meanResultEnergy / meanSignalEnergy * 100) + "% of mean signal energy");
        }
    }
}