/**
* Copyright 2007 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.signalproc.process;
import java.io.File;
import javax.sound.sampled.AudioFileFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import marytts.signalproc.window.Window;
import marytts.util.data.BufferedDoubleDataSource;
import marytts.util.data.audio.AudioDoubleDataSource;
import marytts.util.data.audio.DDSAudioInputStream;
import marytts.util.math.MathUtils;
/**
 * Frequency-domain processor that warps the spectrum of each frame along the frequency axis.
 * <p>
 * For every bin of the lower half of the spectrum (DC up to and including the bin at {@code fftSize / 2}) the output
 * value is taken from the input bin whose 1-based index is the current 1-based index divided by the scaling factor
 * of that bin, rounded to the nearest integer and clamped to the valid range. Scaling factors greater than 1
 * therefore move spectral content towards higher frequencies, factors smaller than 1 towards lower frequencies.
 * The upper half of the spectrum is then rebuilt as the complex conjugate mirror image of the lower half.
 *
 * @author Oytun Türk
 */
public class VocalTractScalingSimpleProcessor extends FrequencyDomainProcessor {
    /** Smallest scaling factor that is accepted; smaller values are raised to this floor to avoid division by zero. */
    private static final double MIN_SCALE = 0.05;

    /** Suffix (including extension) appended to the input file name by {@link #main(String[])}. */
    private static final String OUTPUT_SUFFIX = "_vocalTractSimpleScaled.wav";

    /** Per-bin scaling factors, or {@code null} when no scaling is to be applied. */
    private double[] vscales;

    /** Number of bins in the lower half of the spectrum, i.e. {@code fftSize / 2 + 1}. */
    private int maxFreq;

    /** Scratch buffers holding the warped lower half of the spectrum. */
    private double[] realOut;
    private double[] imagOut;

    /**
     * Sets the scaling factors. Call this whenever the scaling ratios should change; if they are fixed for the whole
     * signal it is sufficient to pass them once to the constructor.
     * <p>
     * The given array is resized with {@code MathUtils.modifySize} so that it matches the length of the half
     * spectrum, and every resulting factor below {@value #MIN_SCALE} is raised to {@value #MIN_SCALE}.
     *
     * @param vscalesIn
     *            scaling factors; {@code null} or an empty array disables scaling, so that
     *            {@link #process(double[], double[])} leaves the spectrum untouched
     */
    public void SetVScales(double[] vscalesIn) {
        if (vscalesIn == null || vscalesIn.length == 0) {
            vscales = null;
            return;
        }

        vscales = MathUtils.modifySize(vscalesIn, maxFreq); // Modify length to match current length of spectrum
        for (int i = 0; i < maxFreq; i++) {
            if (vscales[i] < MIN_SCALE)
                vscales[i] = MIN_SCALE; // Put a floor to avoid divide by zero
        }
    }

    /**
     * Creates a processor for frames of the given FFT size.
     *
     * @param fftSize
     *            FFT size passed on to the superclass; the half spectrum has {@code fftSize / 2 + 1} bins
     * @param vscalesIn
     *            initial scaling factors, see {@link #SetVScales(double[])}; {@code null} or empty disables scaling
     */
    public VocalTractScalingSimpleProcessor(int fftSize, double[] vscalesIn) {
        super(fftSize);
        // maxFreq must be known before the scaling factors are resized to it
        maxFreq = fftSize / 2 + 1;
        SetVScales(vscalesIn);
        realOut = new double[maxFreq];
        imagOut = new double[maxFreq];
    }

    /**
     * Performs linear/non-linear vocal tract scaling in place on the given spectrum. Does nothing when no scaling
     * factors are set.
     *
     * @param real
     *            real part of the spectrum; overwritten with the warped spectrum
     * @param imag
     *            imaginary part of the spectrum; overwritten with the warped spectrum
     */
    protected void process(double[] real, double[] imag) {
        if (vscales == null)
            return;

        // Warp the lower half of the spectrum into the scratch buffers
        for (int bin = 0; bin < maxFreq; bin++) {
            // Source index is computed on 1-based bin numbers and rounded to the nearest integer
            int wInd = (int) Math.floor(((double) (bin + 1)) / vscales[bin] + 0.5);
            if (wInd < 1)
                wInd = 1;
            if (wInd > maxFreq)
                wInd = maxFreq;
            realOut[bin] = real[wInd - 1];
            imagOut[bin] = imag[wInd - 1];
        }

        // Copy the modified DFT to input
        System.arraycopy(realOut, 0, real, 0, maxFreq);
        System.arraycopy(imagOut, 0, imag, 0, maxFreq);

        // Generate the complex conjugate part to make the output the DFT of a real-valued signal:
        // the mirrored bin gets the same real part and the negated imaginary part
        for (int k = maxFreq; k < real.length; k++) {
            int mirror = 2 * (maxFreq - 1) - k;
            real[k] = real[mirror];
            imag[k] = -imag[mirror];
        }
    }

    /**
     * Builds the output file name for an input file by replacing the extension of its last path component (if any)
     * with {@value #OUTPUT_SUFFIX}. Names without an extension simply get the suffix appended.
     */
    private static String outputFileNameFor(String inputFileName) {
        int lastSeparator = Math.max(inputFileName.lastIndexOf('/'), inputFileName.lastIndexOf('\\'));
        int dot = inputFileName.lastIndexOf('.');
        // Only treat the dot as an extension separator if it lies inside the file name and is not its first character
        String base = (dot > lastSeparator + 1) ? inputFileName.substring(0, dot) : inputFileName;
        return base + OUTPUT_SUFFIX;
    }

    /**
     * Applies a fixed scaling factor of 2.0 to every audio file named on the command line and writes each result as
     * a WAVE file next to its input, with {@value #OUTPUT_SUFFIX} replacing the input extension.
     *
     * @param args
     *            paths of the audio files to process
     * @throws Exception
     *             if a file cannot be read, processed or written
     */
    public static void main(String[] args) throws Exception {
        // Joint vocal tract and pitch scaling since there is no LP based vocal tract estimation yet
        double[] vscales = { 2.0 };

        for (int i = 0; i < args.length; i++) {
            AudioInputStream inputAudio = AudioSystem.getAudioInputStream(new File(args[i]));
            try {
                int samplingRate = (int) inputAudio.getFormat().getSampleRate();
                AudioDoubleDataSource signal = new AudioDoubleDataSource(inputAudio);
                FrameOverlapAddSource foas = new FrameOverlapAddSource(signal, Window.HANNING, true, 1024, samplingRate,
                        new VocalTractScalingSimpleProcessor(1024, vscales));
                DDSAudioInputStream outputAudio = new DDSAudioInputStream(new BufferedDoubleDataSource(foas),
                        inputAudio.getFormat());
                AudioSystem.write(outputAudio, AudioFileFormat.Type.WAVE, new File(outputFileNameFor(args[i])));
            } finally {
                // Release the underlying file handle even if processing or writing fails
                inputAudio.close();
            }
        }
    }
}