// VocalTractScalingSimpleProcessor.java example
//
// Explorer
// marytts-master
/**
 * Copyright 2007 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.signalproc.process;

import java.io.File;

import javax.sound.sampled.AudioFileFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;

import marytts.signalproc.window.Window;
import marytts.util.data.BufferedDoubleDataSource;
import marytts.util.data.audio.AudioDoubleDataSource;
import marytts.util.data.audio.DDSAudioInputStream;
import marytts.util.math.MathUtils;

/**
 * Simple frequency-domain vocal tract scaling. For every bin of the lower half spectrum the
 * processor picks the input bin whose (1-based) index equals the output index divided by a
 * per-bin scale factor, then rebuilds the upper half as the complex conjugate mirror image so
 * that the result is again the spectrum of a real-valued signal.
 *
 * @author Oytun Türk
 * 
 */
public class VocalTractScalingSimpleProcessor extends FrequencyDomainProcessor {

	// Smallest scale factor accepted; smaller values are raised to this floor
	private static final double MIN_VSCALE = 0.05;

	// Per-bin scale factors (maxFreq entries), or null when no scaling is to be applied
	private double[] vscales;
	// Number of bins in the lower half spectrum: fftSize / 2 + 1
	private int maxFreq;
	// Scratch buffers holding the warped lower half spectrum
	private double[] realOut;
	private double[] imagOut;

	/**
	 * Sets the scaling ratios. Call this whenever the ratios should change; if they are fixed for
	 * the whole signal it is sufficient to pass them once to the constructor.
	 * <p>
	 * A non-empty array is resized to {@code maxFreq} entries with {@code MathUtils.modifySize}
	 * and every entry below 0.05 is raised to 0.05 so that the division in {@code process} never
	 * hits zero. A {@code null} or empty array switches scaling off.
	 *
	 * @param vscalesIn
	 *            scaling ratios, or {@code null}/empty to disable scaling
	 */
	public void SetVScales(double[] vscalesIn) {
		if (vscalesIn == null || vscalesIn.length == 0) {
			vscales = null;
			return;
		}

		vscales = MathUtils.modifySize(vscalesIn, maxFreq); // Modify length to match current length of spectrum

		for (int i = 0; i < maxFreq; i++) {
			if (vscales[i] < MIN_VSCALE)
				vscales[i] = MIN_VSCALE; // Put a floor to avoid divide by zero
		}
	}

	/**
	 * Creates a processor for the given FFT size and initial scaling ratios.
	 *
	 * @param fftSize
	 *            FFT size handed to the superclass; the lower half spectrum has
	 *            {@code fftSize / 2 + 1} bins
	 * @param vscalesIn
	 *            initial scaling ratios, see {@link #SetVScales(double[])}
	 */
	public VocalTractScalingSimpleProcessor(int fftSize, double[] vscalesIn) {
		super(fftSize);

		maxFreq = fftSize / 2 + 1;

		SetVScales(vscalesIn);

		realOut = new double[maxFreq];
		imagOut = new double[maxFreq];
	}

	/**
	 * Performs linear/non-linear vocal tract scaling in place on one spectrum. Does nothing when
	 * no scaling ratios are set.
	 *
	 * @param real
	 *            real parts of the spectrum, overwritten with the result
	 * @param imag
	 *            imaginary parts of the spectrum, overwritten with the result
	 */
	protected void process(double[] real, double[] imag) {
		if (vscales == null) {
			return;
		}

		// Scale the vocal tract: indices are 1-based here, hence the "- 1" on every array access
		for (int bin = 1; bin <= maxFreq; bin++) {
			// Round bin / scale to the nearest integer to find the source bin
			int srcBin = (int) Math.floor(((double) bin) / vscales[bin - 1] + 0.5);
			// Clamp to the valid range of the lower half spectrum
			if (srcBin < 1)
				srcBin = 1;
			if (srcBin > maxFreq)
				srcBin = maxFreq;

			realOut[bin - 1] = real[srcBin - 1];
			imagOut[bin - 1] = imag[srcBin - 1];
		}

		// Copy the modified DFT back to the input arrays
		System.arraycopy(realOut, 0, real, 0, maxFreq);
		System.arraycopy(imagOut, 0, imag, 0, maxFreq);

		// Generate the complex conjugate part to make the output the DFT of a real-valued signal:
		// the mirrored bin keeps the real part and takes the NEGATED imaginary part
		for (int i = maxFreq + 1; i <= real.length; i++) {
			int mirror = 2 * maxFreq - i - 1;
			real[i - 1] = real[mirror];
			imag[i - 1] = -imag[mirror];
		}
	}

	// Returns path without its file extension; a dot inside a directory name is not an extension
	private static String stripExtension(String path) {
		int dot = path.lastIndexOf('.');
		int separator = Math.max(path.lastIndexOf('/'), path.lastIndexOf('\\'));
		return dot > separator ? path.substring(0, dot) : path;
	}

	/**
	 * Applies a fixed scaling ratio of 2.0 to every audio file named on the command line and
	 * writes the result next to the input as {@code <name>_vocalTractSimpleScaled.wav}.
	 *
	 * @param args
	 *            paths of the audio files to process
	 * @throws Exception
	 *             if a file cannot be read, decoded or written
	 */
	public static void main(String[] args) throws Exception {
		// Joint vocal tract and pitch scaling since there is no LP based vocal tract estimation yet
		double[] vscales = { 2.0 };

		for (int i = 0; i < args.length; i++) {
			AudioInputStream inputAudio = AudioSystem.getAudioInputStream(new File(args[i]));
			try {
				int samplingRate = (int) inputAudio.getFormat().getSampleRate();
				AudioDoubleDataSource signal = new AudioDoubleDataSource(inputAudio);
				FrameOverlapAddSource foas = new FrameOverlapAddSource(signal, Window.HANNING, true, 1024, samplingRate,
						new VocalTractScalingSimpleProcessor(1024, vscales));
				DDSAudioInputStream outputAudio = new DDSAudioInputStream(new BufferedDoubleDataSource(foas),
						inputAudio.getFormat());
				String outFileName = stripExtension(args[i]) + "_vocalTractSimpleScaled.wav";
				AudioSystem.write(outputAudio, AudioFileFormat.Type.WAVE, new File(outFileName));
			} finally {
				// Release the input file handle even when processing or writing fails
				inputAudio.close();
			}
		}
	}
}