/**
 * Copyright 2007 DFKI GmbH.
 * All Rights Reserved. Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.signalproc.adaptation;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Arrays;

import javax.sound.sampled.AudioFileFormat;
import javax.sound.sampled.AudioFormat;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.UnsupportedAudioFileException;

import marytts.signalproc.adaptation.codebook.WeightedCodebook;
import marytts.signalproc.adaptation.codebook.WeightedCodebookLsfMatch;
import marytts.signalproc.adaptation.codebook.WeightedCodebookMapper;
import marytts.signalproc.adaptation.codebook.WeightedCodebookTransformerParams;
import marytts.signalproc.adaptation.gmm.jointgmm.JointGMMMapper;
import marytts.signalproc.adaptation.gmm.jointgmm.JointGMMMatch;
import marytts.signalproc.adaptation.gmm.jointgmm.JointGMMSet;
import marytts.signalproc.adaptation.gmm.jointgmm.JointGMMTransformerParams;
import marytts.signalproc.adaptation.prosody.PitchMapping;
import marytts.signalproc.adaptation.prosody.PitchStatistics;
import marytts.signalproc.adaptation.prosody.PitchTransformationData;
import marytts.signalproc.adaptation.prosody.PitchTransformer;
import marytts.signalproc.adaptation.prosody.ProsodyTransformerParams;
import marytts.signalproc.adaptation.smoothing.SmoothingDefinitions;
import marytts.signalproc.adaptation.smoothing.SmoothingFile;
import marytts.signalproc.adaptation.smoothing.TemporalSmoother;
import marytts.signalproc.analysis.F0ReaderWriter;
import marytts.signalproc.analysis.Labels;
import marytts.signalproc.analysis.LpcAnalyser;
import marytts.signalproc.analysis.LpcAnalyser.LpCoeffs;
import marytts.signalproc.analysis.LsfAnalyser;
import marytts.signalproc.analysis.PitchMarks;
import marytts.signalproc.analysis.PitchReaderWriter;
import marytts.signalproc.process.PsolaFrameProvider;
import marytts.signalproc.process.VoiceModificationParametersPreprocessor;
import marytts.signalproc.window.DynamicWindow;
import marytts.signalproc.window.Window;
import marytts.util.data.BufferedDoubleDataSource;
import marytts.util.data.DoubleDataSource;
import marytts.util.data.audio.AudioDoubleDataSource;
import marytts.util.data.audio.DDSAudioInputStream;
import marytts.util.display.DisplayUtils;
import marytts.util.io.FileUtils;
import marytts.util.io.LEDataInputStream;
import marytts.util.io.LEDataOutputStream;
import marytts.util.math.ArrayUtils;
import marytts.util.math.ComplexArray;
import marytts.util.math.FFTMixedRadix;
import marytts.util.math.MathUtils;
import marytts.util.signal.SignalProcUtils;

/**
 * A class that supports voice conversion through weighted codebook mapping or joint GMMs, combined with FD-PSOLA based
 * prosody and vocal tract modifications.
 *
 * Reference: Moulines, E. and W. Verhelst, 1995, “Time-Domain and Frequency-Domain Techniques for Prosodic Modification
 * of Speech”, in Kleijn and Paliwal (eds.), Speech Coding And Synthesis, pp. 519-555, Elsevier Science B.V., Netherlands.
 *
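 * <p>
 * A minimal usage sketch (hedged: the file names are hypothetical and the scale arrays are illustrative; real callers
 * usually derive them from transformer parameters):
 *
 * <pre>{@code
 * BaselineAdaptationItem item = new BaselineAdaptationItem();
 * item.setFromWavFilename("source.wav"); // derives audioFile, f0File, labelFile, ... from one base name
 *
 * WeightedCodebookTransformerParams params = new WeightedCodebookTransformerParams();
 * double[] pscales = { 1.0 }; // pitch scale factors
 * double[] tscales = { 1.0 }; // time scale factors
 * double[] escales = { 1.0 }; // energy scale factors
 * double[] vscales = { 1.0 }; // vocal tract (frequency warping) scale factors
 *
 * FdpsolaAdapter adapter = new FdpsolaAdapter(item, "converted.wav", params, pscales, tscales, escales, vscales);
 * // adapter.fdpsolaOnline(mapper, codebook, pitchMapping); // see fdpsolaOnline(...)
 * }</pre>
 *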
 * @author Oytun Türk
 */
public class FdpsolaAdapter {
    protected DoubleDataSource input;
    protected AudioInputStream inputAudio;
    protected DDSAudioInputStream outputAudio;
    protected VoiceModificationParametersPreprocessor modParams;
    protected int numfrm;
    protected int numfrmFixed;
    protected int lpOrder; // LP analysis order
    protected String outputFile;
    protected String tempOutBinaryFile;
    protected int origLen;
    protected PitchMarks pm;
    protected double[] f0s;
    protected PsolaFrameProvider psFrm;
    protected double wsFixedInSeconds;
    protected double ssFixedInSeconds;
    protected int numPeriods;
    protected static int NUM_PITCH_SYNC_PERIODS = 3;

    public boolean bSilent;
    protected LEDataOutputStream dout; // Output stream for the little-endian binary temp file
    protected LEDataInputStream din; // Input stream for the little-endian binary temp file
    protected DynamicWindow windowIn;
    protected DynamicWindow windowOut;
    protected double[] wgt;
    protected double[] wgty;

    protected int frmSize;
    protected int newFrmSize;
    protected int newPeriod;
    protected int synthFrmInd;
    protected double localDurDiff;
    // -1: skip frame, 0: no repetition (use synthesized frame as it is), >0: number of repetitions
    // for synthesized frame
    protected int repeatSkipCount;
    protected double localDurDiffSaved;
    protected double sumLocalDurDiffs;
    protected double nextAdd;
    protected int synthSt;
    protected int synthTotal;
    protected int maxFrmSize;
    protected int maxNewFrmSize;
    protected int synthFrameInd;
    protected boolean bLastFrame;
    protected boolean bBroke;

    protected int outBuffLen;
    protected double[] outBuff;
    protected int outBuffStart;
    protected int totalWrittenToFile;

    protected double[] ySynthBuff;
    protected double[] wSynthBuff;
    protected int ySynthInd;
    protected double[] frm;
    protected boolean bWarp;

    protected double[] inputVT;
    protected double[] py2;
    protected ComplexArray hy;
    protected double[] frmy;
    protected double frmEn;
    protected double frmyEn;
    protected double gain;
    protected int newSkipSize;
    protected int halfWin;
    protected double[] newVScales;
    protected double[] tmpvsc;
    // protected boolean isWavFileOutput;
    protected int inputFrameIndex;
    protected static double MIN_PSCALE = 0.1;
    protected static double MAX_PSCALE = 5.0;
    protected static double MIN_TSCALE = 0.1;
    protected static double MAX_TSCALE = 5.0;
    protected int fs;

    protected double tscaleSingle;

    private double desiredFrameTime;
    private boolean bShowSpectralPlots;

    private PitchTransformer pitchTransformer;

    private SmoothingFile smoothingFile;
    private double[][] smoothedVocalTract;
    private int smoothedInd;
    private int[] preselectedIndices;
    private int[] allIndices;
    private Labels labels;
    private Labels targetLabels;
    private int currentLabelIndex;
    private double[][] targetLsfs;

    private BaselineTransformerParams baseParams;

    public FdpsolaAdapter(BaselineAdaptationItem inputItem, String strOutputFile,
            WeightedCodebookTransformerParams wctParamsIn, double[] pscales, double[] tscales, double[] escales,
            double[] vscales) throws UnsupportedAudioFileException, IOException {
        baseParams = new WeightedCodebookTransformerParams(wctParamsIn);

        init(inputItem, strOutputFile, pscales, tscales, escales, vscales);
    }

    public FdpsolaAdapter(BaselineAdaptationItem inputItem, String strOutputFile,
            JointGMMTransformerParams jgmmParamsIn, double[] pscales, double[] tscales, double[] escales,
            double[] vscales) throws UnsupportedAudioFileException, IOException {
        baseParams = new JointGMMTransformerParams(jgmmParamsIn);

        init(inputItem, strOutputFile, pscales, tscales, escales, vscales);
    }
    public void init(BaselineAdaptationItem inputItem, String strOutputFile, double[] pscales, double[] tscales,
            double[] escales, double[] vscales) throws IOException {
        // Smoothing
        smoothingFile = null;
        if (baseParams.smoothingState == SmoothingDefinitions.NONE)
            baseParams.smoothedVocalTractFile = "";

        if (baseParams.smoothingState == SmoothingDefinitions.ESTIMATING_SMOOTHED_VOCAL_TRACT) {
            if ("".equals(baseParams.smoothedVocalTractFile))
                throw new IllegalArgumentException("smoothedVocalTractFile not valid");
            else {
                smoothingFile = new SmoothingFile(baseParams.smoothedVocalTractFile, SmoothingFile.OPEN_FOR_WRITE);
                smoothingFile.smoothingMethod = baseParams.smoothingMethod;
                smoothingFile.writeHeader();
            }
        }

        if (baseParams.smoothingState == SmoothingDefinitions.TRANSFORMING_TO_SMOOTHED_VOCAL_TRACT
                && baseParams.smoothingMethod != SmoothingDefinitions.NO_SMOOTHING) {
            if (!FileUtils.exists(baseParams.smoothedVocalTractFile))
                throw new IllegalArgumentException("smoothedVocalTractFile not found");
            else {
                smoothingFile = new SmoothingFile(baseParams.smoothedVocalTractFile, SmoothingFile.OPEN_FOR_READ);
                smoothedVocalTract = smoothingFile.readAll();
                smoothedInd = 0;
            }
        }
        //

        pitchTransformer = new PitchTransformer();

        inputAudio = null;
        input = null;
        pm = null;
        f0s = null;

        wsFixedInSeconds = 0.02;
        ssFixedInSeconds = 0.01;
        numPeriods = NUM_PITCH_SYNC_PERIODS;

        origLen = 0;
        fs = 16000;

        numfrm = 0; // Total pitch synchronous frames (this is the actual number of frames to be processed)
        numfrmFixed = 0; // Total frames if the analysis were fixed skip-rate

        modParams = null;

        outputFile = null;

        tscaleSingle = 1.0;

        boolean bContinue = true;

        if (!FileUtils.exists(inputItem.audioFile)) {
            System.out.println("Error! Audio file " + inputItem.audioFile + " not found.");
            bContinue = false;
        }

        if (!FileUtils.exists(inputItem.f0File) && !FileUtils.exists(inputItem.pitchFile)) {
            System.out.println("Error! No F0 or pitch file found: " + inputItem.f0File + " " + inputItem.pitchFile);
            bContinue = false;
        }

        if (strOutputFile == null || strOutputFile.isEmpty()) {
            System.out.println("Invalid output file...");
            bContinue = false;
        }

        if (bContinue) {
            boolean isF0File = false;
            if (FileUtils.exists(inputItem.f0File))
                isF0File = true;

            try {
                inputAudio = AudioSystem.getAudioInputStream(new File(inputItem.audioFile));
            } catch (UnsupportedAudioFileException e) {
                throw new IOException("Cannot open audio", e);
            }

            input = new AudioDoubleDataSource(inputAudio);
            origLen = (int) input.getDataLength();
            fs = (int) inputAudio.getFormat().getSampleRate();

            PitchReaderWriter f0 = null;
            if (FileUtils.exists(inputItem.f0File))
                f0 = new F0ReaderWriter(inputItem.f0File);
            else
                f0 = new PitchReaderWriter(inputItem.pitchFile);
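            /*
             * Frame bookkeeping sketch (illustrative numbers, not taken from the code): with fs = 16000 Hz, a
             * one-second input (origLen = 16000 samples, no zero padding), wsFixedInSeconds = 0.02 and
             * ssFixedInSeconds = 0.01, the fixed-rate frame count below evaluates to
             * floor((1.0 - 0.01) / 0.01 + 0.5) + 2 = 99 + 2 = 101 frames. In the pitch-synchronous case, numfrm is
             * instead derived from the pitch marks: one analysis frame per pitch mark, each spanning
             * numPeriods = 3 periods, so the last numPeriods marks cannot start a full frame.
             */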
            pm = SignalProcUtils.pitchContour2pitchMarks(f0.contour, fs, origLen, f0.header.windowSizeInSeconds,
                    f0.header.skipSizeInSeconds, true, 0);

            // Total frames if the analysis were fixed skip-rate
            numfrmFixed = (int) (Math.floor(((double) (origLen + pm.totalZerosToPadd) / fs - 0.5 * wsFixedInSeconds)
                    / ssFixedInSeconds + 0.5) + 2);

            if (!baseParams.isFixedRateVocalTractConversion)
                // Total pitch synchronous frames (this is the actual number of frames to be processed)
                numfrm = pm.pitchMarks.length - numPeriods;
            else
                numfrm = numfrmFixed;

            f0s = SignalProcUtils.fixedRateF0Values(pm, wsFixedInSeconds, ssFixedInSeconds, numfrmFixed, fs);

            lpOrder = SignalProcUtils.getLPOrder(fs);

            // Estimation of time varying pitch, duration, energy, and vocal tract scaling amounts using either:
            // - pscales, tscales, escales, vscales
            // - or FESTIVAL_UTT (for pitch and duration) and target wav file (for energy)
            if (!baseParams.isPitchFromTargetFile && !baseParams.isDurationFromTargetFile
                    && !baseParams.isEnergyFromTargetFile) {
                modParams = new VoiceModificationParametersPreprocessor(fs, lpOrder, pscales, tscales, escales, vscales,
                        pm.pitchMarks, wsFixedInSeconds, ssFixedInSeconds, numfrm, numfrmFixed, numPeriods,
                        baseParams.isFixedRateVocalTractConversion);
                tscaleSingle = modParams.tscaleSingle;
            } else {
                // inputItem.targetEnergyFile should be computed from inputItem.targetWavFile at this point
                // inputItem.energyFile should be computed from inputItem.audioFile at this point
                String targetAlignmentFile = null;
                if (baseParams.targetAlignmentFileType == BaselineTransformerParams.LABELS)
                    targetAlignmentFile = inputItem.targetLabelFile;
                else if (baseParams.targetAlignmentFileType == BaselineTransformerParams.FESTIVAL_UTT)
                    targetAlignmentFile = inputItem.targetFestivalUttFile;

                String sourcePitchContourFile;
                if (isF0File)
                    sourcePitchContourFile = inputItem.f0File;
                else
                    sourcePitchContourFile = inputItem.pitchFile;

                String targetPitchContourFile;
                if (isF0File)
                    targetPitchContourFile = inputItem.targetF0File;
                else
                    targetPitchContourFile = inputItem.targetPitchFile;

                modParams = new VoiceModificationParametersPreprocessor(sourcePitchContourFile, isF0File,
                        inputItem.labelFile, inputItem.audioFile, targetPitchContourFile, inputItem.targetWavFile,
                        baseParams.isPitchFromTargetFile, baseParams.pitchFromTargetMethod,
                        baseParams.isDurationFromTargetFile, baseParams.durationFromTargetMethod,
                        baseParams.isEnergyFromTargetFile, baseParams.targetAlignmentFileType, targetAlignmentFile,
                        pm.pitchMarks, wsFixedInSeconds, ssFixedInSeconds, numfrm, numfrmFixed, numPeriods,
                        baseParams.isFixedRateVocalTractConversion);
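                /*
                 * tscaleSingle acts as a sentinel for the time-scaling mode (illustrative values): if every entry of
                 * modParams.tscalesVar equals 1.0, tscaleSingle stays 1.0 and the output can later be truncated to the
                 * input length; a single entry such as tscalesVar = {1.0, 1.2, 1.0} flips it to -1.0, marking variable
                 * time scaling so the synthesized length must be kept as-is.
                 */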
                tscaleSingle = 1.0;
                for (int i = 0; i < modParams.tscalesVar.length; i++) {
                    if (modParams.tscalesVar[i] != 1.0) {
                        tscaleSingle = -1.0;
                        break;
                    }
                }
            }

            outputFile = strOutputFile;

            if (!"".equals(inputItem.labelFile) && FileUtils.exists(inputItem.labelFile))
                labels = new Labels(inputItem.labelFile);
            else
                labels = null;

            if (!"".equals(inputItem.targetLabelFile) && FileUtils.exists(inputItem.targetLabelFile))
                targetLabels = new Labels(inputItem.targetLabelFile);
            else
                targetLabels = null;

            if (!"".equals(inputItem.targetWavFile) && FileUtils.exists(inputItem.targetWavFile)
                    && baseParams.isLsfsFromTargetFile) {
                try {
                    targetLsfs = LsfAnalyser.lsfAnalyzeWavFile(inputItem.targetWavFile, baseParams.lsfParams);
                } catch (UnsupportedAudioFileException e) {
                    e.printStackTrace();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            } else
                targetLsfs = null;
        }

        if (bContinue) {
            tmpvsc = new double[1];
            bSilent = false;

            if (outputFile != null)
                tempOutBinaryFile = outputFile + ".bin";

            if (!baseParams.isFixedRateVocalTractConversion)
                psFrm = new PsolaFrameProvider(input, pm, modParams.fs, modParams.numPeriods);
            else
                psFrm = new PsolaFrameProvider(input, wsFixedInSeconds, ssFixedInSeconds, modParams.fs, numfrm);

            try {
                dout = new LEDataOutputStream(tempOutBinaryFile);
            } catch (FileNotFoundException e) {
                e.printStackTrace();
            }

            windowIn = new DynamicWindow(Window.HANNING);
            windowOut = new DynamicWindow(Window.HANNING);

            frmSize = 0;
            newFrmSize = 0;
            newPeriod = 0;
            synthFrmInd = 0;
            localDurDiff = 0.0;
            // -1: skip frame, 0: no repetition (use synthesized frame as it is), >0: number of repetitions for
            // synthesized frame
            repeatSkipCount = 0;
            localDurDiffSaved = 0.0;
            sumLocalDurDiffs = 0.0;
            nextAdd = 0.0;

            synthSt = pm.pitchMarks[0];

            synthTotal = 0;

            maxFrmSize = (int) (numPeriods * fs / 40.0);
            if ((maxFrmSize % 2) != 0)
                maxFrmSize++;

            maxNewFrmSize = (int) (Math.floor(maxFrmSize / MIN_PSCALE + 0.5));
            if ((maxNewFrmSize % 2) != 0)
                maxNewFrmSize++;

            synthFrameInd = 0;
            bLastFrame = false;
            bBroke = false;

            outBuffLen = 500000;
            outBuff = MathUtils.zeros(outBuffLen);
            outBuffStart = 1;
            totalWrittenToFile = 0;

            ySynthBuff = MathUtils.zeros(maxNewFrmSize);
            wSynthBuff = MathUtils.zeros(maxNewFrmSize);
            ySynthInd = 1;
        }
    }
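    /**
     * Runs the analysis-modification-synthesis loop over all frames and writes the converted signal to the output wav
     * file. A hedged calling sketch (the mapper/data names are illustrative; any matching
     * {@link VocalTractTransformationFunction} / {@link VocalTractTransformationData} pair can be passed):
     *
     * <pre>{@code
     * // Hypothetical names; a codebook and a pitch mapping are normally loaded from trained model files.
     * WeightedCodebookMapper mapper = new WeightedCodebookMapper(codebookTransformerParams.mapperParams);
     * adapter.fdpsolaOnline(mapper, codebook, pitchMapping);
     * }</pre>
     */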
    public void fdpsolaOnline(VocalTractTransformationFunction vtMapper, VocalTractTransformationData vtData,
            PitchTransformationData ptData) throws IOException {
        int i;
        double[] frmIn;
        boolean isLastInputFrame;
        int inputFrameSize;
        int currentPeriod;

        desiredFrameTime = 1.05;
        bShowSpectralPlots = false;

        fs = (int) inputAudio.getFormat().getSampleRate();

        PitchStatistics inputF0Statistics = new PitchStatistics(baseParams.prosodyParams.pitchStatisticsType, f0s);

        double[] targetF0s = null;

        if (!baseParams.isPitchFromTargetFile) {
            if (ptData instanceof PitchMapping) {
                targetF0s = pitchTransformer.transform(baseParams.prosodyParams,
                        ((PitchMapping) ptData).f0StatisticsMapping, inputF0Statistics, f0s, modParams.pscalesVar);
            } else {
                baseParams.prosodyParams.pitchTransformationMethod = ProsodyTransformerParams.USE_ONLY_PSCALES;
                targetF0s = pitchTransformer.transform(baseParams.prosodyParams,
                        ((PitchMapping) ptData).f0StatisticsMapping, inputF0Statistics, f0s, modParams.pscalesVar);
            }
        } else {
            targetF0s = new double[numfrm];
        }

        preselectedIndices = null;
        allIndices = null;
        if (vtData instanceof WeightedCodebook && baseParams instanceof WeightedCodebookTransformerParams
                && !((WeightedCodebookTransformerParams) baseParams).isContextBasedPreselection) {
            // Whole codebook
            allIndices = new int[((WeightedCodebook) vtData).entries.length];
            for (i = 0; i < allIndices.length; i++)
                allIndices[i] = i;
        }

        inputFrameIndex = 0;
        for (i = 0; i < numfrm; i++) {
            frmIn = psFrm.getNextFrame();

            if (bBroke)
                break;

            if (i == numfrm - 1)
                isLastInputFrame = true;
            else
                isLastInputFrame = false;

            if (!baseParams.isFixedRateVocalTractConversion) {
                currentPeriod = pm.pitchMarks[i + 1] - pm.pitchMarks[i];
                inputFrameSize = pm.pitchMarks[i + modParams.numPeriods] - pm.pitchMarks[i] + 1;
            } else {
                currentPeriod = -1;
                inputFrameSize = frmIn.length;
            }

            double targetF0Value = 0.0;
            double currentF0Value;
            int index = (int) (Math.floor((psFrm.getCurrentTime() - 0.5 * wsFixedInSeconds) / ssFixedInSeconds + 0.5));
            if (!baseParams.isPitchFromTargetFile) {
                index = MathUtils.CheckLimits(index, 0, targetF0s.length - 1);
                targetF0Value = targetF0s[index];
            }

            currentF0Value = f0s[index];

            if (baseParams.isPitchFromTargetFile) {
                targetF0Value = currentF0Value * modParams.pscalesVar[i];
                targetF0s[i] = targetF0Value;
            }

            boolean isVoiced;
            if (!baseParams.isFixedRateVocalTractConversion) {
                if (pm.f0s[i] > 10.0)
                    isVoiced = true;
                else
                    isVoiced = false;
            } else {
                if (currentF0Value > 10.0)
                    isVoiced = true;
                else
                    isVoiced = false;
            }

            double currentF0;
            if (isVoiced)
                currentF0 = ((double) fs) / currentPeriod;
            else
                currentF0 = 0.0;

            if (labels != null)
                currentLabelIndex = SignalProcUtils.time2LabelIndex(psFrm.getCurrentTime(), labels);
            else
                currentLabelIndex = -1;

            processFrame(frmIn, isVoiced, currentF0, targetF0Value, modParams.tscalesVar[i], modParams.escalesVar[i],
                    modParams.vscalesVar[i], isLastInputFrame, currentPeriod, inputFrameSize, vtMapper, vtData);

            if (baseParams.isVocalTractTransformation
                    && baseParams.smoothingState == SmoothingDefinitions.TRANSFORMING_TO_SMOOTHED_VOCAL_TRACT
                    && baseParams.smoothingMethod != SmoothingDefinitions.NO_SMOOTHING) {
                smoothedInd++;
                if (smoothedInd > smoothedVocalTract.length - 1)
                    smoothedInd = smoothedVocalTract.length - 1;
            }
        }

        writeFinal();

        convertToWav(inputAudio.getFormat());

        inputAudio.close();

        // Perform smoothing on the vocal tract parameter file
        if (baseParams.smoothingState == SmoothingDefinitions.ESTIMATING_SMOOTHED_VOCAL_TRACT) {
            if (baseParams.isVocalTractTransformation) {
                System.out.println("Temporal smoothing started using "
                        + String.valueOf(baseParams.smoothingNumNeighbours) + " neighbours...");
                smoothingFile.close();
                smoothingFile = new SmoothingFile(baseParams.smoothedVocalTractFile, SmoothingFile.OPEN_FOR_READ);
                double[][] vts = smoothingFile.readAll();

                double[] tmp1 = new double[vts.length];
                for (i = 0; i < vts.length; i++)
                    tmp1[i] = vts[i][20];

                vts = TemporalSmoother.smooth(vts, baseParams.smoothingNumNeighbours);

                double[] tmp2 = new double[vts.length];
                for (i = 0; i < vts.length; i++)
                    tmp2[i] = vts[i][20];

                smoothingFile = new SmoothingFile(baseParams.smoothedVocalTractFile, SmoothingFile.OPEN_FOR_WRITE,
                        baseParams.smoothingMethod);
                smoothingFile.writeAll(vts);
                System.out.println("Temporal smoothing completed...");
            }
        } else if (baseParams.smoothingState == SmoothingDefinitions.TRANSFORMING_TO_SMOOTHED_VOCAL_TRACT)
            FileUtils.delete(baseParams.smoothedVocalTractFile);
        //

        /*
         * MaryUtils.plot(f0s); MaryUtils.plot(targetF0s);
         */
    }

    /**
     * Voice conversion version of the FD-PSOLA algorithm. The original FD-PSOLA is described in:
     *
     * Moulines, E. and W. Verhelst, 1995, “Time-Domain and Frequency-Domain Techniques for Prosodic Modification of
     * Speech”, in Kleijn and Paliwal (eds.), Speech Coding And Synthesis, pp. 519-555, Elsevier Science B.V.,
     * Netherlands.
     *
     * and the spectral conversion using codebook mapping in:
     *
     * Arslan, L. M., 1999, “Speaker Transformation Algorithm using Segmental Codebooks”, Speech Communication, 28,
     * pp. 211-226.
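     *
     * <p>
     * Worked example of the per-frame scale bookkeeping below (illustrative numbers): with currentF0 = 100 Hz and
     * targetF0 = 140 Hz, pscale = 1.4; a voiced frame of frmSize = 480 samples is resampled to
     * newFrmSize = floor(480 / 1.4 + 0.5) = 343, rounded up to 344 to stay even. With tscale = 1.0 the duration
     * deficit is (480 * 1.0 - 344) / 3 = 45.3 samples per period; since this exceeds 0.1 * newPeriod
     * (newPeriod = floor(344 / 3 + 0.5) = 115), the frame is marked for one repetition and the negative remainder is
     * carried over in nextAdd.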
     */
    public double[] processFrame(double[] frmIn, boolean isVoiced, double currentF0, double targetF0, double tscale,
            double escale, double vscale, boolean isLastInputFrame, int currentPeriod, int inputFrameSize,
            VocalTractTransformationFunction mapper, VocalTractTransformationData data) throws IOException {
        double pscale;
        if (currentF0 > 10.0)
            pscale = targetF0 / currentF0;
        else
            pscale = 1.0;

        if (pscale < MIN_PSCALE)
            pscale = MIN_PSCALE;
        if (pscale > MAX_PSCALE)
            pscale = MAX_PSCALE;
        if (tscale < MIN_TSCALE)
            tscale = MIN_TSCALE;
        if (tscale > MAX_TSCALE)
            tscale = MAX_TSCALE;

        double[] output = null;
        double[] outputTmp = null;
        int j, k, wInd, kMax;
        int tmpFix, tmpAdd, tmpMul;
        int remain;
        int kInd;

        VocalTractMatch match = null;

        windowIn = new DynamicWindow(baseParams.lsfParams.windowType);
        windowOut = new DynamicWindow(baseParams.lsfParams.windowType);

        // -1: skip frame, 0: no repetition (use synthesized frame as it is), >0: number of repetitions for
        // synthesized frame
        repeatSkipCount = 0;

        // Compute new frame sizes, change in durations due to pitch scaling, and required compensation amount in
        // samples
        // &
        // Find out which pitch-scaled frames to repeat/skip for overall duration compensation
        frmSize = inputFrameSize;
        if ((frmSize % 2) != 0)
            frmSize++;
        if (frmSize < 4)
            frmSize = 4;

        if (isVoiced) {
            newFrmSize = (int) (Math.floor(frmSize / pscale + 0.5));
            if ((newFrmSize % 2) != 0)
                newFrmSize++;
            if (newFrmSize < 4)
                newFrmSize = 4;
        } else
            newFrmSize = frmSize;

        newPeriod = (int) Math.floor(((double) newFrmSize) / NUM_PITCH_SYNC_PERIODS + 0.5);

        // Compute duration compensation required:
        // localDurDiffs(i) = (DESIRED) - (AFTER PITCH SCALING)
        // (-) if expansion occurred, (+) if compression occurred
        // We aim to make this as close to zero as possible in the following duration compensation step
        localDurDiff = nextAdd + (frmSize * tscale - newFrmSize) / NUM_PITCH_SYNC_PERIODS;

        nextAdd = 0;
        if (localDurDiff < -0.1 * newPeriod) // Expansion occurred, so skip this frame
        {
            repeatSkipCount--;
            if (!isLastInputFrame) {
                nextAdd = localDurDiff + newPeriod;
                localDurDiff = 0;
            }
        } else if (localDurDiff > 0.1 * newPeriod) // Compression occurred, so repeat this frame
        {
            while (localDurDiff > 0.1 * newPeriod) {
                repeatSkipCount++;
                localDurDiff -= newPeriod;
            }

            if (!isLastInputFrame) {
                nextAdd = localDurDiff;
                localDurDiff = 0;
            }
        }

        sumLocalDurDiffs += localDurDiff;

        if (isLastInputFrame) {
            // Check the final length and perform additional repetitions if necessary
            localDurDiff = sumLocalDurDiffs;
            while (localDurDiff > 0) {
                repeatSkipCount++;
                localDurDiff -= newPeriod;
            }
            //
        }

        if (isLastInputFrame) {
            repeatSkipCount++;
            bLastFrame = true;
        }

        double[] tmpSpec;
        ComplexArray tmpComp;

        LpCoeffs inputLPCoeffs = null;
        double[] inputLpcs = null;
        double[] inputLsfs = null;
        double sqrtInputGain;
        double[] targetLpcs = null;

        ComplexArray inputDft = null;
        ComplexArray inputExpTerm = null;
        ComplexArray outputExpTerm = null;
        ComplexArray inputResidual = null;
        ComplexArray outputResidual = null;
        ComplexArray outputDft = null;

        double[] inputVocalTractSpectrum = null;
        double[] interpolatedInputLpcs = null;
        double[] sourceVocalTractSpectrumEstimate = null;
        double[] targetVocalTractSpectrumEstimate = null;
        double[] interpolatedInputVocalTractSpectrum = null;
        double[] outputVocalTractSpectrum = null;
        double[] warpedOutputVocalTractSpectrum = null;
        double[] transformationFilter = null;
        Context currentContext = null;
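        /*
         * Frames that survive the skip test below go through the analysis chain: Hanning windowing, preemphasis
         * (y[n] = x[n] - preCoef * x[n-1], with preCoef taken from baseParams.lsfParams), LPC analysis, and
         * LPC-to-LSF conversion. LSFs are used as the mapping domain because they are bounded, ordered, and
         * interpolate more safely than raw LPC coefficients.
         */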
        if (repeatSkipCount > -1) {
            frm = MathUtils.zeros(frmSize);
            System.arraycopy(frmIn, 0, frm, 0, Math.min(frmIn.length, frmSize));

            wgt = windowIn.values(frmSize);

            if (vscale != 1.0)
                bWarp = true;
            else
                bWarp = false;

            boolean isTransformUnvoiced = true;

            if ((isVoiced && pscale != 1.0) || bWarp || isTransformUnvoiced) {
                frmEn = SignalProcUtils.getEnergy(frm);

                wgt = windowIn.values(frmSize);

                // Windowing
                for (j = 0; j < frmSize; j++)
                    frm[j] = frm[j] * wgt[j];

                // Preemphasis
                frm = SignalProcUtils.applyPreemphasis(frm, baseParams.lsfParams.preCoef);

                // Compute LPC coefficients
                inputLPCoeffs = LpcAnalyser.calcLPC(frm, baseParams.lsfParams.dimension);
                inputLpcs = inputLPCoeffs.getOneMinusA();

                if (!baseParams.lsfParams.isBarkScaled)
                    inputLsfs = LsfAnalyser.lpc2lsfInHz(inputLpcs, fs);
                else
                    inputLsfs = LsfAnalyser.lpc2lsfInBark(inputLpcs, fs);

                sqrtInputGain = inputLPCoeffs.getGain();

                // Find target estimate from codebook
                if (baseParams.isVocalTractTransformation) {
                    if (mapper instanceof WeightedCodebookMapper) {
                        if (baseParams instanceof WeightedCodebookTransformerParams
                                && ((WeightedCodebookTransformerParams) baseParams).isContextBasedPreselection) {
                            currentContext = new Context(labels, currentLabelIndex,
                                    ((WeightedCodebookTransformerParams) baseParams).totalContextNeighbours);
                            preselectedIndices = ((WeightedCodebookMapper) mapper).preselect(currentContext,
                                    (WeightedCodebook) data,
                                    ((WeightedCodebookTransformerParams) baseParams).isVocalTractMatchUsingTargetModel,
                                    ((WeightedCodebookTransformerParams) baseParams).mapperParams.numBestMatches);
                        }

                        if (preselectedIndices != null)
                            match = ((WeightedCodebookMapper) mapper).transform(inputLsfs, (WeightedCodebook) data,
                                    baseParams.isVocalTractMatchUsingTargetModel, preselectedIndices);
                        else
                            match = ((WeightedCodebookMapper) mapper).transform(inputLsfs, (WeightedCodebook) data,
                                    baseParams.isVocalTractMatchUsingTargetModel, allIndices);

                        // Use source for testing things. Don't forget to set isSourceVocalTractSpectrumFromModel=false
                        // codebookMatch = new WeightedCodebookMatch(inputLsfs, inputLsfs);
                    } else if (mapper instanceof JointGMMMapper) {
                        // Different weighting strategies can be tested here, i.e. doing a fuzzy phone classification
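                        // The code below implements the simplest strategy: a one-hot weight vector. For example, with
                        // four GMM classes and the current phone mapped to class 2, gmmWeights becomes
                        // {0.0, 0.0, 1.0, 0.0}; a fuzzy classifier could instead return something like
                        // {0.1, 0.2, 0.6, 0.1} (weights are normalized to sum to 1.0 either way). The fuzzy variant
                        // is hypothetical and not implemented here.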
                        double[] gmmWeights = new double[((JointGMMSet) data).gmms.length];
                        Arrays.fill(gmmWeights, 0.0);
                        int currentGmmIndex = -1;
                        if (currentLabelIndex >= 0 && currentLabelIndex < labels.items.length)
                            currentGmmIndex = ((JointGMMSet) data).cgParams
                                    .getClassIndex(labels.items[currentLabelIndex].phn);
                        if (currentGmmIndex > 0)
                            gmmWeights[currentGmmIndex] = 1.0;
                        gmmWeights = MathUtils.normalizeToSumUpTo(gmmWeights, 1.0);

                        match = ((JointGMMMapper) mapper).transform(inputLsfs, (JointGMMSet) data, gmmWeights,
                                baseParams.isVocalTractMatchUsingTargetModel);
                    } else if (mapper instanceof TargetLsfCopyMapper) {
                        match = ((TargetLsfCopyMapper) mapper).transform(psFrm.getCurrentTime(), labels, targetLabels,
                                targetLsfs, baseParams.lsfParams.winsize, baseParams.lsfParams.skipsize);
                    }
                }
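                // DFT sizing note: only bins 0 .. frmSize/2 are independent for a real-valued signal, hence
                // maxFreq = frmSize/2 + 1 below. E.g. a 480-sample frame at 16 kHz yields 241 usable bins spanning
                // 0 Hz .. 8 kHz (illustrative numbers).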
                inputDft = new ComplexArray(frmSize);
                int maxFreq = frmSize / 2 + 1;

                System.arraycopy(frm, 0, inputDft.real, 0, Math.min(frmSize, inputDft.real.length));

                if (inputDft.real.length > frmSize)
                    Arrays.fill(inputDft.real, inputDft.real.length - frmSize, inputDft.real.length - 1, 0);

                Arrays.fill(inputDft.imag, 0, inputDft.imag.length - 1, 0);

                inputDft = FFTMixedRadix.fftComplex(inputDft);

                // For checking
                if (bShowSpectralPlots && psFrm.getCurrentTime() >= desiredFrameTime) {
                    tmpComp = new ComplexArray(inputDft);
                    tmpSpec = MathUtils.dft2ampdb(tmpComp, 0, maxFreq);
                    DisplayUtils.plot(tmpSpec, "1.Input DFT");
                }
                //

                inputExpTerm = LpcAnalyser.calcExpTerm(frmSize, baseParams.lsfParams.dimension);
                outputExpTerm = LpcAnalyser.calcExpTerm(newFrmSize, baseParams.lsfParams.dimension);

                inputVocalTractSpectrum = LpcAnalyser.calcSpecLinearFromOneMinusA(inputLPCoeffs.getOneMinusA(),
                        (float) sqrtInputGain, frmSize, inputExpTerm);

                // Use a weighted codebook estimate of the input vocal tract spectrum. This will result in a smoother
                // transformation filter
                if (baseParams.isSourceVocalTractSpectrumFromModel && baseParams.isVocalTractTransformation) {
                    if (mapper instanceof WeightedCodebookMapper) {
                        if (!baseParams.isResynthesizeVocalTractFromSourceModel) {
                            if (!baseParams.lsfParams.isBarkScaled)
                                interpolatedInputLpcs = LsfAnalyser.lsfInHz2lpc(
                                        ((WeightedCodebookLsfMatch) match).entry.sourceItem.lsfs, fs);
                            else
                                interpolatedInputLpcs = LsfAnalyser.lsfInBark2lpc(
                                        ((WeightedCodebookLsfMatch) match).entry.sourceItem.lsfs, fs);
                        } else {
                            if (!baseParams.lsfParams.isBarkScaled)
                                interpolatedInputLpcs = LsfAnalyser.lsfInHz2lpc(
                                        ((WeightedCodebookLsfMatch) match).entry.targetItem.lsfs, fs);
                            else
                                interpolatedInputLpcs = LsfAnalyser.lsfInBark2lpc(
                                        ((WeightedCodebookLsfMatch) match).entry.targetItem.lsfs, fs);
                        }
                    } else if (mapper instanceof JointGMMMapper) {
                        if (!baseParams.isResynthesizeVocalTractFromSourceModel) {
                            if (!baseParams.lsfParams.isBarkScaled)
                                interpolatedInputLpcs = LsfAnalyser
                                        .lsfInHz2lpc(((JointGMMMatch) match).mappedSourceFeatures, fs);
                            else
                                interpolatedInputLpcs = LsfAnalyser
                                        .lsfInBark2lpc(((JointGMMMatch) match).mappedSourceFeatures, fs);
                        } else {
                            if (!baseParams.lsfParams.isBarkScaled)
                                interpolatedInputLpcs = LsfAnalyser.lsfInHz2lpc(((JointGMMMatch) match).outputFeatures,
                                        fs);
                            else
                                interpolatedInputLpcs = LsfAnalyser
                                        .lsfInBark2lpc(((JointGMMMatch) match).outputFeatures, fs);
                        }
                    } else if (mapper instanceof TargetLsfCopyMapper)
                        interpolatedInputLpcs = ArrayUtils.copy(inputLpcs);

                    sourceVocalTractSpectrumEstimate = LpcAnalyser.calcSpecLinearFromOneMinusA(interpolatedInputLpcs,
                            1.0f, newFrmSize, outputExpTerm);
                }

                // For checking
                if (bShowSpectralPlots && psFrm.getCurrentTime() >= desiredFrameTime) {
                    tmpSpec = new double[maxFreq];
                    System.arraycopy(inputVocalTractSpectrum, 0, tmpSpec, 0, tmpSpec.length);
                    tmpSpec = MathUtils.amp2db(tmpSpec);
                    DisplayUtils.plot(tmpSpec, "2.Input Vocal Tract");
                    FileUtils.writeToTextFile(inputVocalTractSpectrum, "d:/hmmTest_inputVT.txt");
                }
                //

                // For checking
                if (bShowSpectralPlots && psFrm.getCurrentTime() >= desiredFrameTime
                        && baseParams.isSourceVocalTractSpectrumFromModel && baseParams.isVocalTractTransformation) {
                    tmpSpec = new double[maxFreq];
                    System.arraycopy(sourceVocalTractSpectrumEstimate, 0, tmpSpec, 0, tmpSpec.length);
                    tmpSpec = MathUtils.amp2db(tmpSpec);
                    DisplayUtils.plot(tmpSpec, "3.Source Vocal Tract Estimate");
                    FileUtils.writeToTextFile(sourceVocalTractSpectrumEstimate, "d:/hmmTest_estimateVT.txt");
                }
                //
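                // Residual extraction: R(k) = X(k) / H(k). inputVocalTractSpectrum holds the real-valued LPC
                // magnitude spectrum, so dividing the real and imaginary parts of the DFT separately is a genuine
                // complex division by a real number; the residual keeps the original phase.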
                inputResidual = new ComplexArray(frmSize);

                // Filter out vocal tract to obtain the input residual spectrum (note that this is the real residual
                // spectrum)
                for (k = 0; k < maxFreq; k++) {
                    inputResidual.real[k] = inputDft.real[k] / inputVocalTractSpectrum[k];
                    inputResidual.imag[k] = inputDft.imag[k] / inputVocalTractSpectrum[k];
                }

                // For checking
                if (bShowSpectralPlots && psFrm.getCurrentTime() >= desiredFrameTime) {
                    tmpComp = new ComplexArray(inputResidual);
                    tmpSpec = MathUtils.dft2ampdb(tmpComp, 0, maxFreq - 1);
                    DisplayUtils.plot(tmpSpec, "4.Input Residual");
                }
                //

                int newMaxFreq = newFrmSize / 2 + 1;

                if (baseParams.isVocalTractTransformation) {
                    // Smoothing
                    if (baseParams.smoothingMethod == SmoothingDefinitions.OUTPUT_LSFCONTOUR_SMOOTHING) {
                        if (baseParams.smoothingState == SmoothingDefinitions.ESTIMATING_SMOOTHED_VOCAL_TRACT) {
                            if (!baseParams.isResynthesizeVocalTractFromSourceModel)
                                smoothingFile.writeSingle(((WeightedCodebookLsfMatch) match).entry.targetItem.lsfs);
                            else
                                smoothingFile.writeSingle(((WeightedCodebookLsfMatch) match).entry.sourceItem.lsfs);
                        } else if (baseParams.smoothingState == SmoothingDefinitions.TRANSFORMING_TO_SMOOTHED_VOCAL_TRACT) {
                            if (!baseParams.isResynthesizeVocalTractFromSourceModel)
                                ((WeightedCodebookLsfMatch) match).entry.targetItem
                                        .setLsfs(smoothedVocalTract[smoothedInd]);
                            else
                                ((WeightedCodebookLsfMatch) match).entry.sourceItem
                                        .setLsfs(smoothedVocalTract[smoothedInd]);
                        }
                    }
                    //

                    if (match instanceof WeightedCodebookLsfMatch) {
                        if (!baseParams.isResynthesizeVocalTractFromSourceModel) {
                            if (!baseParams.lsfParams.isBarkScaled)
                                targetLpcs = LsfAnalyser
                                        .lsfInHz2lpc(((WeightedCodebookLsfMatch) match).entry.targetItem.lsfs, fs);
                            else
                                targetLpcs = LsfAnalyser
                                        .lsfInBark2lpc(((WeightedCodebookLsfMatch) match).entry.targetItem.lsfs, fs);
                        } else {
                            if (!baseParams.lsfParams.isBarkScaled)
                                targetLpcs = LsfAnalyser
                                        .lsfInHz2lpc(((WeightedCodebookLsfMatch) match).entry.sourceItem.lsfs, fs);
                            else
                                targetLpcs = LsfAnalyser
                                        .lsfInBark2lpc(((WeightedCodebookLsfMatch) match).entry.sourceItem.lsfs, fs);
                        }
                    } else if (match instanceof JointGMMMatch) {
                        if (!baseParams.isResynthesizeVocalTractFromSourceModel) {
                            if (!baseParams.lsfParams.isBarkScaled)
                                targetLpcs = LsfAnalyser.lsfInHz2lpc(((JointGMMMatch) match).outputFeatures, fs);
                            else
                                targetLpcs = LsfAnalyser.lsfInBark2lpc(((JointGMMMatch) match).outputFeatures, fs);
                        } else {
                            if (!baseParams.lsfParams.isBarkScaled)
                                targetLpcs = LsfAnalyser.lsfInHz2lpc(((JointGMMMatch) match).mappedSourceFeatures, fs);
                            else
                                targetLpcs = LsfAnalyser.lsfInBark2lpc(((JointGMMMatch) match).mappedSourceFeatures, fs);
                        }
                    } else if (match instanceof LsfMatch) {
                        if (!baseParams.lsfParams.isBarkScaled)
                            targetLpcs = LsfAnalyser.lsfInHz2lpc(((LsfMatch) match).lsfs, fs);
                        else
                            targetLpcs = LsfAnalyser.lsfInBark2lpc(((LsfMatch) match).lsfs, fs);
                    }

                    if (frmSize != newFrmSize) {
                        if (outputExpTerm == null
                                || newMaxFreq * baseParams.lsfParams.dimension != outputExpTerm.real.length)
                            outputExpTerm = LpcAnalyser.calcExpTerm(newFrmSize, baseParams.lsfParams.dimension);

                        targetVocalTractSpectrumEstimate = LpcAnalyser.calcSpecLinearFromOneMinusA(targetLpcs, 1.0f,
                                newFrmSize, outputExpTerm);
                    } else
                        targetVocalTractSpectrumEstimate = LpcAnalyser.calcSpecLinearFromOneMinusA(targetLpcs, 1.0f,
                                newFrmSize, inputExpTerm);

                    for (k = 0; k < newMaxFreq; k++)
                        targetVocalTractSpectrumEstimate[k] *= sqrtInputGain;
                }

                // For checking
                if (bShowSpectralPlots && psFrm.getCurrentTime() >= desiredFrameTime
                        && baseParams.isVocalTractTransformation) {
                    tmpSpec = new double[newMaxFreq];
                    System.arraycopy(targetVocalTractSpectrumEstimate, 0, tmpSpec, 0, tmpSpec.length);
                    tmpSpec = MathUtils.amp2db(tmpSpec);
                    DisplayUtils.plot(tmpSpec, "5.Target Vocal Tract Estimate");
                }
                //

                outputVocalTractSpectrum = new double[newMaxFreq];
                interpolatedInputVocalTractSpectrum = MathUtils.interpolate(inputVocalTractSpectrum, newMaxFreq);

                if (baseParams.isVocalTractTransformation) {
                    if (baseParams.isSourceVocalTractSpectrumFromModel) {
                        for (k = 0; k < newMaxFreq; k++)
                            outputVocalTractSpectrum[k] = targetVocalTractSpectrumEstimate[k]
                                    / sourceVocalTractSpectrumEstimate[k] * interpolatedInputVocalTractSpectrum[k];
                    } else {
                        for (k = 0; k < newMaxFreq; k++)
                            outputVocalTractSpectrum[k] = targetVocalTractSpectrumEstimate[k];
                    }
                } else {
                    for (k = 0; k < newMaxFreq; k++)
                        outputVocalTractSpectrum[k] = interpolatedInputVocalTractSpectrum[k];
                }

                // MaryUtils.plot(MathUtils.amp2db(inputVocalTractSpectrum));
                // MaryUtils.plot(MathUtils.amp2db(interpolatedInputVocalTractSpectrum));
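                // Transformation filter idea: T(k) = H_target(k) / H_source(k), so that applying T to the
                // (interpolated) input vocal tract spectrum replaces the source envelope with the target one while
                // leaving the residual untouched. When no source model estimate is available, the interpolated input
                // spectrum itself serves as the denominator.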
                // Estimate transformation filter
                if (baseParams.isVocalTractTransformation) {
                    transformationFilter = new double[newMaxFreq];

                    if (baseParams.isSourceVocalTractSpectrumFromModel) {
                        for (k = 0; k < newMaxFreq; k++)
                            transformationFilter[k] = targetVocalTractSpectrumEstimate[k]
                                    / sourceVocalTractSpectrumEstimate[k];
                    } else {
                        for (k = 0; k < newMaxFreq; k++)
                            transformationFilter[k] = targetVocalTractSpectrumEstimate[k]
                                    / interpolatedInputVocalTractSpectrum[k];
                    }
                    //

                    // Smoothing
                    if (baseParams.smoothingMethod == SmoothingDefinitions.TRANSFORMATION_FILTER_SMOOTHING) {
                        if (baseParams.smoothingState == SmoothingDefinitions.ESTIMATING_SMOOTHED_VOCAL_TRACT) {
                            smoothingFile.writeSingle(transformationFilter);

                            // For checking
                            if (bShowSpectralPlots && psFrm.getCurrentTime() >= desiredFrameTime) {
                                tmpSpec = new double[newMaxFreq];
                                System.arraycopy(transformationFilter, 0, tmpSpec, 0, tmpSpec.length);
                                tmpSpec = MathUtils.amp2db(tmpSpec);
                                DisplayUtils.plot(tmpSpec, "6.Transformation filter");
                            }
                        } else if (baseParams.smoothingState == SmoothingDefinitions.TRANSFORMING_TO_SMOOTHED_VOCAL_TRACT) {
                            if (baseParams.isSourceVocalTractSpectrumFromModel) {
                                for (k = 0; k < newMaxFreq; k++)
                                    outputVocalTractSpectrum[k] = smoothedVocalTract[smoothedInd][k]
                                            * sourceVocalTractSpectrumEstimate[k];
                            } else {
                                for (k = 0; k < newMaxFreq; k++)
                                    outputVocalTractSpectrum[k] = smoothedVocalTract[smoothedInd][k]
                                            * interpolatedInputVocalTractSpectrum[k];
                            }

                            // For checking
                            if (bShowSpectralPlots && psFrm.getCurrentTime() >= desiredFrameTime) {
                                tmpSpec = new double[newMaxFreq];
                                System.arraycopy(smoothedVocalTract[smoothedInd], 0, tmpSpec, 0, tmpSpec.length);
                                tmpSpec = MathUtils.amp2db(tmpSpec);
                                DisplayUtils.plot(tmpSpec, "6.Smoothed transformation filter");
                            }
                        } else {
                            // For checking
                            if (bShowSpectralPlots && psFrm.getCurrentTime() >= desiredFrameTime) {
                                tmpSpec = new double[newMaxFreq];
                                System.arraycopy(transformationFilter, 0, tmpSpec, 0, tmpSpec.length);
                                tmpSpec = MathUtils.amp2db(tmpSpec);
                                DisplayUtils.plot(tmpSpec, "6.Transformation filter");
                            }
                        }
                    } else {
                        // For checking
                        if (bShowSpectralPlots && psFrm.getCurrentTime() >= desiredFrameTime) {
                            tmpSpec = new double[newMaxFreq];
                            System.arraycopy(transformationFilter, 0, tmpSpec, 0, tmpSpec.length);
                            tmpSpec = MathUtils.amp2db(tmpSpec);
                            DisplayUtils.plot(tmpSpec, "6.Transformation filter");
                        }
                    }
                }
                //

                // Perform additional vocal tract scaling
                if (bWarp) {
                    tmpvsc[0] = vscale;
                    // Modify length to match current length of spectrum
                    newVScales = MathUtils.modifySize(tmpvsc, newMaxFreq);

                    for (k = 0; k < newVScales.length; k++) {
                        if (newVScales[k] < 0.05) // Put a floor to avoid divide by zero
                            newVScales[k] = 0.05;
                    }

                    warpedOutputVocalTractSpectrum = new double[newMaxFreq];

                    for (k = 0; k < newMaxFreq; k++) {
                        wInd = (int) Math.floor((k + 1) / newVScales[k] + 0.5); // Find new indices
                        if (wInd < 1)
                            wInd = 1;
                        if (wInd > newMaxFreq)
                            wInd = newMaxFreq;

                        warpedOutputVocalTractSpectrum[k] = outputVocalTractSpectrum[wInd - 1];
                    }

                    System.arraycopy(warpedOutputVocalTractSpectrum, 0, outputVocalTractSpectrum, 0, newMaxFreq);
                }

                // Create output DFT spectrum
                outputResidual = new ComplexArray(newFrmSize);
                outputResidual.real = MathUtils.zeros(newFrmSize);
                outputResidual.imag = MathUtils.zeros(newFrmSize);

                System.arraycopy(inputResidual.real, 0, outputResidual.real, 0, Math.min(maxFreq, newFrmSize));
                System.arraycopy(inputResidual.imag, 0, outputResidual.imag, 0, Math.min(maxFreq, newFrmSize));

                // Copy & paste samples if required (COMPLEX VERSION TO SUPPORT PSCALE <= 0.5)
                // This version fills the spectrum by flipping and pasting the original freq bins as many times as
                // required.
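                // Spectrum extension example (illustrative numbers): for frmSize = 480 (maxFreq = 241) and
                // pscale = 0.4, newFrmSize = floor(480 / 0.4 + 0.5) = 1200, so newMaxFreq = 601. The loop below then
                // mirrors the 239 interior bins back and forth (kMax = 2, since 601 > 2 * 239 but not > 3 * 239)
                // until all output bins are filled, alternating the flip direction on odd/even passes.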
                kMax = 1;
                while (newMaxFreq > (kMax + 1) * (maxFreq - 2))
                    kMax++;

                for (k = 1; k <= kMax; k++) {
                    tmpFix = (maxFreq - 2) * k;
                    if (k % 2 == 1) // Odd mode
                    {
                        tmpAdd = maxFreq + 2;
                        tmpMul = 1;
                    } else {
                        tmpAdd = -1;
                        tmpMul = -1;
                    }

                    for (j = tmpFix + 3; j <= Math.min(newMaxFreq, maxFreq + tmpFix); j++) {
                        outputResidual.real[j - 1] = inputResidual.real[tmpMul * (tmpFix - j) + tmpAdd - 1];
                        outputResidual.imag[j - 1] = inputResidual.imag[tmpMul * (tmpFix - j) + tmpAdd - 1];
                    }
                }

                outputResidual.real[newMaxFreq - 1] = Math.sqrt(outputResidual.real[newMaxFreq - 1]
                        * outputResidual.real[newMaxFreq - 1] + outputResidual.imag[newMaxFreq - 1]
                        * outputResidual.imag[newMaxFreq - 1]);
                outputResidual.imag[newMaxFreq - 1] = 0.0;

                // For checking
                if (bShowSpectralPlots && psFrm.getCurrentTime() >= desiredFrameTime) {
                    tmpComp = new ComplexArray(outputResidual);
                    tmpSpec = MathUtils.dft2ampdb(tmpComp, 0, newMaxFreq - 1);
                    DisplayUtils.plot(tmpSpec, "7.Output Residual");
                }
                //

                // Filter the output residual with the estimated target vocal tract spectrum
                outputDft = new ComplexArray(newFrmSize);

                // Smoothing
                if (baseParams.smoothingMethod == SmoothingDefinitions.OUTPUT_VOCALTRACTSPECTRUM_SMOOTHING) {
                    if (baseParams.smoothingState == SmoothingDefinitions.ESTIMATING_SMOOTHED_VOCAL_TRACT) {
                        smoothingFile.writeSingle(outputVocalTractSpectrum, newMaxFreq);
                    } else if (baseParams.smoothingState == SmoothingDefinitions.TRANSFORMING_TO_SMOOTHED_VOCAL_TRACT) {
                        for (k = 0; k < newMaxFreq; k++)
                            outputVocalTractSpectrum[k] = smoothedVocalTract[smoothedInd][k];
                    }
                }
                //

                // For checking
                if (bShowSpectralPlots && psFrm.getCurrentTime() >= desiredFrameTime) {
                    tmpSpec = new double[newMaxFreq];
                    System.arraycopy(outputVocalTractSpectrum, 0, tmpSpec, 0, tmpSpec.length);
                    tmpSpec = MathUtils.amp2db(tmpSpec);
                    DisplayUtils.plot(tmpSpec, "8.Output Vocal Tract");
                }
                //

                for (k = 1; k <= newMaxFreq; k++) {
                    outputDft.real[k - 1] = outputResidual.real[k - 1] * outputVocalTractSpectrum[k - 1];
                    outputDft.imag[k - 1] = outputResidual.imag[k - 1] * outputVocalTractSpectrum[k - 1];
                }

                for (k = newMaxFreq + 1; k <= newFrmSize; k++) {
                    outputDft.real[k - 1] = outputDft.real[2 * newMaxFreq - 1 - k];
                    outputDft.imag[k - 1] = -outputDft.imag[2 * newMaxFreq - 1 - k];
                }

                // For checking
                if (bShowSpectralPlots && psFrm.getCurrentTime() >= desiredFrameTime) {
                    tmpComp = new ComplexArray(outputDft);
                    tmpSpec = MathUtils.dft2ampdb(tmpComp, 0, newMaxFreq);
                    DisplayUtils.plot(tmpSpec, "9.Output DFT");
                    bShowSpectralPlots = false;
                }
                //

                // Convert back to time domain
                outputDft = FFTMixedRadix.ifft(outputDft);

                frmy = new double[newFrmSize];
                System.arraycopy(outputDft.real, 0, frmy, 0, newFrmSize);
            } else {
                if (frmSize < newFrmSize)
                    newFrmSize = frmSize;

                frmy = new double[newFrmSize];
            }

            frmy = SignalProcUtils.removePreemphasis(frmy, baseParams.lsfParams.preCoef);
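            // Energy bookkeeping: gain matches the per-sample RMS of the synthesized frame to that of the analysis
            // frame, then applies the requested energy scale. Continuing the earlier illustrative numbers: if
            // frmEn / sqrt(480) is twice frmyEn / sqrt(344), gain = 2.0 * escale and every output sample is doubled
            // before overlap-add.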
            frmyEn = SignalProcUtils.getEnergy(frmy);

            gain = (frmEn / Math.sqrt(frmSize)) / (frmyEn / Math.sqrt(newFrmSize)) * escale;

            if (!(isVoiced && pscale != 1.0) && !bWarp && !isTransformUnvoiced) {
                for (k = 0; k < frmSize; k++)
                    frmy[k] = frm[k] * wgt[k];
            }

            // Energy scale compensation + modification
            for (k = 0; k < newFrmSize; k++)
                frmy[k] *= gain;

            for (j = 1; j <= repeatSkipCount + 1; j++) {
                if (!baseParams.isFixedRateVocalTractConversion) {
                    if (isVoiced)
                        newSkipSize = (int) Math.floor(currentPeriod / pscale + 0.5);
                    else
                        newSkipSize = (int) Math.floor(currentPeriod + 0.5);
                } else
                    newSkipSize = (int) Math.floor(ssFixedInSeconds * fs + 0.5);

                if ((isLastInputFrame && j == repeatSkipCount + 1)) // | (i~=numfrm & all(repeatSkipCounts(i+1:numfrm)==-1)))
                    bLastFrame = true;
                else
                    bLastFrame = false;

                synthFrameInd++;

                wgty = windowOut.values(newFrmSize);

                if (synthFrameInd == 1) // First frame: Do not window the first half of output speech frame to prevent
                                        // overflow in normalization with Hanning coeffs
                {
                    halfWin = (int) Math.floor(newFrmSize / 2.0 + 0.5);
                    synthTotal = synthSt + newFrmSize;

                    // Keep output in an overlap-add buffer
                    if (ySynthInd + newFrmSize - 1 <= maxNewFrmSize) {
                        for (k = ySynthInd; k <= ySynthInd + halfWin - 1; k++) {
                            ySynthBuff[k - 1] = frmy[k - ySynthInd];
                            wSynthBuff[k - 1] = 1.0;
                        }

                        for (k = ySynthInd + halfWin; k <= ySynthInd + newFrmSize - 1; k++) {
                            ySynthBuff[k - 1] += frmy[k - ySynthInd] * wgty[k - ySynthInd];
                            wSynthBuff[k - 1] += wgty[k - ySynthInd] * wgty[k - ySynthInd];
                        }
                    } else {
                        for (k = ySynthInd; k <= maxNewFrmSize; k++) {
                            if (k - ySynthInd < halfWin) {
                                ySynthBuff[k - 1] = frmy[k - ySynthInd];
                                wSynthBuff[k - 1] = 1.0;
                            } else {
                                ySynthBuff[k - 1] += frmy[k - ySynthInd] * wgty[k - ySynthInd];
                                wSynthBuff[k - 1] += wgty[k - ySynthInd] * wgty[k - ySynthInd];
                            }
                        }

                        for (k = 1; k <= newFrmSize - 1 - maxNewFrmSize + ySynthInd; k++) {
                            if (maxNewFrmSize - ySynthInd + k < halfWin) {
                                ySynthBuff[k - 1] = frmy[maxNewFrmSize - ySynthInd + k];
                                wSynthBuff[k - 1] = 1.0;
                            } else {
                                ySynthBuff[k - 1] += frmy[maxNewFrmSize - ySynthInd + k]
                                        * wgty[maxNewFrmSize - ySynthInd + k];
                                wSynthBuff[k - 1] += wgty[maxNewFrmSize - ySynthInd + k]
                                        * wgty[maxNewFrmSize - ySynthInd + k];
                            }
                        }
                    }
                    //

                    if (!bSilent)
                        System.out.println("Synthesized using frame " + String.valueOf(inputFrameIndex + 1));
                } else if (bLastFrame) // Last frame: Do not window the second half of output speech frame to prevent
                                       // overflow in normalization with Hanning coeffs
                {
                    halfWin = (int) Math.floor(newFrmSize / 2.0 + 0.5);
                    remain = newFrmSize - halfWin;
                    synthTotal = synthSt + halfWin + remain - 1;

                    // Keep output in an overlap-add buffer
                    if (ySynthInd + newFrmSize - 1 <= maxNewFrmSize) {
                        for (k = ySynthInd; k <= ySynthInd + halfWin - 1; k++) {
                            ySynthBuff[k - 1] += frmy[k - ySynthInd] * wgty[k - ySynthInd];
                            wSynthBuff[k - 1] += wgty[k - ySynthInd] * wgty[k - ySynthInd];
                        }

                        for (k = ySynthInd + halfWin; k <= ySynthInd + newFrmSize - 1; k++) {
                            ySynthBuff[k - 1] += frmy[k - ySynthInd];
                            wSynthBuff[k - 1] = 1.0;
                        }
                    } else {
                        for (k = ySynthInd; k <= maxNewFrmSize; k++) {
                            if (k - ySynthInd < halfWin) {
                                ySynthBuff[k - 1] += frmy[k - ySynthInd] * wgty[k - ySynthInd];
                                wSynthBuff[k - 1] += wgty[k - ySynthInd] * wgty[k - ySynthInd];
                            } else {
                                ySynthBuff[k - 1] += frmy[k - ySynthInd];
                                wSynthBuff[k - 1] = 1.0;
                            }
                        }

                        for (k = 1; k <= newFrmSize - 1 - maxNewFrmSize + ySynthInd; k++) {
                            if (maxNewFrmSize - ySynthInd + k < halfWin) {
                                ySynthBuff[k - 1] += frmy[maxNewFrmSize - ySynthInd + k]
                                        * wgty[maxNewFrmSize - ySynthInd + k];
                                wSynthBuff[k - 1] += wgty[maxNewFrmSize - ySynthInd + k]
                                        * wgty[maxNewFrmSize - ySynthInd + k];
                            } else {
                                ySynthBuff[k - 1] += frmy[maxNewFrmSize - ySynthInd + k];
                                wSynthBuff[k - 1] = 1.0;
                            }
                        }
                    }
                    //

                    if (!bSilent)
                        System.out.println("Synthesized using frame " + String.valueOf(inputFrameIndex + 1));
                } else // Normal frame
                {
                    if (!isVoiced && ((repeatSkipCount % 2) == 1)) // Reverse unvoiced repeated frames once in two
                                                                   // consecutive repetitions to reduce distortion
                        frmy = SignalProcUtils.reverse(frmy);

                    synthTotal = synthSt + newFrmSize;

                    // Keep output in an overlap-add buffer
                    if (ySynthInd + newFrmSize - 1 <= maxNewFrmSize) {
                        for (k = ySynthInd; k <= ySynthInd + newFrmSize - 1; k++) {
                            ySynthBuff[k - 1] += frmy[k - ySynthInd] * wgty[k - ySynthInd];
                            wSynthBuff[k - 1] += wgty[k - ySynthInd] * wgty[k - ySynthInd];
                        }
                    } else {
                        for (k = ySynthInd; k <= maxNewFrmSize; k++) {
                            ySynthBuff[k - 1] += frmy[k - ySynthInd] * wgty[k - ySynthInd];
                            wSynthBuff[k - 1] += wgty[k - ySynthInd] * wgty[k - ySynthInd];
                        }

                        for (k = 1; k <= newFrmSize - 1 - maxNewFrmSize + ySynthInd; k++) {
                            ySynthBuff[k - 1] += frmy[k + maxNewFrmSize - ySynthInd]
                                    * wgty[k + maxNewFrmSize - ySynthInd];
                            wSynthBuff[k - 1] += wgty[k + maxNewFrmSize - ySynthInd]
                                    * wgty[k + maxNewFrmSize - ySynthInd];
                        }
                    }
                    //

                    if (!bSilent) {
                        if (j == 1)
                            System.out.println("Synthesized using frame " + String.valueOf(inputFrameIndex + 1));
                        else
                            System.out.println("Repeated using frame " + String.valueOf(inputFrameIndex + 1));
                    }
                }
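                // Weighted overlap-add normalization: each ready output sample is ySynthBuff / wSynthBuff, i.e.
                // sum(w * frame) / sum(w * w) over all frames covering that sample; with overlapping Hanning windows
                // this cancels the synthesis-window weighting. Samples never touched by a window (wSynthBuff == 0)
                // are passed through unnormalized.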
                // Write to output buffer
                for (k = 0; k <= newSkipSize - 1; k++) {
                    kInd = (k + ySynthInd) % maxNewFrmSize;
                    if (kInd == 0)
                        kInd = maxNewFrmSize;

                    if (wSynthBuff[kInd - 1] > 0.0)
                        outBuff[outBuffStart - 1] = ySynthBuff[kInd - 1] / wSynthBuff[kInd - 1];
                    else
                        outBuff[outBuffStart - 1] = ySynthBuff[kInd - 1];

                    ySynthBuff[kInd - 1] = 0.0;
                    wSynthBuff[kInd - 1] = 0.0;

                    outBuffStart++;

                    if (outBuffStart > outBuffLen) {
                        if (tscaleSingle != 1.0 || totalWrittenToFile + outBuffLen <= origLen) {
                            dout.writeDouble(outBuff, 0, outBuffLen);
                            totalWrittenToFile += outBuffLen;
                        } else {
                            dout.writeDouble(outBuff, 0, origLen - totalWrittenToFile);
                            totalWrittenToFile = origLen;
                        }

                        outBuffStart = 1;
                    }
                }
                //

                synthSt += newSkipSize;

                // if (!bLastFrame)
                // {
                if (ySynthInd + newSkipSize <= maxNewFrmSize)
                    ySynthInd += newSkipSize;
                else
                    ySynthInd += newSkipSize - maxNewFrmSize;
                // }
                // ///////

                if (bLastFrame) {
                    bBroke = true;
                    break;
                }
            }
        } else {
            if (!bSilent)
                System.out.println("Skipped frame " + String.valueOf(inputFrameIndex + 1));
        }

        inputFrameIndex++;

        return output;
    }

    public double[] writeFinal() throws IOException {
        double[] output = null;
        double[] outputTmp = null;

        int k, kInd;

        if (tscaleSingle == 1.0)
            synthTotal = origLen;

        if (outBuffLen > synthTotal)
            outBuffLen = synthTotal;

        // Write the final segment
        for (k = synthSt; k <= synthTotal; k++) {
            kInd = (k - synthSt + ySynthInd) % maxNewFrmSize;
            if (kInd == 0)
                kInd = maxNewFrmSize;

            if (wSynthBuff[kInd - 1] > 0.0)
                outBuff[outBuffStart - 1] = ySynthBuff[kInd - 1] / wSynthBuff[kInd - 1];
            else
                outBuff[outBuffStart - 1] = ySynthBuff[kInd - 1];

            ySynthBuff[kInd - 1] = 0.0;
            wSynthBuff[kInd - 1] = 0.0;

            outBuffStart++;

            if (outBuffStart > outBuffLen) {
                if (tscaleSingle != 1.0 || totalWrittenToFile + outBuffLen <= origLen) {
                    dout.writeDouble(outBuff, 0, outBuffLen);
                    totalWrittenToFile += outBuffLen;
                } else {
                    dout.writeDouble(outBuff, 0, origLen - totalWrittenToFile);
                    totalWrittenToFile = origLen;
                }
                outBuffStart = 1;
            }
        }

        if (outBuffStart > 1) {
            if (tscaleSingle != 1.0 || totalWrittenToFile + outBuffStart - 1 <= origLen) {
                dout.writeDouble(outBuff, 0, outBuffStart - 1);
                totalWrittenToFile += outBuffStart - 1;
            } else {
                dout.writeDouble(outBuff, 0, origLen - totalWrittenToFile);
                totalWrittenToFile = origLen;
            }
        }
        //

        if (dout != null)
            dout.close();

        return output;
    }

    public void convertToWav(AudioFormat audioformat) throws IOException {
        // Read the temp binary file into a wav file and delete the temp binary file
        if (tempOutBinaryFile != null) {
            double[] yOut = null;

            din = new LEDataInputStream(tempOutBinaryFile);
            yOut = din.readDouble(totalWrittenToFile);
            din.close();

            double tmpMax = MathUtils.getAbsMax(yOut);
            if (tmpMax > 1.0) {
                for (int n = 0; n < yOut.length; n++)
                    yOut[n] /= tmpMax;
            }

            outputAudio = new DDSAudioInputStream(new BufferedDoubleDataSource(yOut), audioformat);
            AudioSystem.write(outputAudio, AudioFileFormat.Type.WAVE, new File(outputFile));

            FileUtils.delete(tempOutBinaryFile);
            //
        }
    }
}