/**
* Copyright 2007 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* Permission is hereby granted, free of charge, to use and distribute
* this software and its documentation without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of this work, and to
* permit persons to whom this work is furnished to do so, subject to
* the following conditions:
*
* 1. The code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
* 2. Any modifications must be clearly marked as such.
* 3. Original authors' names are not deleted.
* 4. The authors' names are not used to endorse or promote products
* derived from this software without specific prior written
* permission.
*
* DFKI GMBH AND THE CONTRIBUTORS TO THIS WORK DISCLAIM ALL WARRANTIES WITH
* REGARD TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL DFKI GMBH NOR THE
* CONTRIBUTORS BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL
* DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
* PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
* ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
* THIS SOFTWARE.
*/
package marytts.signalproc.process;
import java.util.Arrays;
import marytts.util.math.ArrayUtils;
import marytts.util.math.MathUtils;
/**
* @author oytun.turk
*
*/
public class TDPSOLAProcessor {
    /** A scaled period must be longer than this (in seconds) before the frame is allowed to be repeated. */
    private static final double MIN_REPEATABLE_PERIOD_IN_SECONDS = 1e-10;

    /**
     * TDPSOLA - Time domain pitch synchronous overlap-add algorithm.
     * <p>
     * This version does not perform waveform synthesis. It only maps pitch synchronous analysis instants to synthesis
     * instants; the result can be used by a waveform generator to perform the actual PSOLA operations.
     * <p>
     * Processing steps:
     * <ol>
     * <li>For every interval between two successive analysis instants, compute the pitch-scaled period (the interval
     * divided by {@code pScales[i]} when {@code vuvs[i]} is true, the unchanged interval otherwise) and the local duration
     * error, i.e. (interval * {@code tScales[i]}) - (pitch-scaled period).</li>
     * <li>Walk through the frames and skip a frame when its duration error is below -10% of its scaled period, or repeat
     * it while the error is above +10% of its scaled period. The remaining error is carried over to the next frame.</li>
     * <li>Emit one synthesis instant per retained frame copy, starting at {@code analysisInstants[0]} and advancing by the
     * scaled period of the emitted frame.</li>
     * </ol>
     * Features listed by the original author: duration compensation towards the desired duration, support for pitch
     * scaling factors &lt;= 0.5, and fixed or variable rate pitch/time scaling.
     * <p>
     * Note: this method does not interpolate the scaling vectors. {@code vuvs} and {@code tScales} are read at indices
     * {@code 0 .. N-2} (N = {@code analysisInstants.length}); {@code pScales} is read at those indices for voiced
     * intervals only. Shorter arrays cause an {@link ArrayIndexOutOfBoundsException}.
     * <p>
     * Original author: Oytun Türk. Modified: empty and single-element inputs no longer throw, and unused computations
     * were removed.
     *
     * @param analysisInstants
     *            pitch synchronous analysis time instants, in seconds (N entries)
     * @param samplingRateInHz
     *            sampling rate in Hz; not used by this method
     * @param vuvs
     *            voiced (true) / unvoiced (false) label for the region between analysis instant i and i+1
     * @param tScales
     *            time scaling factor per frame ({@code tScales[i] < 1}: time scale compression, {@code tScales[i] > 1}:
     *            time scale expansion)
     * @param pScales
     *            pitch scaling factor per frame ({@code pScales[i] < 1}: pitch period expansion / lower f0,
     *            {@code pScales[i] > 1}: pitch period compression / higher f0)
     * @return the analysis/synthesis instant pairs together with a copy of the per-frame repeat/skip counts (-1: frame
     *         skipped, 0: frame used once, &gt;0: number of additional repetitions), or {@code null} when
     *         {@code analysisInstants} is empty or no frame is retained
     */
    public static TDPSOLAInstants transformAnalysisInstants(float[] analysisInstants, int samplingRateInHz, boolean[] vuvs,
            float[] tScales, float[] pScales) {
        int numfrm = analysisInstants.length;
        if (numfrm == 0) {
            // Nothing to transform; mirrors the "no synthesis instants" result below instead of failing on index -1.
            return null;
        }
        int lastFrame = numfrm - 1;

        // Pitch-scaled period of every interval and the duration error it introduces.
        // Freshly allocated arrays are zero-filled, so the entry of the last frame (which has no following interval)
        // stays at 0.
        float[] newPeriodsInSeconds = new float[numfrm];
        float[] localDurDiffs = new float[numfrm];
        for (int i = 0; i < lastFrame; i++) {
            float frmSizeInSeconds = analysisInstants[i + 1] - analysisInstants[i];
            newPeriodsInSeconds[i] = vuvs[i] ? frmSizeInSeconds / pScales[i] : frmSizeInSeconds;
            // localDurDiffs[i] = (DESIRED) - (AFTER PITCH SCALING)
            // (-) if expansion occurred, (+) if compression occurred.
            // The compensation step below tries to bring this as close to zero as possible.
            localDurDiffs[i] = frmSizeInSeconds * tScales[i] - newPeriodsInSeconds[i];
        }

        // Decide which pitch-scaled frames to repeat/skip for overall duration compensation.
        // -1: skip frame, 0: use the frame once, >0: number of additional repetitions.
        int[] repeatSkipCounts = new int[numfrm];
        for (int i = 0; i < numfrm; i++) {
            boolean hasNextFrame = i < lastFrame;
            if (localDurDiffs[i] < -0.1f * newPeriodsInSeconds[i]) {
                // Expansion occurred, so skip this frame. Dropping it removes one whole scaled period, which is
                // credited to the next frame together with the remaining error.
                repeatSkipCounts[i] -= 1;
                if (hasNextFrame) {
                    localDurDiffs[i + 1] += localDurDiffs[i] + newPeriodsInSeconds[i];
                    localDurDiffs[i] = 0.0f;
                }
            } else if (localDurDiffs[i] > 0.1 * newPeriodsInSeconds[i]) {
                // Compression occurred, so repeat this frame until the error falls within the tolerance.
                while (localDurDiffs[i] > 0.1 * newPeriodsInSeconds[i]
                        && newPeriodsInSeconds[i] > MIN_REPEATABLE_PERIOD_IN_SECONDS) {
                    repeatSkipCounts[i] += 1;
                    localDurDiffs[i] -= newPeriodsInSeconds[i];
                }
                if (hasNextFrame) {
                    localDurDiffs[i + 1] += localDurDiffs[i];
                    localDurDiffs[i] = 0.0f;
                }
            }
        }

        // Check the final length and perform additional repetitions of the last frame if necessary.
        // NOTE(review): newPeriodsInSeconds[lastFrame] is never assigned in this method and is therefore 0, so the
        // loop body cannot execute as written. Kept unchanged to preserve the existing output; confirm the intent.
        localDurDiffs[lastFrame] = MathUtils.sum(localDurDiffs);
        while (localDurDiffs[lastFrame] > 0.0f && newPeriodsInSeconds[lastFrame] > MIN_REPEATABLE_PERIOD_IN_SECONDS) {
            repeatSkipCounts[lastFrame] += 1;
            localDurDiffs[lastFrame] -= newPeriodsInSeconds[lastFrame];
        }

        // Every frame contributes (count + 1) instants: 0 when skipped, 1 + repetitions otherwise.
        int numSynthesisInstants = MathUtils.sum(repeatSkipCounts) + repeatSkipCounts.length;
        if (numSynthesisInstants <= 0) {
            return null;
        }

        TDPSOLAInstants synthesisInstants = new TDPSOLAInstants(numSynthesisInstants);
        float synthSt = analysisInstants[0];
        int synthesisFrameCounter = 0;
        for (int i = 0; i < numfrm; i++) {
            // Skipped frames have a count of -1, so this loop does not run for them.
            for (int copy = 0; copy <= repeatSkipCounts[i]; copy++) {
                synthesisInstants.analysisInstantsInSeconds[synthesisFrameCounter] = analysisInstants[i];
                synthesisInstants.synthesisInstantsInSeconds[synthesisFrameCounter] = synthSt;
                synthesisFrameCounter++;
                // Advance by the pitch-scaled period of this frame. For the last frame this adds 0; no further
                // instant is placed after it, so no neighbouring instant has to be consulted.
                synthSt += newPeriodsInSeconds[i];
            }
        }
        synthesisInstants.repeatSkipCounts = ArrayUtils.copy(repeatSkipCounts);
        return synthesisInstants;
    }
}