/** * Copyright 2000-2009 DFKI GmbH. * All Rights Reserved. Use is subject to license terms. * * This file is part of MARY TTS. * * MARY TTS is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, version 3 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * */ package marytts.signalproc.adaptation.prosody; import marytts.util.display.DisplayUtils; import marytts.util.math.MathUtils; import marytts.util.signal.SignalProcUtils; /* This class implements a number of basic pitch value transformation methods * The most common is the mean and standard deviation transformation in which * a given f0 contour is modified to match target mean and standard deviation values. * * @author Oytun Türk */ public class PitchTransformer { public PitchTransformer() { } public double[] transform(ProsodyTransformerParams param, PitchStatisticsMapping mapping, PitchStatistics inputLocalStatistics, double[] f0s, double[] pscales // Possibly time-varying pitch scaling values // (these are applied after all modifications // as an additinal step!) ) { double[] targetF0s = new double[f0s.length]; if (f0s != null) { int i; if (param.pitchTransformationMethod != ProsodyTransformerParams.NO_TRANSFORMATION && param.pitchTransformationMethod != ProsodyTransformerParams.USE_ONLY_PSCALES) { int bestIndex = -1; PitchStatistics inputStatistics = null; PitchStatistics outputStatistics = null; if (param.pitchTransformationMethod < ProsodyTransformerParams.SENTENCE_MEAN) { if (param.pitchStatisticsType == PitchStatistics.STATISTICS_IN_HERTZ) inputStatistics = new PitchStatistics(mapping.sourceGlobalStatisticsHz); else if (param.pitchStatisticsType == PitchStatistics.STATISTICS_IN_LOGHERTZ) inputStatistics = new PitchStatistics(mapping.sourceGlobalStatisticsLogHz); else System.out.println("Error! Statistics type unknown..."); assert inputStatistics != null; if (param.pitchStatisticsType == PitchStatistics.STATISTICS_IN_HERTZ) outputStatistics = new PitchStatistics(mapping.targetGlobalStatisticsHz); else if (param.pitchStatisticsType == PitchStatistics.STATISTICS_IN_LOGHERTZ) outputStatistics = new PitchStatistics(mapping.targetGlobalStatisticsLogHz); else System.out.println("Error! Statistics type unknown..."); assert outputStatistics != null; } else { if (param.pitchStatisticsType == PitchStatistics.STATISTICS_IN_HERTZ) bestIndex = findBestMatch(inputLocalStatistics, mapping.sourceLocalStatisticsHz, mapping.sourceVariancesHz); else if (param.pitchStatisticsType == PitchStatistics.STATISTICS_IN_LOGHERTZ) bestIndex = findBestMatch(inputLocalStatistics, mapping.sourceLocalStatisticsLogHz, mapping.sourceVariancesLogHz); else System.out.println("Error! Statistics type unknown..."); if (bestIndex < 0) { System.out.println("Error! No best match found..."); assert bestIndex > -1; } if (param.pitchStatisticsType == PitchStatistics.STATISTICS_IN_HERTZ) { inputStatistics = new PitchStatistics(mapping.sourceLocalStatisticsHz.entries[bestIndex]); outputStatistics = new PitchStatistics(mapping.targetLocalStatisticsHz.entries[bestIndex]); } else if (param.pitchStatisticsType == PitchStatistics.STATISTICS_IN_LOGHERTZ) { inputStatistics = new PitchStatistics(mapping.sourceLocalStatisticsLogHz.entries[bestIndex]); outputStatistics = new PitchStatistics(mapping.targetLocalStatisticsLogHz.entries[bestIndex]); } else System.out.println("Error! Statistics type unknown..."); assert inputStatistics != null; assert outputStatistics != null; } if (param.isUseInputMeanPitch) inputStatistics.mean = inputLocalStatistics.mean; if (param.isUseInputStdDevPitch) inputStatistics.standardDeviation = inputLocalStatistics.standardDeviation; if (param.isUseInputRangePitch) inputStatistics.range = inputLocalStatistics.range; if (param.isUseInputInterceptPitch) inputStatistics.intercept = inputLocalStatistics.intercept; if (param.isUseInputSlopePitch) inputStatistics.slope = inputLocalStatistics.slope; System.arraycopy(f0s, 0, targetF0s, 0, f0s.length); double minF0ScaleAllowed = -0.5; double maxF0ScaleAllowed = 1.5; double minF0ShiftAllowed = -200.0; double maxF0ShiftAllowed = 200.0; if (param.pitchStatisticsType == PitchStatistics.STATISTICS_IN_LOGHERTZ) { targetF0s = SignalProcUtils.getLogF0s(targetF0s); minF0ScaleAllowed = Math.log(0.5); maxF0ScaleAllowed = Math.log(maxF0ScaleAllowed); minF0ShiftAllowed = Math.log(0.1); maxF0ShiftAllowed = Math.log(maxF0ShiftAllowed); } // Now, transform inputStatistics to outputStatistics if (param.pitchTransformationMethod == ProsodyTransformerParams.GLOBAL_MEAN || param.pitchTransformationMethod == ProsodyTransformerParams.SENTENCE_MEAN) { double meanShift = outputStatistics.mean - inputStatistics.mean; meanShift = Math.max(minF0ShiftAllowed, meanShift); meanShift = Math.min(maxF0ShiftAllowed, meanShift); for (i = 0; i < targetF0s.length; i++) targetF0s[i] = targetF0s[i] + meanShift; System.out.println("Mean transformed -> (Output mean f0)-(Input mean f0)=" + String.valueOf(meanShift)); } else if (param.pitchTransformationMethod == ProsodyTransformerParams.GLOBAL_STDDEV || param.pitchTransformationMethod == ProsodyTransformerParams.SENTENCE_STDDEV) { double scale = outputStatistics.standardDeviation / inputStatistics.standardDeviation; scale = Math.max(minF0ScaleAllowed, scale); scale = Math.min(maxF0ScaleAllowed, scale); double meanShift = inputStatistics.mean - inputStatistics.mean * scale; meanShift = Math.max(minF0ShiftAllowed, meanShift); meanShift = Math.min(maxF0ShiftAllowed, meanShift); for (i = 0; i < targetF0s.length; i++) targetF0s[i] = scale * targetF0s[i] + meanShift; System.out.println("Std.dev. transformed -> (Output f0)/(Input f0)=" + String.valueOf(scale)); } else if (param.pitchTransformationMethod == ProsodyTransformerParams.GLOBAL_RANGE || param.pitchTransformationMethod == ProsodyTransformerParams.SENTENCE_RANGE) { double scale = outputStatistics.range / inputStatistics.range; scale = Math.max(minF0ScaleAllowed, scale); scale = Math.min(maxF0ScaleAllowed, scale); double meanShift = inputStatistics.mean - inputStatistics.mean * scale; meanShift = Math.max(minF0ShiftAllowed, meanShift); meanShift = Math.min(maxF0ShiftAllowed, meanShift); for (i = 0; i < targetF0s.length; i++) targetF0s[i] = scale * targetF0s[i] + meanShift; System.out.println("Range transformed -> (Output f0)/(Input f0)=" + String.valueOf(scale)); } else if (param.pitchTransformationMethod == ProsodyTransformerParams.GLOBAL_SLOPE || param.pitchTransformationMethod == ProsodyTransformerParams.SENTENCE_SLOPE) { double scale = outputStatistics.slope / inputStatistics.slope; scale = Math.max(minF0ScaleAllowed, scale); scale = Math.min(maxF0ScaleAllowed, scale); double meanShift = inputStatistics.mean - inputStatistics.mean * scale; meanShift = Math.max(minF0ShiftAllowed, meanShift); meanShift = Math.min(maxF0ShiftAllowed, meanShift); for (i = 0; i < targetF0s.length; i++) targetF0s[i] = scale * targetF0s[i] + meanShift; System.out.println("Slope transformed -> (Output f0)/(Input f0)=" + String.valueOf(scale)); } else if (param.pitchTransformationMethod == ProsodyTransformerParams.GLOBAL_INTERCEPT || param.pitchTransformationMethod == ProsodyTransformerParams.SENTENCE_INTERCEPT) { double newMean = (inputStatistics.mean - inputStatistics.intercept) / inputStatistics.slope; newMean = outputStatistics.slope * newMean + outputStatistics.intercept; double meanShift = newMean - inputStatistics.mean; meanShift = Math.max(minF0ShiftAllowed, meanShift); meanShift = Math.min(maxF0ShiftAllowed, meanShift); for (i = 0; i < targetF0s.length; i++) targetF0s[i] = targetF0s[i] + meanShift; System.out.println("Intercept transformed -> (Output mean f0)-(Input mean f0)=" + String.valueOf(meanShift)); } else if (param.pitchTransformationMethod == ProsodyTransformerParams.GLOBAL_MEAN_STDDEV || param.pitchTransformationMethod == ProsodyTransformerParams.SENTENCE_MEAN_STDDEV) { double scale = outputStatistics.standardDeviation / inputStatistics.standardDeviation; scale = Math.max(minF0ScaleAllowed, scale); scale = Math.min(maxF0ScaleAllowed, scale); double meanShift = outputStatistics.mean - inputStatistics.mean * scale; meanShift = Math.max(minF0ShiftAllowed, meanShift); meanShift = Math.min(maxF0ShiftAllowed, meanShift); for (i = 0; i < targetF0s.length; i++) targetF0s[i] = scale * targetF0s[i] + meanShift; System.out.println("Mean & Std.dev. transformed -> (Output mean f0)-(Input mean f0)=" + String.valueOf(meanShift) + " (Output f0)/(Input f0)=" + String.valueOf(scale)); } else if (param.pitchTransformationMethod == ProsodyTransformerParams.GLOBAL_MEAN_SLOPE || param.pitchTransformationMethod == ProsodyTransformerParams.SENTENCE_MEAN_SLOPE) { double scale = outputStatistics.slope / inputStatistics.slope; scale = Math.max(minF0ScaleAllowed, scale); scale = Math.min(maxF0ScaleAllowed, scale); double meanShift = outputStatistics.mean - inputStatistics.mean * scale; meanShift = Math.max(minF0ShiftAllowed, meanShift); meanShift = Math.min(maxF0ShiftAllowed, meanShift); for (i = 0; i < targetF0s.length; i++) targetF0s[i] = scale * targetF0s[i] + meanShift; System.out.println("Mean & Slope transformed -> (Output mean f0)-(Input mean f0)=" + String.valueOf(meanShift) + " (Output f0)/(Input f0)=" + String.valueOf(scale)); } else if (param.pitchTransformationMethod == ProsodyTransformerParams.GLOBAL_INTERCEPT_STDDEV || param.pitchTransformationMethod == ProsodyTransformerParams.SENTENCE_INTERCEPT_STDDEV) { // First STDDEV double scale = outputStatistics.standardDeviation / inputStatistics.standardDeviation; scale = Math.max(minF0ScaleAllowed, scale); scale = Math.min(maxF0ScaleAllowed, scale); double meanShift = inputStatistics.mean - inputStatistics.mean * scale; meanShift = Math.max(minF0ShiftAllowed, meanShift); meanShift = Math.min(maxF0ShiftAllowed, meanShift); for (i = 0; i < targetF0s.length; i++) targetF0s[i] = scale * targetF0s[i] + meanShift; // Then INTERCEPT double newMean = (inputStatistics.mean - inputStatistics.intercept) / inputStatistics.slope; newMean = outputStatistics.slope * newMean + outputStatistics.intercept; meanShift = newMean - inputStatistics.mean; meanShift = Math.max(minF0ShiftAllowed, meanShift); meanShift = Math.min(maxF0ShiftAllowed, meanShift); for (i = 0; i < targetF0s.length; i++) targetF0s[i] = targetF0s[i] + meanShift; System.out.println("Intercept & Std. dev. transformed -> (Output mean f0)-(Input mean f0)=" + String.valueOf(meanShift) + " (Output f0)/(Input f0)=" + String.valueOf(scale)); } else if (param.pitchTransformationMethod == ProsodyTransformerParams.GLOBAL_INTERCEPT_SLOPE || param.pitchTransformationMethod == ProsodyTransformerParams.SENTENCE_INTERCEPT_SLOPE) { // First SLOPE double scale = outputStatistics.slope / inputStatistics.slope; scale = Math.max(minF0ScaleAllowed, scale); scale = Math.min(maxF0ScaleAllowed, scale); double meanShift = inputStatistics.mean - inputStatistics.mean * scale; for (i = 0; i < targetF0s.length; i++) targetF0s[i] = scale * targetF0s[i] + meanShift; // Then INTERCEPT double newMean = (inputStatistics.mean - inputStatistics.intercept) / inputStatistics.slope; newMean = outputStatistics.slope * newMean + outputStatistics.intercept; meanShift = newMean - inputStatistics.mean; meanShift = Math.max(minF0ShiftAllowed, meanShift); meanShift = Math.min(maxF0ShiftAllowed, meanShift); for (i = 0; i < targetF0s.length; i++) targetF0s[i] = targetF0s[i] + meanShift; System.out.println("Intercept & Slope transformed -> (Output mean f0)-(Input mean f0)=" + String.valueOf(meanShift) + " (Output f0)/(Input f0)=" + String.valueOf(scale)); } if (param.pitchStatisticsType == PitchStatistics.STATISTICS_IN_LOGHERTZ) targetF0s = SignalProcUtils.getExpF0s(targetF0s); } else System.arraycopy(f0s, 0, targetF0s, 0, f0s.length); if (param.pitchTransformationMethod != ProsodyTransformerParams.NO_TRANSFORMATION) { for (i = 0; i < targetF0s.length; i++) targetF0s[i] *= pscales[i]; } for (i = 0; i < targetF0s.length; i++) { if (f0s[i] < 10.0) targetF0s[i] = 0.0; if (targetF0s[i] < 10.0) targetF0s[i] = 0.0; } } if (false && param.pitchTransformationMethod != ProsodyTransformerParams.NO_TRANSFORMATION) { DisplayUtils.plotZoomed(f0s, "Input", 50.0); DisplayUtils.plotZoomed(targetF0s, "Tranformed", 50.0); } return targetF0s; } public int findBestMatch(PitchStatistics oneStatistics, PitchStatisticsCollection multipleStatistics, double[] variances) { int bestIndex = -1; if (multipleStatistics != null && oneStatistics != null && multipleStatistics.entries != null) { if (multipleStatistics.entries.length == 1) bestIndex = 0; else { double[] dists = new double[multipleStatistics.entries.length]; for (int i = 0; i < multipleStatistics.entries.length; i++) { dists[i] = 0.0; dists[i] += (oneStatistics.mean - multipleStatistics.entries[i].mean) * (oneStatistics.mean - multipleStatistics.entries[i].mean) / variances[0]; dists[i] += (oneStatistics.standardDeviation - multipleStatistics.entries[i].standardDeviation) * (oneStatistics.standardDeviation - multipleStatistics.entries[i].standardDeviation) / variances[1]; dists[i] += (oneStatistics.range - multipleStatistics.entries[i].range) * (oneStatistics.range - multipleStatistics.entries[i].range) / variances[2]; dists[i] += (oneStatistics.intercept - multipleStatistics.entries[i].intercept) * (oneStatistics.intercept - multipleStatistics.entries[i].intercept) / variances[3]; dists[i] += (oneStatistics.slope - multipleStatistics.entries[i].slope) * (oneStatistics.slope - multipleStatistics.entries[i].slope) / variances[4]; dists[i] = Math.sqrt(dists[i]); } bestIndex = MathUtils.getMinIndex(dists); } } return bestIndex; } }