/**
* Copyright 2007 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.signalproc.adaptation.outlier;
import java.io.IOException;
import java.util.Arrays;
import marytts.signalproc.adaptation.OutlierStatus;
import marytts.signalproc.adaptation.codebook.WeightedCodebook;
import marytts.signalproc.adaptation.codebook.WeightedCodebookFile;
import marytts.signalproc.adaptation.codebook.WeightedCodebookFileHeader;
import marytts.signalproc.adaptation.codebook.WeightedCodebookMapperParams;
import marytts.signalproc.analysis.distance.DistanceComputer;
import marytts.util.math.MathUtils;
/**
*
* Single Gaussian based outlier elimination. Looks at the difference distributions of aligned source-target LSF, f0, energy, and
* duration features. Eliminates outliers that fall outside user specified ranges. The ranges can be specified using
* GaussianOutlierEliminatorParams in the form of total standard deviations for each feature. It is also possible to eliminate
* LSF pairs that are too similar, in order to enforce a certain amount of dissimilarity in the training data.
*
* Reference: Türk, O., and Arslan, L. M., 2006, "Robust Processing Techniques for Voice Conversion", Computer Speech and
* Language 20 (2006), pp. 441-467.
*
* @author Oytun Türk
*
*/
public class GaussianOutlierEliminator {
    /**
     * Both the source and the target f0 of an entry must exceed this value for the entry to take part in the f0 statistics.
     * The local naming in {@link #eliminate} suggests such entries are regarded as "voiced"; the unit is presumably Hz (TODO
     * confirm against the codebook format).
     */
    private static final double MIN_VOICED_F0 = 10.0;

    /**
     * Reads a codebook, flags entries whose source-target distance is an outlier for any of the enabled features, and writes
     * the remaining entries to a new codebook file.
     * <p>
     * For every enabled feature the mean and standard deviation of the per-entry source-target distance are estimated. An
     * entry is flagged when its distance exceeds {@code mean + k * stdDev}, where {@code k} is taken from
     * {@code params.totalStandardDeviations}. For LSFs, entries below {@code mean - k * stdDev} are flagged as well when
     * {@code params.isEliminateTooSimilarLsf} is set. Only entries that were not flagged for any feature are written out. A
     * summary of the outlier counts is printed to standard output.
     * <p>
     * If the input codebook cannot be read, the error is reported on standard error and the method returns without creating
     * the output file.
     *
     * @param params
     *            selects which features are checked and how many standard deviations are tolerated for each of them
     * @param codebookFileIn
     *            path of the codebook file to read
     * @param codebookFileOut
     *            path of the codebook file to write
     */
    public void eliminate(GaussianOutlierEliminatorParams params, String codebookFileIn, String codebookFileOut) {
        WeightedCodebook codebookIn = readCodebook(codebookFileIn);
        if (codebookIn == null) {
            return;
        }

        final int totalEntries = codebookIn.header.totalEntries;

        int[] acceptanceStatus = new int[totalEntries];
        Arrays.fill(acceptanceStatus, OutlierStatus.NON_OUTLIER);

        // Distance buffers are only allocated for the features that are actually checked.
        double[] lsfDistances = params.isCheckLsfOutliers ? new double[totalEntries] : null;
        double[] durationDistances = params.isCheckDurationOutliers ? new double[totalEntries] : null;
        double[] energyDistances = params.isCheckEnergyOutliers ? new double[totalEntries] : null;

        // f0 distances are stored densely (voiced entries only); voicedInds maps each slot back to its codebook index.
        double[] f0Distances = null;
        int[] voicedInds = null;
        if (params.isCheckF0Outliers) {
            f0Distances = new double[totalEntries];
            voicedInds = new int[totalEntries];
            Arrays.fill(voicedInds, -1);
        }
        int totalVoiced = 0;

        // Collect the distances between source and target entries
        for (int i = 0; i < totalEntries; i++) {
            if (params.isCheckLsfOutliers) {
                lsfDistances[i] = DistanceComputer.getLsfInverseHarmonicDistance(codebookIn.entries[i].sourceItem.lsfs,
                        codebookIn.entries[i].targetItem.lsfs, WeightedCodebookMapperParams.DEFAULT_FREQ_RANGE_FOR_LSF_MATCH);
            }

            if (params.isCheckF0Outliers && codebookIn.entries[i].sourceItem.f0 > MIN_VOICED_F0
                    && codebookIn.entries[i].targetItem.f0 > MIN_VOICED_F0) {
                f0Distances[totalVoiced] = codebookIn.entries[i].sourceItem.f0 - codebookIn.entries[i].targetItem.f0;
                voicedInds[totalVoiced] = i;
                totalVoiced++;
            }

            if (params.isCheckDurationOutliers) {
                durationDistances[i] = codebookIn.entries[i].sourceItem.duration
                        - codebookIn.entries[i].targetItem.duration;
            }

            if (params.isCheckEnergyOutliers) {
                energyDistances[i] = codebookIn.entries[i].sourceItem.energy - codebookIn.entries[i].targetItem.energy;
            }
        }

        // Estimate the mean of each distance. Empty ranges are skipped: the mean would be undefined there, and it is never
        // used because the corresponding flagging loop below does not execute.
        double lsfDistanceMean = 0.0;
        double durationDistanceMean = 0.0;
        double energyDistanceMean = 0.0;
        double f0DistanceMean = 0.0;
        if (totalEntries > 0) {
            if (params.isCheckLsfOutliers) {
                lsfDistanceMean = MathUtils.mean(lsfDistances);
            }
            if (params.isCheckDurationOutliers) {
                durationDistanceMean = MathUtils.mean(durationDistances);
            }
            if (params.isCheckEnergyOutliers) {
                energyDistanceMean = MathUtils.mean(energyDistances);
            }
        }
        if (params.isCheckF0Outliers && totalVoiced > 0) {
            f0DistanceMean = MathUtils.mean(f0Distances, 0, totalVoiced - 1);
        }

        // Estimate the standard deviation of each distance. The defaults are a huge sentinel so that the thresholds stay
        // practically unreachable whenever fewer than two samples are available for an estimate.
        double lsfDistanceStdDev = 0.5 * Double.MAX_VALUE;
        double durationDistanceStdDev = 0.5 * Double.MAX_VALUE;
        double energyDistanceStdDev = 0.5 * Double.MAX_VALUE;
        double f0DistanceStdDev = 0.5 * Double.MAX_VALUE;
        if (totalEntries > 1) {
            if (params.isCheckLsfOutliers) {
                lsfDistanceStdDev = MathUtils.standardDeviation(lsfDistances, lsfDistanceMean);
            }
            if (params.isCheckDurationOutliers) {
                durationDistanceStdDev = MathUtils.standardDeviation(durationDistances, durationDistanceMean);
            }
            if (params.isCheckEnergyOutliers) {
                energyDistanceStdDev = MathUtils.standardDeviation(energyDistances, energyDistanceMean);
            }
        }
        if (params.isCheckF0Outliers && totalVoiced > 1) {
            f0DistanceStdDev = MathUtils.standardDeviation(f0Distances, f0DistanceMean, 0, totalVoiced - 1);
        }

        // Flag the outliers. Status values are accumulated with += so that one entry can carry several outlier reasons
        // (presumably the OutlierStatus constants are distinct flags; verify against OutlierStatus).
        // NOTE(review): duration, energy and f0 use signed differences but are only tested against the upper threshold, so
        // large negative differences are never flagged. Confirm that this is intended.
        int totalLsfOutliers = 0;
        int totalDurationOutliers = 0;
        int totalF0Outliers = 0;
        int totalEnergyOutliers = 0;

        for (int i = 0; i < totalEntries; i++) {
            if (params.isCheckLsfOutliers) {
                double lsfMargin = params.totalStandardDeviations.lsf * lsfDistanceStdDev;
                boolean tooFar = lsfDistances[i] > lsfDistanceMean + lsfMargin;
                boolean tooSimilar = params.isEliminateTooSimilarLsf && lsfDistances[i] < lsfDistanceMean - lsfMargin;
                if (tooFar || tooSimilar) {
                    acceptanceStatus[i] += OutlierStatus.LSF_OUTLIER;
                    totalLsfOutliers++;
                }
            }

            if (params.isCheckDurationOutliers && durationDistances[i] > durationDistanceMean
                    + params.totalStandardDeviations.duration * durationDistanceStdDev) {
                acceptanceStatus[i] += OutlierStatus.DURATION_OUTLIER;
                totalDurationOutliers++;
            }

            if (params.isCheckEnergyOutliers && energyDistances[i] > energyDistanceMean
                    + params.totalStandardDeviations.energy * energyDistanceStdDev) {
                acceptanceStatus[i] += OutlierStatus.ENERGY_OUTLIER;
                totalEnergyOutliers++;
            }
        }

        if (params.isCheckF0Outliers) {
            for (int v = 0; v < totalVoiced; v++) {
                if (f0Distances[v] > f0DistanceMean + params.totalStandardDeviations.f0 * f0DistanceStdDev) {
                    acceptanceStatus[voicedInds[v]] += OutlierStatus.F0_OUTLIER;
                    totalF0Outliers++;
                }
            }
        }

        int newTotalEntries = 0;
        for (int i = 0; i < totalEntries; i++) {
            if (acceptanceStatus[i] == OutlierStatus.NON_OUTLIER) {
                newTotalEntries++;
            }
        }

        // Write the output codebook
        writeAcceptedEntries(codebookIn, acceptanceStatus, codebookFileOut);

        System.out.println("Outliers detected = " + (totalEntries - newTotalEntries) + " of " + totalEntries);
        System.out.println("Total lsf outliers = " + totalLsfOutliers);
        System.out.println("Total f0 outliers = " + totalF0Outliers);
        System.out.println("Total duration outliers = " + totalDurationOutliers);
        System.out.println("Total energy outliers = " + totalEnergyOutliers);
    }

    /**
     * Reads the complete codebook stored in the given file.
     *
     * @param codebookFileIn
     *            path of the codebook file to read
     * @return the codebook, or {@code null} if reading failed (the failure is reported on standard error)
     */
    private static WeightedCodebook readCodebook(String codebookFileIn) {
        // NOTE(review): the file object is never closed explicitly here; confirm that readCodebookFile() releases it.
        WeightedCodebookFile fileIn = new WeightedCodebookFile(codebookFileIn, WeightedCodebookFile.OPEN_FOR_READ);
        try {
            return fileIn.readCodebookFile();
        } catch (IOException e) {
            // Best effort: report the problem and let the caller skip the elimination.
            System.err.println("Could not read codebook file: " + codebookFileIn);
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Writes a copy of the input header (with its entry count reset) followed by every entry that is still marked as
     * {@code OutlierStatus.NON_OUTLIER}. The output file is closed even if writing fails part-way.
     *
     * @param codebookIn
     *            codebook providing the header and the entries
     * @param acceptanceStatus
     *            outlier status per entry, indexed like {@code codebookIn.entries}
     * @param codebookFileOut
     *            path of the codebook file to write
     */
    private static void writeAcceptedEntries(WeightedCodebook codebookIn, int[] acceptanceStatus, String codebookFileOut) {
        WeightedCodebookFile codebookOut = new WeightedCodebookFile(codebookFileOut, WeightedCodebookFile.OPEN_FOR_WRITE);
        try {
            WeightedCodebookFileHeader headerOut = new WeightedCodebookFileHeader(codebookIn.header);
            headerOut.resetTotalEntries();
            codebookOut.writeCodebookHeader(headerOut);

            for (int i = 0; i < acceptanceStatus.length; i++) {
                if (acceptanceStatus[i] == OutlierStatus.NON_OUTLIER) {
                    codebookOut.writeEntry(codebookIn.entries[i]);
                }
            }
        } finally {
            codebookOut.close();
        }
    }
}