/**
* Copyright 2007 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.signalproc.adaptation.codebook;
import java.io.IOException;
import javax.sound.sampled.UnsupportedAudioFileException;
import marytts.signalproc.adaptation.BaselineAdaptationSet;
import marytts.signalproc.adaptation.BaselineFeatureExtractor;
import marytts.signalproc.adaptation.BaselinePreprocessor;
import marytts.signalproc.adaptation.outlier.KMeansMappingEliminatorParams;
import marytts.signalproc.adaptation.outlier.TotalStandardDeviations;
import marytts.signalproc.adaptation.prosody.PitchMappingFile;
import marytts.signalproc.analysis.distance.DistanceComputer;
import marytts.signalproc.window.Window;
import marytts.util.string.StringUtils;
/**
*
* This class implements training for weighted codebook mapping based voice conversion using parallel training data (i.e. source
* and target data in pairs of audio recordings which have identical content)
*
* Reference for weighted codebook mapping: Arslan, L. M., 1999, “Speaker Transformation Algorithm using Segmental Codebooks”,
* Speech Communication, 28, pp. 211-226.
*
* Reference for weighted frame mapping: Türk, O., 2007, “Cross-Lingual Voice Conversion”, PhD Thesis, Bogazici University.
*
* @author Oytun Türk
*/
public class WeightedCodebookParallelTrainer extends WeightedCodebookTrainer {

    /**
     * Creates a parallel trainer; all three arguments are handed unchanged to the superclass constructor.
     *
     * @param pp
     *            preprocessor
     * @param fe
     *            feature extractor
     * @param pa
     *            trainer parameters
     */
    public WeightedCodebookParallelTrainer(BaselinePreprocessor pp, BaselineFeatureExtractor fe, WeightedCodebookTrainerParams pa) {
        super(pp, fe, pa);
    }

    /**
     * Performs training. Call this after initializing the trainer.
     * <p>
     * If {@code checkParams()} returns {@code false} nothing is done. Otherwise adaptation sets are built from
     * {@code wcParams.sourceTrainingFolder} and {@code wcParams.targetTrainingFolder} and passed to
     * {@link #train(BaselineAdaptationSet, BaselineAdaptationSet)}.
     *
     * @throws IOException
     *             IO Exception
     * @throws UnsupportedAudioFileException
     *             unsupported audio file exception
     */
    public void run() throws IOException, UnsupportedAudioFileException {
        if (checkParams()) {
            BaselineAdaptationSet sourceTrainingSet = new BaselineAdaptationSet(wcParams.sourceTrainingFolder);
            BaselineAdaptationSet targetTrainingSet = new BaselineAdaptationSet(wcParams.targetTrainingFolder);

            train(sourceTrainingSet, targetTrainingSet);
        }
    }

    /**
     * Parallel training: obtains the index mapping between the two sets via {@code getIndexedMapping} and delegates to the
     * three-argument {@code train} with that mapping.
     *
     * @param sourceTrainingSet
     *            source training set
     * @param targetTrainingSet
     *            target training set
     * @throws IOException
     *             IO Exception
     * @throws UnsupportedAudioFileException
     *             unsupported audio file exception
     */
    public void train(BaselineAdaptationSet sourceTrainingSet, BaselineAdaptationSet targetTrainingSet) throws IOException,
            UnsupportedAudioFileException {
        int[] map = getIndexedMapping(sourceTrainingSet, targetTrainingSet);

        train(sourceTrainingSet, targetTrainingSet, map);
    }

    /**
     * Command line entry point. The arguments are ignored; {@link #mainQuickTest2()} is run.
     *
     * @param args
     *            ignored
     * @throws UnsupportedAudioFileException
     *             unsupported audio file exception
     * @throws IOException
     *             IO Exception
     */
    public static void main(String[] args) throws UnsupportedAudioFileException, IOException {
        // Alternative predefined configurations in this class: mainAngryF(), mainHappyF(), mainSadF()
        mainQuickTest2();
    }

    /**
     * Trains the neutralF to angryF mapping on the hard-coded Interspeech08 "train_200" folders.
     *
     * @throws UnsupportedAudioFileException
     *             unsupported audio file exception
     * @throws IOException
     *             IO Exception
     */
    public static void mainAngryF() throws UnsupportedAudioFileException, IOException {
        runTraining("neutralF", "angryF", "D:/Oytun/DFKI/voices/Interspeech08_out/neutral2angry",
                "D:/Oytun/DFKI/voices/Interspeech08/neutral/train_200", "D:/Oytun/DFKI/voices/Interspeech08/angry/train_200",
                "_200");
    }

    /**
     * Trains the neutralF to happyF mapping on the hard-coded Interspeech08 "train_200" folders.
     *
     * @throws UnsupportedAudioFileException
     *             unsupported audio file exception
     * @throws IOException
     *             IO Exception
     */
    public static void mainHappyF() throws UnsupportedAudioFileException, IOException {
        runTraining("neutralF", "happyF", "D:/Oytun/DFKI/voices/Interspeech08_out/neutral2happy",
                "D:/Oytun/DFKI/voices/Interspeech08/neutral/train_200", "D:/Oytun/DFKI/voices/Interspeech08/happy/train_200",
                "_200");
    }

    /**
     * Trains the neutralF to sadF mapping on the hard-coded Interspeech08 "train_200" folders.
     *
     * @throws UnsupportedAudioFileException
     *             unsupported audio file exception
     * @throws IOException
     *             IO Exception
     */
    public static void mainSadF() throws UnsupportedAudioFileException, IOException {
        runTraining("neutralF", "sadF", "D:/Oytun/DFKI/voices/Interspeech08_out/neutral2sad",
                "D:/Oytun/DFKI/voices/Interspeech08/neutral/train_200", "D:/Oytun/DFKI/voices/Interspeech08/sad/train_200",
                "_200");
    }

    /**
     * Depending on the parameters it will train a Codebook.
     * <p>
     * Input: the .wav and .lab files in /Neutral-Spike-Conversion/codebook/neutral/train_99 and
     * /Neutral-Spike-Conversion/codebook/angry/train_99. In these directories it will calculate .lsf, .ptc and .ene files.
     * <p>
     * Output: /Neutral-Spike-Conversion/codebook/neutral2angry/neutralF_X_angryF_99.pmf
     *
     * @throws UnsupportedAudioFileException
     *             unsupported audio file exception
     * @throws IOException
     *             IO Exception
     */
    public static void mainQuickTest2() throws UnsupportedAudioFileException, IOException {
        runTraining("neutralF", "angryF",
                "/project/mary/marcela/VoiceConversion/Neutral-Spike-Conversion/codebook/neutral2angry",
                "/project/mary/marcela/VoiceConversion/Neutral-Spike-Conversion/codebook/neutral/train_99",
                "/project/mary/marcela/VoiceConversion/Neutral-Spike-Conversion/codebook/angry/train_99", "_99");
    }

    /**
     * Builds the parameter set shared by all predefined configurations of this class, runs a trainer with it and prints a
     * completion message.
     *
     * @param sourceTag
     *            source name tag (i.e. style or speaker identity)
     * @param targetTag
     *            target name tag (i.e. style or speaker identity)
     * @param trainingBaseFolder
     *            training base directory; the codebook and pitch mapping files are placed here
     * @param sourceTrainingFolder
     *            source training folder
     * @param targetTrainingFolder
     *            target training folder
     * @param outputSuffix
     *            text inserted between "sourceTag_X_targetTag" and the file extension of the output files (e.g. "_200")
     * @throws UnsupportedAudioFileException
     *             unsupported audio file exception
     * @throws IOException
     *             IO Exception
     */
    private static void runTraining(String sourceTag, String targetTag, String trainingBaseFolder, String sourceTrainingFolder,
            String targetTrainingFolder, String outputSuffix) throws UnsupportedAudioFileException, IOException {
        BaselinePreprocessor pp = new BaselinePreprocessor();
        BaselineFeatureExtractor fe = new BaselineFeatureExtractor();
        WeightedCodebookTrainerParams pa = new WeightedCodebookTrainerParams();

        // Frame-by-frame mapping of features. Alternatives offered by WeightedCodebookFileHeader:
        // FRAME_GROUPS (frame average features over a fixed number of neighbouring frames, see
        // codebookHeader.numNeighboursInFrameGroups), LABELS (label average features), LABEL_GROUPS (averages across label
        // groups, see codebookHeader.numNeighboursInLabelGroups) and SPEECH (averages across all speech parts).
        pa.codebookHeader.codebookType = WeightedCodebookFileHeader.FRAMES;

        pa.codebookHeader.sourceTag = sourceTag;
        pa.codebookHeader.targetTag = targetTag;

        pa.trainingBaseFolder = trainingBaseFolder;
        pa.sourceTrainingFolder = sourceTrainingFolder;
        pa.targetTrainingFolder = targetTrainingFolder;

        pa.indexMapFileExtension = ".imf"; // Index map file extensions

        String baseFile = StringUtils.checkLastSlash(pa.trainingBaseFolder) + pa.codebookHeader.sourceTag + "_X_"
                + pa.codebookHeader.targetTag;
        pa.codebookFile = baseFile + outputSuffix + WeightedCodebookFile.DEFAULT_EXTENSION;
        pa.pitchMappingFile = baseFile + outputSuffix + PitchMappingFile.DEFAULT_EXTENSION;

        pa.isForcedAnalysis = false;

        configureAnalysis(pa);
        configureOutlierEliminators(pa);

        WeightedCodebookParallelTrainer t = new WeightedCodebookParallelTrainer(pp, fe, pa);
        t.run();

        System.out.println("Training completed...");
    }

    /**
     * Sets the LSF, pitch and energy analysis parameters in the codebook header of {@code pa}.
     *
     * @param pa
     *            trainer parameters to fill in
     */
    private static void configureAnalysis(WeightedCodebookTrainerParams pa) {
        pa.codebookHeader.lsfParams.dimension = 20; // 0:Auto set LP order
        pa.codebookHeader.lsfParams.preCoef = 0.97f;
        pa.codebookHeader.lsfParams.skipsize = 0.010f;
        pa.codebookHeader.lsfParams.winsize = 0.020f;
        pa.codebookHeader.lsfParams.windowType = Window.HAMMING;

        pa.codebookHeader.ptcParams.windowSizeInSeconds = 0.040;
        pa.codebookHeader.ptcParams.skipSizeInSeconds = 0.005;
        pa.codebookHeader.ptcParams.voicingThreshold = 0.30;
        pa.codebookHeader.ptcParams.isDoublingCheck = false;
        pa.codebookHeader.ptcParams.isHalvingCheck = false;
        pa.codebookHeader.ptcParams.minimumF0 = 40.0f;
        pa.codebookHeader.ptcParams.maximumF0 = 400.0f;
        pa.codebookHeader.ptcParams.centerClippingRatio = 0.3;
        // The cut-offs are derived from the F0 limits set just above
        pa.codebookHeader.ptcParams.cutOff1 = pa.codebookHeader.ptcParams.minimumF0 - 20.0;
        pa.codebookHeader.ptcParams.cutOff2 = pa.codebookHeader.ptcParams.maximumF0 + 200.0;

        pa.codebookHeader.energyParams.windowSizeInSeconds = 0.020;
        pa.codebookHeader.energyParams.skipSizeInSeconds = 0.010;
    }

    /**
     * Sets the Gaussian and the KMeans outlier eliminator parameters of {@code pa}.
     * <p>
     * A single {@link TotalStandardDeviations} instance is filled in two stages and each eliminator receives its own copy of
     * the state at that stage, so the statement order in this method matters.
     *
     * @param pa
     *            trainer parameters to fill in
     */
    private static void configureOutlierEliminators(WeightedCodebookTrainerParams pa) {
        TotalStandardDeviations tsd = new TotalStandardDeviations();
        tsd.lsf = 1.5;
        tsd.f0 = 1.0;
        tsd.duration = 1.0;
        tsd.energy = 2.0;

        // Gaussian outlier eliminator
        // Decreasing totalStandardDeviations will lead to more outlier eliminations, i.e. smaller codebooks
        pa.gaussianEliminatorParams.isActive = true; // Set to false if you do not want to use this eliminator at all
        pa.gaussianEliminatorParams.isCheckLsfOutliers = true;
        pa.gaussianEliminatorParams.isEliminateTooSimilarLsf = true;
        pa.gaussianEliminatorParams.isCheckF0Outliers = true;
        pa.gaussianEliminatorParams.isCheckDurationOutliers = true;
        pa.gaussianEliminatorParams.isCheckEnergyOutliers = true;
        // Copy taken before tsd is modified for the KMeans eliminator below
        pa.gaussianEliminatorParams.totalStandardDeviations = new TotalStandardDeviations(tsd);

        // KMeans one-to-many and many-to-one mapping eliminator
        pa.kmeansEliminatorParams.isActive = true; // Set to false if you do not want to use this eliminator at all
        // Other elimination algorithms declared in KMeansMappingEliminatorParams: ELIMINATE_LEAST_LIKELY_MAPPINGS (used
        // together with eliminationLikelihood) and ELIMINATE_USING_SUBCLUSTER_MEAN_DISTANCES
        pa.kmeansEliminatorParams.eliminationAlgorithm = KMeansMappingEliminatorParams.ELIMINATE_MEAN_DISTANCE_MISMATCHES;
        // DistanceComputer.EUCLIDEAN_DISTANCE is the alternative distance type
        pa.kmeansEliminatorParams.distanceType = DistanceComputer.NORMALIZED_EUCLIDEAN_DISTANCE;
        pa.kmeansEliminatorParams.isGlobalVariance = true;

        pa.kmeansEliminatorParams.isSeparateClustering = false; // Cluster features separately(true) or together(false)?

        // Effective only when isSeparateClustering is false
        tsd.general = 0.1;
        pa.kmeansEliminatorParams.numClusters = 30;

        // Effective only when isSeparateClustering is true
        tsd.lsf = 1.0;
        tsd.f0 = 1.0;
        tsd.duration = 1.0;
        tsd.energy = 1.0;
        pa.kmeansEliminatorParams.numClustersLsf = 30;
        pa.kmeansEliminatorParams.numClustersF0 = 50;
        pa.kmeansEliminatorParams.numClustersDuration = 5;
        pa.kmeansEliminatorParams.numClustersEnergy = 5;
        pa.kmeansEliminatorParams.isCheckLsfOutliers = true;
        pa.kmeansEliminatorParams.isCheckF0Outliers = false;
        pa.kmeansEliminatorParams.isCheckDurationOutliers = false;
        pa.kmeansEliminatorParams.isCheckEnergyOutliers = false;

        pa.kmeansEliminatorParams.totalStandardDeviations = new TotalStandardDeviations(tsd);
    }
}