/** * Copyright 2007 DFKI GmbH. * All Rights Reserved. Use is subject to license terms. * * This file is part of MARY TTS. * * MARY TTS is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, version 3 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * */ package marytts.signalproc.adaptation; import marytts.util.string.StringUtils; /** * This class keeps information on each specific training item For example, a training item for a sentence based voice conversion * training database could be a wav file, the corresponding text transcription, label file, pitch contour file, etc. The training * set is a collection of BaseTrainingItem objects * * @author Oytun Türk */ public class BaselineAdaptationItem { // A decomposition of the file into its sinus+noise+transients+residual components // audioFile = sinesFile+noiseFile+transientsFile+residualFile public String sinesFile; // Sinusoids public String noiseFile; // Noise public String transientsFile; // Transients public String residualFile; // Residual (what remains after all model based decomposition) // public String labelFile; // Labels public String pitchFile; // f0 contour in binary format (.ptc) public String f0File; // f0 contour in ESPS format (.f0) public String pitchMarkFile; // Pitch marks public String energyFile; // Energy contour public String textFile; // Text public String rawMfccFile; // Raw mel frequency cepstral coefficients public String mfccFile; // Mel frequency cepstral coefficients public String lsfFile; // Line spectral frequencies public String lpcFile; // Linear prediction coefficients public String lpResidualFile; // Time-domain residual waveform after LP inverse filtering public String cepsFile; // Cepstrum coefficients file public String eggFile; // Electro-glottograph file // Mary TTS outputs to specify target features for tests, transplantation, etc public String targetFestivalUttFile; // FESTIVAL_UTT output which contains target timing and f0s (also the labels) // This needs to be mapped with actual labels (i.e. labelFile) and f0s (pitchFile) to // obtain required prosody modification factors public String targetLabelFile; // Target labels for mapping public String targetPitchFile; // Target pitch file, to be used in transplantations public String targetF0File; // Target pitch file, to be used in transplantations public String targetEnergyFile; // Target energy file, to be used in transplantations public String targetWavFile; // Target waveform file // public String audioFile; // Original waveform file public BaselineAdaptationItem() { } public BaselineAdaptationItem(BaselineAdaptationItem existing) { sinesFile = existing.sinesFile; noiseFile = existing.noiseFile; transientsFile = existing.transientsFile; residualFile = existing.residualFile; labelFile = existing.labelFile; pitchFile = existing.pitchFile; f0File = existing.f0File; pitchMarkFile = existing.pitchMarkFile; energyFile = existing.energyFile; textFile = existing.textFile; rawMfccFile = existing.rawMfccFile; mfccFile = existing.mfccFile; lsfFile = existing.lsfFile; lpcFile = existing.lpcFile; lpResidualFile = existing.lpResidualFile; cepsFile = existing.cepsFile; eggFile = existing.eggFile; targetFestivalUttFile = existing.targetFestivalUttFile; targetLabelFile = existing.targetLabelFile; targetPitchFile = existing.targetPitchFile; targetF0File = existing.targetF0File; targetEnergyFile = existing.targetEnergyFile; targetWavFile = existing.targetWavFile; audioFile = existing.audioFile; } public void setFromWavFilename(String referenceFilename) { audioFile = referenceFilename; sinesFile = StringUtils.modifyExtension(audioFile, BaselineAdaptationSet.SINUSOID_EXTENSION_DEFAULT); // Sinusoids noiseFile = StringUtils.modifyExtension(audioFile, BaselineAdaptationSet.NOISE_EXTENSION_DEFAULT); // Noise transientsFile = StringUtils.modifyExtension(audioFile, BaselineAdaptationSet.TRANSIENT_EXTENSION_DEFAULT); // Transients residualFile = StringUtils.modifyExtension(audioFile, BaselineAdaptationSet.RESIDUAL_EXTENSION_DEFAULT); // Residual (what // remains // after all // model based // decomposition) labelFile = StringUtils.modifyExtension(audioFile, BaselineAdaptationSet.LABEL_EXTENSION_DEFAULT); // Labels pitchFile = StringUtils.modifyExtension(audioFile, BaselineAdaptationSet.PITCH_EXTENSION_DEFAULT); // f0 contour f0File = StringUtils.modifyExtension(audioFile, BaselineAdaptationSet.F0_EXTENSION_DEFAULT); // f0 contour pitchMarkFile = StringUtils.modifyExtension(audioFile, BaselineAdaptationSet.PITCHMARK_EXTENSION_DEFAULT); // Pitch marks energyFile = StringUtils.modifyExtension(audioFile, BaselineAdaptationSet.ENERGY_EXTENSION_DEFAULT); // Energy contour textFile = StringUtils.modifyExtension(audioFile, BaselineAdaptationSet.TEXT_EXTENSION_DEFAULT); // Text mfccFile = StringUtils.modifyExtension(audioFile, BaselineAdaptationSet.MFCC_EXTENSION_DEFAULT); // Mel frequency cepstral // coefficients rawMfccFile = StringUtils.modifyExtension(audioFile, BaselineAdaptationSet.RAWMFCC_EXTENSION_DEFAULT); lsfFile = StringUtils.modifyExtension(audioFile, BaselineAdaptationSet.LSF_EXTENSION_DEFAULT); // Line spectral // frequencies lpcFile = StringUtils.modifyExtension(audioFile, BaselineAdaptationSet.LPC_EXTENSION_DEFAULT); // Linear prediction // coefficients lpResidualFile = StringUtils.modifyExtension(audioFile, BaselineAdaptationSet.LPRESIDUAL_EXTENSION_DEFAULT); // Time-domain // residual // waveform // after // LP // inverse // filtering cepsFile = StringUtils.modifyExtension(audioFile, BaselineAdaptationSet.CEPSTRUM_EXTENSION_DEFAULT); // Cepstrum // coefficients // file eggFile = StringUtils.modifyExtension(audioFile, BaselineAdaptationSet.EGG_EXTENSION_DEFAULT); // Electro-glottograph file targetFestivalUttFile = StringUtils.modifyExtension(audioFile, BaselineAdaptationSet.TARGETFESTIVALUTT_EXTENSION_DEFAULT); // FESTIVAL_UTT // file targetLabelFile = StringUtils.modifyExtension(audioFile, BaselineAdaptationSet.TARGETLABEL_EXTENSION_DEFAULT); // Target // labels // for // mapping targetPitchFile = StringUtils.modifyExtension(audioFile, BaselineAdaptationSet.TARGETPITCH_EXTENSION_DEFAULT); // Target // pitch // for // copy // synthesis targetF0File = StringUtils.modifyExtension(audioFile, BaselineAdaptationSet.TARGETF0_EXTENSION_DEFAULT); // Target pitch // for copy // synthesis targetEnergyFile = StringUtils.modifyExtension(audioFile, BaselineAdaptationSet.TARGETENERGY_EXTENSION_DEFAULT); // Target // energy // file, // to // be // used // in // transplantations targetWavFile = StringUtils.modifyExtension(audioFile, BaselineAdaptationSet.TARGETWAV_EXTENSION_DEFAULT); // Target // waveform // file } }