/**
* Copyright 2007 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.signalproc.analysis.distance;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;
import javax.sound.sampled.UnsupportedAudioFileException;
import marytts.signalproc.adaptation.BaselineAdaptationItem;
import marytts.signalproc.adaptation.BaselineAdaptationSet;
import marytts.signalproc.adaptation.IndexMap;
import marytts.signalproc.analysis.Labels;
import marytts.util.data.AlignLabelsUtils;
import marytts.util.data.audio.AudioDoubleDataSource;
import marytts.util.math.MathUtils;
import marytts.util.signal.SignalProcUtils;
import marytts.util.string.StringUtils;
/**
 * Implements an LP spectral envelope based distortion measure.
 * <p>
 * This base class handles everything around the actual distance: it reads pairs of wav files, peak-normalises them,
 * aligns their label sequences, maps each frame of the first signal onto a frame of the second signal and collects one
 * distance value per matched frame pair. The per-frame distance itself is provided by
 * {@link #frameDistance(double[], double[], int, int)}, which derived classes are expected to override.
 *
 * @author Oytun Türk
 */
public class BaselineLPSpectralEnvelopeDistortionComputer extends BaselineDistortionComputer {
    /** Default analysis window size in seconds. */
    public static final double DEFAULT_WINDOWSIZE = 0.020;
    /** Default skip size (frame shift) in seconds. */
    public static final double DEFAULT_SKIPSIZE = 0.010;
    /** Default FFT size; a negative value means "derive it from the sampling rates and window sizes". */
    public static final int DEFAULT_FFTSIZE = -1;
    /** Default LP order; a negative value means "derive it from the sampling rates". */
    public static final int DEFAULT_LPORDER = -1;

    public BaselineLPSpectralEnvelopeDistortionComputer() {
        super();
    }

    /** Same as the full folder-based overload, using all default analysis parameters. */
    public double[] getDistances(String folder1, String folder2) throws IOException {
        return getDistances(folder1, folder2, DEFAULT_WINDOWSIZE);
    }

    /** Same as the full folder-based overload, using the default skip size, FFT size and LP order. */
    public double[] getDistances(String folder1, String folder2, double winSizeInSeconds) throws IOException {
        return getDistances(folder1, folder2, winSizeInSeconds, DEFAULT_SKIPSIZE);
    }

    /** Same as the full folder-based overload, using the default FFT size and LP order. */
    public double[] getDistances(String folder1, String folder2, double winSizeInSeconds, double skipSizeInSeconds)
            throws IOException {
        return getDistances(folder1, folder2, winSizeInSeconds, skipSizeInSeconds, DEFAULT_FFTSIZE);
    }

    /** Same as the full folder-based overload, using the default LP order. */
    public double[] getDistances(String folder1, String folder2, double winSizeInSeconds, double skipSizeInSeconds,
            int fftSize) throws IOException {
        return getDistances(folder1, folder2, winSizeInSeconds, skipSizeInSeconds, fftSize, DEFAULT_LPORDER);
    }

    /**
     * Builds one adaptation set per folder (using the default wav extension) and computes the distances between them.
     *
     * @param folder1
     *            folder holding the first set of recordings
     * @param folder2
     *            folder holding the second set of recordings
     * @param winSizeInSeconds
     *            analysis window size in seconds
     * @param skipSizeInSeconds
     *            skip size in seconds
     * @param fftSize
     *            FFT size, or a negative value to derive it automatically
     * @param lpOrder
     *            LP order, or a negative value to derive it automatically
     * @return the concatenated frame distances of all item pairs, or <code>null</code> if no pair produced a result
     * @throws IOException
     *             if an audio or label file cannot be read
     */
    public double[] getDistances(String folder1, String folder2, double winSizeInSeconds, double skipSizeInSeconds,
            int fftSize, int lpOrder) throws IOException {
        folder1 = StringUtils.checkLastSlash(folder1);
        folder2 = StringUtils.checkLastSlash(folder2);
        BaselineAdaptationSet set1 = new BaselineAdaptationSet(folder1, BaselineAdaptationSet.WAV_EXTENSION_DEFAULT);
        BaselineAdaptationSet set2 = new BaselineAdaptationSet(folder2, BaselineAdaptationSet.WAV_EXTENSION_DEFAULT);
        return getDistances(set1, set2, winSizeInSeconds, skipSizeInSeconds, fftSize, lpOrder);
    }

    /** Same as the full set-based overload, using all default analysis parameters and an identity item mapping. */
    public double[] getDistances(BaselineAdaptationSet set1, BaselineAdaptationSet set2) throws IOException {
        return getDistances(set1, set2, DEFAULT_WINDOWSIZE);
    }

    /** Same as the full set-based overload, using the default skip size, FFT size, LP order and an identity mapping. */
    public double[] getDistances(BaselineAdaptationSet set1, BaselineAdaptationSet set2, double winSizeInSeconds)
            throws IOException {
        return getDistances(set1, set2, winSizeInSeconds, DEFAULT_SKIPSIZE);
    }

    /** Same as the full set-based overload, using the default FFT size, LP order and an identity mapping. */
    public double[] getDistances(BaselineAdaptationSet set1, BaselineAdaptationSet set2, double winSizeInSeconds,
            double skipSizeInSeconds) throws IOException {
        return getDistances(set1, set2, winSizeInSeconds, skipSizeInSeconds, DEFAULT_FFTSIZE);
    }

    /** Same as the full set-based overload, using the default LP order and an identity mapping. */
    public double[] getDistances(BaselineAdaptationSet set1, BaselineAdaptationSet set2, double winSizeInSeconds,
            double skipSizeInSeconds, int fftSize) throws IOException {
        return getDistances(set1, set2, winSizeInSeconds, skipSizeInSeconds, fftSize, DEFAULT_LPORDER);
    }

    /**
     * Pairs the i-th item of <code>set1</code> with the i-th item of <code>set2</code>, for as many items as the smaller
     * set contains, and computes the distances of those pairs.
     */
    public double[] getDistances(BaselineAdaptationSet set1, BaselineAdaptationSet set2, double winSizeInSeconds,
            double skipSizeInSeconds, int fftSize, int lpOrder) throws IOException {
        int[] map = new int[Math.min(set1.items.length, set2.items.length)];
        for (int i = 0; i < map.length; i++) {
            map[i] = i;
        }
        return getDistances(set1, set2, winSizeInSeconds, skipSizeInSeconds, fftSize, lpOrder, map);
    }

    /**
     * Computes the frame distances for every mapped item pair and concatenates them in item order.
     * <p>
     * Item pairs for which {@link #getItemDistances(BaselineAdaptationItem, BaselineAdaptationItem, double, double, int, int)}
     * returns <code>null</code> (labels or label alignment unavailable) are skipped.
     *
     * @param set1
     *            first set; item <code>i</code> is used for entry <code>i</code> of <code>map</code>
     * @param set2
     *            second set; item <code>map[i]</code> is paired with item <code>i</code> of <code>set1</code>
     * @param winSizeInSeconds
     *            analysis window size in seconds
     * @param skipSizeInSeconds
     *            skip size in seconds
     * @param fftSize
     *            FFT size, or a negative value to derive it automatically
     * @param lpOrder
     *            LP order, or a negative value to derive it automatically
     * @param map
     *            for each index into <code>set1.items</code>, the index of its partner in <code>set2.items</code>
     * @return the concatenated distances, or <code>null</code> if no item pair produced a result
     * @throws IOException
     *             if an audio or label file cannot be read
     */
    public double[] getDistances(BaselineAdaptationSet set1, BaselineAdaptationSet set2, double winSizeInSeconds,
            double skipSizeInSeconds, int fftSize, int lpOrder, int[] map) throws IOException {
        // Collect the per-item results first so that the final array is allocated and filled exactly once.
        double[][] perItem = new double[map.length][];
        int total = 0;
        boolean anyResult = false;
        for (int i = 0; i < map.length; i++) {
            perItem[i] = getItemDistances(set1.items[i], set2.items[map[i]], winSizeInSeconds, skipSizeInSeconds,
                    fftSize, lpOrder);
            if (perItem[i] != null) {
                anyResult = true;
                total += perItem[i].length;
            }
        }

        // Keep the historical contract: null means that nothing could be computed at all.
        if (!anyResult) {
            return null;
        }

        double[] distances = new double[total];
        int offset = 0;
        for (double[] part : perItem) {
            if (part != null) {
                System.arraycopy(part, 0, distances, offset, part.length);
                offset += part.length;
            }
        }
        return distances;
    }

    /** Same as the full item-based overload, using the default FFT size and LP order. */
    public double[] getItemDistances(BaselineAdaptationItem item1, BaselineAdaptationItem item2, double winSizeInSeconds,
            double skipSizeInSeconds) throws IOException {
        return getItemDistances(item1, item2, winSizeInSeconds, skipSizeInSeconds, DEFAULT_FFTSIZE);
    }

    /** Same as the full item-based overload, using the default LP order. */
    public double[] getItemDistances(BaselineAdaptationItem item1, BaselineAdaptationItem item2, double winSizeInSeconds,
            double skipSizeInSeconds, int fftSize) throws IOException {
        return getItemDistances(item1, item2, winSizeInSeconds, skipSizeInSeconds, fftSize, DEFAULT_LPORDER);
    }

    /**
     * Computes one distance per frame of <code>item1</code> that can be matched to a frame of <code>item2</code>.
     * <p>
     * Both signals are peak-normalised, the two label sequences are aligned, and every frame of the first signal that
     * falls into a label with an identically named partner label is mapped linearly in time onto the second signal. Frames
     * falling into the first or the last label of <code>item1</code> are excluded.
     *
     * @param item1
     *            first item (audio and label file)
     * @param item2
     *            second item (audio and label file)
     * @param winSizeInSeconds
     *            analysis window size in seconds
     * @param skipSizeInSeconds
     *            skip size in seconds
     * @param fftSize
     *            FFT size; if negative it is derived from the sampling rates and enlarged to cover both windows
     * @param lpOrder
     *            LP order; if negative it is derived from the sampling rates
     * @return the distances of all matched frames (possibly empty), or <code>null</code> if either label file has no items
     *         or the labels cannot be aligned
     * @throws IOException
     *             if an audio file cannot be opened or read, or a label file cannot be read
     * @throws IllegalArgumentException
     *             if the window or skip size is shorter than one sample at either sampling rate
     */
    public double[] getItemDistances(BaselineAdaptationItem item1, BaselineAdaptationItem item2, double winSizeInSeconds,
            double skipSizeInSeconds, int fftSize, int lpOrder) throws IOException {
        // Read wav files & determine available number of frames
        AudioInputStream inputAudio1 = null;
        AudioInputStream inputAudio2 = null;
        int samplingRate1;
        int samplingRate2;
        double[] x1;
        double[] x2;
        try {
            inputAudio1 = AudioSystem.getAudioInputStream(new File(item1.audioFile));
            inputAudio2 = AudioSystem.getAudioInputStream(new File(item2.audioFile));
            samplingRate1 = (int) inputAudio1.getFormat().getSampleRate();
            x1 = new AudioDoubleDataSource(inputAudio1).getAllData();
            samplingRate2 = (int) inputAudio2.getFormat().getSampleRate();
            x2 = new AudioDoubleDataSource(inputAudio2).getAllData();
        } catch (UnsupportedAudioFileException e) {
            throw new IOException("Cannot open audio file", e);
        } finally {
            // Always release both file handles, also when opening or reading the second file fails.
            closeQuietly(inputAudio1);
            closeQuietly(inputAudio2);
        }

        int ws1 = (int) Math.floor(winSizeInSeconds * samplingRate1 + 0.5);
        int ss1 = (int) Math.floor(skipSizeInSeconds * samplingRate1 + 0.5);
        int ws2 = (int) Math.floor(winSizeInSeconds * samplingRate2 + 0.5);
        int ss2 = (int) Math.floor(skipSizeInSeconds * samplingRate2 + 0.5);
        if (ws1 < 1 || ws2 < 1 || ss1 < 1 || ss2 < 1) {
            // A zero skip size would divide by zero below; a zero window yields no usable frame.
            throw new IllegalArgumentException("Window and skip sizes must span at least one sample: winSizeInSeconds="
                    + winSizeInSeconds + ", skipSizeInSeconds=" + skipSizeInSeconds);
        }
        int numfrm1 = (int) Math.floor((x1.length - ws1) / ((double) ss1) + 0.5);
        int numfrm2 = (int) Math.floor((x2.length - ws2) / ((double) ss2) + 0.5);

        normalizePeak(x1);
        normalizePeak(x2);

        if (fftSize < 0) {
            fftSize = Math.max(SignalProcUtils.getDFTSize(samplingRate1), SignalProcUtils.getDFTSize(samplingRate2));
            while (fftSize < ws1) {
                fftSize *= 2;
            }
            while (fftSize < ws2) {
                fftSize *= 2;
            }
        }
        if (lpOrder < 0) {
            lpOrder = Math.max(SignalProcUtils.getLPOrder(samplingRate1), SignalProcUtils.getLPOrder(samplingRate2));
        }

        Labels labs1 = new Labels(item1.labelFile);
        Labels labs2 = new Labels(item2.labelFile);
        if (labs1.items == null || labs2.items == null) {
            return null;
        }

        // Find the optimum alignment between the source and the target labels since the phone sequences may not be
        // identical due to silence periods etc.
        int[][] labelMap = AlignLabelsUtils.alignLabels(labs1.items, labs2.items);
        if (labelMap == null) {
            return null;
        }

        // Signals shorter than one window (or an empty label list) leave nothing to compare.
        if (numfrm1 <= 0 || numfrm2 <= 0 || labs1.items.length == 0) {
            return new double[0];
        }

        double[] frm1 = new double[ws1];
        double[] frm2 = new double[ws2];
        double[] frameDistances = new double[numfrm1];
        int count = 0;
        int labInd1 = 0;
        int lastLabInd1 = labs1.items.length - 1;

        // Find the corresponding target frame index for each source frame index
        for (int j = 0; j < numfrm1; j++) {
            double time1 = SignalProcUtils.frameIndex2Time(j, winSizeInSeconds, skipSizeInSeconds);

            // Advance to the label whose end time covers this frame; stay on the last label once it is reached.
            while (labInd1 < lastLabInd1 && time1 > labs1.items[labInd1].time) {
                labInd1++;
            }

            // Exclude first and last label
            if (labInd1 <= 0 || labInd1 >= lastLabInd1) {
                continue;
            }

            int labInd2 = StringUtils.findInMap(labelMap, labInd1);
            if (labInd2 < 0 || labs1.items[labInd1].phn.compareTo(labs2.items[labInd2].phn) != 0) {
                continue;
            }

            // A label starts where the previous label ends; labInd1 is at least 1 here.
            double startTime1 = labs1.items[labInd1 - 1].time;
            double startTime2 = (labInd2 > 0) ? labs2.items[labInd2 - 1].time : 0.0;
            double endTime1 = labs1.items[labInd1].time;
            double endTime2 = labs2.items[labInd2].time;

            double time2 = MathUtils.linearMap(time1, startTime1, endTime1, startTime2, endTime2);
            int frmInd2 = SignalProcUtils.time2frameIndex(time2, winSizeInSeconds, skipSizeInSeconds);
            frmInd2 = Math.max(0, Math.min(frmInd2, numfrm2 - 1));

            // NOTE(review): the copy starts half a window after j * ss; kept exactly as in the original code.
            int x1Start = (int) Math.floor(j * ss1 + 0.5 * ws1 + 0.5);
            int x2Start = (int) Math.floor(frmInd2 * ss2 + 0.5 * ws2 + 0.5);
            copyFrame(x1, x1Start, frm1);
            copyFrame(x2, x2Start, frm2);

            SignalProcUtils.addWhiteNoise(frm1, 1e-10);
            SignalProcUtils.addWhiteNoise(frm2, 1e-10);

            frameDistances[count] = frameDistance(frm1, frm2, fftSize, lpOrder);
            count++;
        }

        // Only the first 'count' entries hold real distances; never hand out the unused zero-filled tail.
        return Arrays.copyOf(frameDistances, count);
    }

    /**
     * Computes the distance between two frames.
     * <p>
     * This base implementation is a placeholder that always returns 1.0. Implement functionality in derived classes.
     *
     * @param frm1
     *            frame taken from the first signal
     * @param frm2
     *            frame taken from the second signal
     * @param fftSize
     *            FFT size to use
     * @param lpOrder
     *            LP order to use
     * @return the distance between the two frames
     */
    public double frameDistance(double[] frm1, double[] frm2, int fftSize, int lpOrder) {
        return 1.0;
    }

    /**
     * Compares target-vs-source distances with target-vs-transformed distances, writes the comparative statistics to a text
     * file and prints a summary to standard output.
     *
     * @param srcFolder
     *            folder with the source recordings
     * @param tgtFolder
     *            folder with the target (reference) recordings
     * @param tfmFolder
     *            folder with the transformed recordings
     * @param outputFile
     *            text file the statistics are written to
     * @param infoString
     *            prefix for the lines printed to standard output
     * @throws IOException
     *             if an audio or label file cannot be read
     */
    public void mainParametric(String srcFolder, String tgtFolder, String tfmFolder, String outputFile, String infoString)
            throws IOException {
        double[] distances1 = getDistances(tgtFolder, srcFolder);
        double[] distances2 = getDistances(tgtFolder, tfmFolder);
        ComparativeStatisticsItem stats = new ComparativeStatisticsItem(distances1, distances2);
        stats.writeToTextFile(outputFile);
        System.out.println(infoString + " reference-method1: MeanDist=" + String.valueOf(stats.referenceVsMethod1.mean) + " "
                + "StdDist=" + String.valueOf(stats.referenceVsMethod1.std));
        System.out.println(infoString + " reference-method2: MeanDist=" + String.valueOf(stats.referenceVsMethod2.mean) + " "
                + "StdDist=" + String.valueOf(stats.referenceVsMethod2.std));
        System.out.println(infoString + " distance reduction="
                + String.valueOf(stats.referenceVsMethod1.mean - stats.referenceVsMethod2.mean));
    }

    /** Scales the signal in place to a peak magnitude of 20000; empty or all-zero signals are left untouched. */
    private static void normalizePeak(double[] x) {
        if (x.length == 0) {
            return;
        }
        double peak = MathUtils.absMax(x);
        if (!(peak > 0.0)) {
            // Dividing by a zero (or NaN) peak would turn the whole signal into NaN.
            return;
        }
        for (int i = 0; i < x.length; i++) {
            x[i] = x[i] / peak * 20000;
        }
    }

    /** Copies one frame starting at <code>start</code> into <code>frame</code>, zero-padding past the signal end. */
    private static void copyFrame(double[] signal, int start, double[] frame) {
        int available = Math.max(0, signal.length - start);
        int n = Math.min(frame.length, available);
        if (n < frame.length) {
            Arrays.fill(frame, 0.0);
        }
        if (n > 0) {
            System.arraycopy(signal, start, frame, 0, n);
        }
    }

    /** Closes the stream if it was opened; a failure to close is ignored because all data has been read by then. */
    private static void closeQuietly(AudioInputStream stream) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ignored) {
            // Nothing useful can be done here and the samples are already in memory.
        }
    }
}