/** * Copyright 2007 DFKI GmbH. * All Rights Reserved. Use is subject to license terms. * * This file is part of MARY TTS. * * MARY TTS is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, version 3 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * */ package marytts.signalproc.analysis.distance; import java.io.IOException; import marytts.modules.phonemiser.AllophoneSet; import marytts.signalproc.adaptation.BaselineAdaptationItem; import marytts.signalproc.adaptation.BaselineAdaptationSet; import marytts.signalproc.adaptation.BaselineFeatureExtractor; import marytts.signalproc.analysis.AlignedLabels; import marytts.signalproc.analysis.Label; import marytts.signalproc.analysis.Labels; import marytts.signalproc.analysis.LsfFileHeader; import marytts.signalproc.analysis.Lsfs; import marytts.tools.analysis.TranscriptionAligner; import marytts.util.io.FileUtils; import marytts.util.math.MathUtils; import marytts.util.signal.SignalProcUtils; import marytts.util.string.StringUtils; /** * Implements root-mean-square line spectral frequency vector distance given two sets of paired files * * @author Oytun Türk */ public class RmsLsfDistortionComputer extends BaselineDistortionComputer { private AllophoneSet allophoneSet; private String silenceSymbol; private TranscriptionAligner aligner; public RmsLsfDistortionComputer() throws IOException { super(); setupTranscriptionAligner(); } private void setupTranscriptionAligner() throws IOException { String allophoneSetFilename = System.getProperty("allophoneset"); if (allophoneSetFilename == null) { throw new IOException("Allophone set not provided (use -Dallophoneset=/path/to/allophones.xml)"); } allophoneSet = null; try { allophoneSet = AllophoneSet.getAllophoneSet(allophoneSetFilename); } catch (Exception e) { IOException ioe = new IOException("Problem reading Allophones file " + allophoneSetFilename); ioe.initCause(e); throw ioe; } silenceSymbol = allophoneSet.getSilence().name(); aligner = new TranscriptionAligner(allophoneSet); } public double[] getDistances(String folder1, String folder2, double upperFreqInHz) throws IOException { folder1 = StringUtils.checkLastSlash(folder1); folder2 = StringUtils.checkLastSlash(folder2); BaselineAdaptationSet set1 = new BaselineAdaptationSet(folder1, BaselineAdaptationSet.WAV_EXTENSION_DEFAULT); BaselineAdaptationSet set2 = new BaselineAdaptationSet(folder2, BaselineAdaptationSet.WAV_EXTENSION_DEFAULT); return getDistances(set1, set2, false, upperFreqInHz); } public double[] getDistances(String folder1, String folder2, boolean isBark, double upperFreqInHz) throws IOException { folder1 = StringUtils.checkLastSlash(folder1); folder2 = StringUtils.checkLastSlash(folder2); BaselineAdaptationSet set1 = new BaselineAdaptationSet(folder1, BaselineAdaptationSet.WAV_EXTENSION_DEFAULT); BaselineAdaptationSet set2 = new BaselineAdaptationSet(folder2, BaselineAdaptationSet.WAV_EXTENSION_DEFAULT); return getDistances(set1, set2, isBark, upperFreqInHz); } public double[] getDistances(BaselineAdaptationSet set1, BaselineAdaptationSet set2, boolean isBark, double upperFreqInHz) throws IOException { int[] map = new int[Math.min(set1.items.length, set2.items.length)]; for (int i = 0; i < map.length; i++) map[i] = i; return getDistances(set1, set2, isBark, upperFreqInHz, map); } public double[] getDistances(BaselineAdaptationSet set1, BaselineAdaptationSet set2, boolean isBark, double upperFreqInHz, int[] map) throws IOException { double[] distances = null; double[] tmpDistances = null; for (int i = 0; i < map.length; i++) { double[] itemDistances = getItemDistances(set1.items[i], set2.items[map[i]], isBark, upperFreqInHz); if (distances != null && itemDistances != null) { tmpDistances = new double[distances.length]; System.arraycopy(distances, 0, tmpDistances, 0, distances.length); distances = new double[tmpDistances.length + itemDistances.length]; System.arraycopy(tmpDistances, 0, distances, 0, tmpDistances.length); System.arraycopy(itemDistances, 0, distances, tmpDistances.length, itemDistances.length); } else { distances = new double[itemDistances.length]; System.arraycopy(itemDistances, 0, distances, 0, itemDistances.length); } } return distances; } /** * Compute the distances per file * * @param set1 * set1 * @param set2 * set2 * @param isBark * isBark * @param upperFreqInHz * upperFreqInHz * @param map * map * @return an array containing, for each file, the array of frame-wise distances. * @throws IOException * IO Exception */ public double[][] getDistancesPerFile(BaselineAdaptationSet set1, BaselineAdaptationSet set2, boolean isBark, double upperFreqInHz, int[] map) throws IOException { double[][] allDistances = new double[map.length][]; for (int i = 0; i < map.length; i++) { double[] itemDistances = getItemDistances(set1.items[i], set2.items[map[i]], isBark, upperFreqInHz); allDistances[i] = itemDistances; } return allDistances; } /** * Return true if the time given corresponds to an initial or final silence symbol in labels, false otherwise. * * @param time * time * @param labels * labels * @param silenceSymbol * silenceSymbol * @return (i == 0 or i == labels.items.length -1) and l.phn.equals(silenceSymbol) */ private boolean isInitialOrFinalSilence(double time, Labels labels, String silenceSymbol) { int i = labels.getLabelIndexAtTime(time); if (i == -1) { // somehow out of range, fix it if (time < 0) { i = 0; } else { i = labels.items.length - 1; } } assert i >= 0; Label l = labels.items[i]; // Exclude initial and final silences: return (i == 0 || i == labels.items.length - 1) && l.phn.equals(silenceSymbol); } /** * Compute the distance between two LSF frames * * @param l1 * one lsf frame * @param l2 * the other lsf frame * @param isBark * whether to convert to bark scale before computing distance * @param upperFreqInHz * the highest frequency up to which to compute the distance * @return the distance */ private double computeOneFrameDistance(double[] l1, double[] l2, boolean isBark, double upperFreqInHz) { int maxInd1 = MathUtils.getLargestIndexSmallerThan(l1, upperFreqInHz); int maxInd2 = MathUtils.getLargestIndexSmallerThan(l2, upperFreqInHz); int maxInd = Math.min(maxInd1, maxInd2); double[] tmp1; double[] tmp2; if (maxInd + 1 == l1.length) { tmp1 = l1; } else { tmp1 = new double[maxInd + 1]; System.arraycopy(l1, 0, tmp1, 0, maxInd + 1); } if (maxInd + 1 == l2.length) { tmp2 = l2; } else { tmp2 = new double[maxInd + 1]; System.arraycopy(l2, 0, tmp2, 0, maxInd + 1); } double distance; if (!isBark) { distance = SignalProcUtils.getRmsDistance(tmp1, tmp2); } else { distance = SignalProcUtils.getRmsDistance(SignalProcUtils.freq2bark(tmp1), SignalProcUtils.freq2bark(tmp2)); } return distance; } public double[] getItemDistances(BaselineAdaptationItem item1, BaselineAdaptationItem item2, boolean isBark, double upperFreqInHz) throws IOException { if (!FileUtils.exists(item1.lsfFile)) // Extract lsfs if necessary { LsfFileHeader lsfParams = new LsfFileHeader(); BaselineFeatureExtractor.lsfAnalysis(item1, lsfParams, true); } if (!FileUtils.exists(item2.lsfFile)) // Extract lsfs if necessary { LsfFileHeader lsfParams = new LsfFileHeader(); BaselineFeatureExtractor.lsfAnalysis(item2, lsfParams, true); } Lsfs lsfs1 = new Lsfs(item1.lsfFile); Lsfs lsfs2 = new Lsfs(item2.lsfFile); Labels labs1 = new Labels(item1.labelFile); Labels labs2 = new Labels(item2.labelFile); double[] frameDistances = null; int count = 0; if (labs1.items == null || labs2.items == null) { throw new IOException("Do not have labels for pair " + StringUtils.getFileName(item1.audioFile)); } // Find the optimum alignment between the source and the target labels since the phone sequences may not be identical due // to silence periods etc. AlignedLabels aligned = aligner.alignLabels(labs1, labs2); assert aligned != null; // Now compute the frame-wise distances by mapping frames according to this label alignment; // for each aligned stretch, we move through the frames of the shorter side of the alignment // to make sure that dist(a,b) == dist(b,a) frameDistances = new double[Math.max(lsfs1.params.numfrm, lsfs2.params.numfrm)]; // Make sure we don't use any frame twice: int frameSeen1 = -1; int frameSeen2 = -1; for (AlignedLabels.AlignedTimeStretch ats : aligned.getAlignedTimeStretches()) { boolean firstIsShorter = (ats.firstDuration <= ats.secondDuration); if (firstIsShorter) { int fromIndex = SignalProcUtils.time2frameIndex(ats.firstStart, lsfs1.params.winsize, lsfs1.params.skipsize); if (fromIndex < 0) { fromIndex = 0; } if (frameSeen1 >= fromIndex) { fromIndex = frameSeen1 + 1; } int toIndex = SignalProcUtils.time2frameIndex(ats.firstStart + ats.firstDuration, lsfs1.params.winsize, lsfs1.params.skipsize); if (toIndex >= lsfs1.lsfs.length) { break; } for (int f1 = fromIndex; f1 <= toIndex; f1++) { double t1 = SignalProcUtils.frameIndex2Time(f1, lsfs1.params.winsize, lsfs1.params.skipsize); double t2 = aligned.mapTimeFromFirstToSecond(t1); if (isInitialOrFinalSilence(t1, labs1, silenceSymbol) || isInitialOrFinalSilence(t2, labs2, silenceSymbol)) { continue; } int f2 = SignalProcUtils.time2frameIndex(t2, lsfs2.params.winsize, lsfs2.params.skipsize); if (f2 <= frameSeen2) { continue; } if (f2 >= lsfs2.lsfs.length) { break; } frameDistances[count++] = computeOneFrameDistance(lsfs1.lsfs[f1], lsfs2.lsfs[f2], isBark, upperFreqInHz); // System.err.println("Compared frames "+f1+" and "+f2); frameSeen1 = f1; frameSeen2 = f2; } } else { // second is shorter int fromIndex = SignalProcUtils.time2frameIndex(ats.secondStart, lsfs2.params.winsize, lsfs2.params.skipsize); if (fromIndex < 0) { fromIndex = 0; } if (frameSeen2 >= fromIndex) { fromIndex = frameSeen2 + 1; } int toIndex = SignalProcUtils.time2frameIndex(ats.secondStart + ats.secondDuration, lsfs2.params.winsize, lsfs2.params.skipsize); if (toIndex >= lsfs2.lsfs.length) { break; } for (int f2 = fromIndex; f2 <= toIndex; f2++) { double t2 = SignalProcUtils.frameIndex2Time(f2, lsfs2.params.winsize, lsfs2.params.skipsize); double t1 = aligned.mapTimeFromSecondToFirst(t2); if (isInitialOrFinalSilence(t1, labs1, silenceSymbol) || isInitialOrFinalSilence(t2, labs2, silenceSymbol)) { continue; } int f1 = SignalProcUtils.time2frameIndex(t1, lsfs1.params.winsize, lsfs1.params.skipsize); if (f1 <= frameSeen1) { continue; } if (f1 >= lsfs1.lsfs.length) { break; } frameDistances[count++] = computeOneFrameDistance(lsfs1.lsfs[f1], lsfs2.lsfs[f2], isBark, upperFreqInHz); // System.err.println("Compared frames "+f1+" and "+f2); frameSeen1 = f1; frameSeen2 = f2; } } } /* * int j, labInd1, labInd2, frmInd1, frmInd2; double time1, time2; double startTime1, endTime1, startTime2, endTime2; * double[] tmpLsfs1 = null; double[] tmpLsfs2 = null; int maxInd1, maxInd2, maxInd; * * labInd1 = 0; * * * //Find the corresponding target frame index for each source frame index for (j=0; j<lsfs1.params.numfrm; j++) { time1 = * SignalProcUtils.frameIndex2Time(j, lsfs1.params.winsize, lsfs1.params.skipsize); int i1 = * labs1.getLabelIndexAtTime(time1); assert i1 >= 0; Label l1 = labs1.items[i1]; // Exclude initial and final silences: if * ((i1 == 0 || i1 == labs1.items.length - 1) && l1.phn.equals(silenceSymbol)) { continue; } time2 = * aligned.mapTimeFromFirstToSecond(time1); int i2 = labs2.getLabelIndexAtTime(time2); assert i2 >= 0; Label l2 = * labs2.items[i2]; // Exclude initial and final silences: if ((i2 == 0 || i2 == labs2.items.length - 1) && * l2.phn.equals(silenceSymbol)) { continue; } * * frmInd2 = SignalProcUtils.time2frameIndex(time2, lsfs2.params.winsize, lsfs2.params.skipsize); if (frmInd2<0) * frmInd2=0; if (frmInd2>lsfs2.params.numfrm-1) frmInd2=lsfs2.params.numfrm-1; * * maxInd1 = MathUtils.getLargestIndexSmallerThan(lsfs1.lsfs[j], upperFreqInHz); maxInd2 = * MathUtils.getLargestIndexSmallerThan(lsfs2.lsfs[frmInd2], upperFreqInHz); maxInd = Math.min(maxInd1, maxInd2); * * tmpLsfs1 = new double[maxInd+1]; tmpLsfs2 = new double[maxInd+1]; System.arraycopy(lsfs1.lsfs[j], 0, tmpLsfs1, 0, * maxInd+1); System.arraycopy(lsfs2.lsfs[frmInd2], 0, tmpLsfs2, 0, maxInd+1); * * if (!isBark) frameDistances[count++] = SignalProcUtils.getRmsDistance(tmpLsfs1, tmpLsfs2); else frameDistances[count++] * = SignalProcUtils.getRmsDistance(SignalProcUtils.freq2bark(tmpLsfs1), SignalProcUtils.freq2bark(tmpLsfs2)); * System.err.println("Compared frames "+j+" and "+frmInd2); * * if (count>=frameDistances.length) break; } */ if (count > 0) { double[] tmpFrameDistances = frameDistances; frameDistances = new double[count]; System.arraycopy(tmpFrameDistances, 0, frameDistances, 0, count); } return frameDistances; } public static void mainParametricInterspeech2008(String method, String emotion, boolean isBark) throws IOException { String baseDir = "D:/Oytun/DFKI/voices/Interspeech08_out/objective_test/"; String tgtFolder = baseDir + "target/" + emotion; String srcFolder = baseDir + "source/" + emotion; String tfmFolder = baseDir + method + "/" + emotion; String outputFile = baseDir + method + "_" + emotion + "_rmsLsf.txt"; RmsLsfDistortionComputer r = new RmsLsfDistortionComputer(); double[] distances1 = r.getDistances(tgtFolder, srcFolder, isBark, 8000); double[] distances2 = r.getDistances(tgtFolder, tfmFolder, isBark, 8000); double m1 = MathUtils.mean(distances1); double s1 = MathUtils.standardDeviation(distances1, m1); double m2 = MathUtils.mean(distances2); double s2 = MathUtils.standardDeviation(distances2, m2); double conf95_1 = MathUtils.getConfidenceInterval95(s1); double conf99_1 = MathUtils.getConfidenceInterval99(s1); double conf95_2 = MathUtils.getConfidenceInterval95(s2); double conf99_2 = MathUtils.getConfidenceInterval99(s2); double[] tmpOut = new double[distances1.length + distances2.length + 9]; tmpOut[0] = m1; // tgt-src mean tmpOut[1] = s1; // tgt-src std tmpOut[2] = m2; // tgt-tfm mean tmpOut[3] = s2; // tgt-tfm std tmpOut[4] = m1 - m2; // decrease in tgt-src distance by tfm tmpOut[5] = conf95_1; // 95% confidence interval for distance tgt-src distances tmpOut[6] = conf99_1; // 99% confidence interval for distance tgt-src distances tmpOut[7] = conf95_2; // 95% confidence interval for distance tgt-tfm distances tmpOut[8] = conf99_2; // 99% confidence interval for distance tgt-tfm distances System.arraycopy(distances1, 0, tmpOut, 9, distances1.length); System.arraycopy(distances2, 0, tmpOut, distances1.length + 9, distances2.length); FileUtils.writeToTextFile(tmpOut, outputFile); double c1Left95 = m1 - conf95_1; double c1Left99 = m1 - conf99_1; double c1Right95 = m1 + conf95_1; double c1Right99 = m1 + conf99_1; double c2Left95 = m2 - conf95_2; double c2Left99 = m2 - conf99_2; double c2Right95 = m2 + conf95_2; double c2Right99 = m2 + conf99_2; System.out.println(method + " " + emotion + " tgt-src: MeanDist=" + String.valueOf(m1) + " " + "StdDist=" + String.valueOf(s1)); System.out.println(method + " " + emotion + " tgt-tfm: MeanDist=" + String.valueOf(m2) + " " + "StdDist=" + String.valueOf(s2)); System.out.println(method + " " + emotion + " distance reduction=" + String.valueOf(m1 - m2)); System.out.println("Confidence intervals tgt-src %95: " + String.valueOf(conf95_1) + " --> [" + String.valueOf(c1Left95) + "," + String.valueOf(c1Right95) + "]"); System.out.println("Confidence intervals tgt-src %99: " + String.valueOf(conf99_1) + " --> [" + String.valueOf(c1Left99) + "," + String.valueOf(c1Right99) + "]"); System.out.println("Confidence intervals tgt-tfm %95: " + String.valueOf(conf95_2) + " --> [" + String.valueOf(c2Left95) + "," + String.valueOf(c2Right95) + "]"); System.out.println("Confidence intervals tgt-tfm %99: " + String.valueOf(conf99_2) + " --> [" + String.valueOf(c2Left99) + "," + String.valueOf(c2Right99) + "]"); System.out.println("---------------------------------"); } // Put source and target wav and lab files into two folders and call this function public static void mainInterspeech2008() throws IOException { boolean isBark = true; String method; // "1_codebook"; "2_frame"; "3_gmm"; String emotion; // "angry"; "happy"; "sad"; "all"; method = "1_codebook"; emotion = "angry"; mainParametricInterspeech2008(method, emotion, isBark); emotion = "happy"; mainParametricInterspeech2008(method, emotion, isBark); emotion = "sad"; mainParametricInterspeech2008(method, emotion, isBark); emotion = "all"; mainParametricInterspeech2008(method, emotion, isBark); method = "2_frame"; emotion = "angry"; mainParametricInterspeech2008(method, emotion, isBark); emotion = "happy"; mainParametricInterspeech2008(method, emotion, isBark); emotion = "sad"; mainParametricInterspeech2008(method, emotion, isBark); emotion = "all"; mainParametricInterspeech2008(method, emotion, isBark); method = "3_gmm"; emotion = "angry"; mainParametricInterspeech2008(method, emotion, isBark); emotion = "happy"; mainParametricInterspeech2008(method, emotion, isBark); emotion = "sad"; mainParametricInterspeech2008(method, emotion, isBark); emotion = "all"; mainParametricInterspeech2008(method, emotion, isBark); System.out.println("Objective test completed..."); } public static void mainHmmVoiceConversion(String method1, String method2, String folder1, String folder2, String referenceFolder, String outputFile, boolean isBark) throws IOException { RmsLsfDistortionComputer r = new RmsLsfDistortionComputer(); double[] distances1 = r.getDistances(referenceFolder, folder1, isBark, 8000); double[] distances2 = r.getDistances(referenceFolder, folder2, isBark, 8000); double m1 = MathUtils.mean(distances1); double s1 = MathUtils.standardDeviation(distances1, m1); double m2 = MathUtils.mean(distances2); double s2 = MathUtils.standardDeviation(distances2, m2); double conf95_1 = MathUtils.getConfidenceInterval95(s1); double conf99_1 = MathUtils.getConfidenceInterval99(s1); double conf95_2 = MathUtils.getConfidenceInterval95(s2); double conf99_2 = MathUtils.getConfidenceInterval99(s2); double[] tmpOut = new double[distances1.length + distances2.length + 9]; tmpOut[0] = m1; // tgt-src mean tmpOut[1] = s1; // tgt-src std tmpOut[2] = m2; // tgt-tfm mean tmpOut[3] = s2; // tgt-tfm std tmpOut[4] = m1 - m2; // decrease in tgt-src distance by tfm tmpOut[5] = conf95_1; // 95% confidence interval for distance tgt-src distances tmpOut[6] = conf99_1; // 99% confidence interval for distance tgt-src distances tmpOut[7] = conf95_2; // 95% confidence interval for distance tgt-tfm distances tmpOut[8] = conf99_2; // 99% confidence interval for distance tgt-tfm distances System.arraycopy(distances1, 0, tmpOut, 9, distances1.length); System.arraycopy(distances2, 0, tmpOut, distances1.length + 9, distances2.length); FileUtils.writeToTextFile(tmpOut, outputFile); double c1Left95 = m1 - conf95_1; double c1Left99 = m1 - conf99_1; double c1Right95 = m1 + conf95_1; double c1Right99 = m1 + conf99_1; double c2Left95 = m2 - conf95_2; double c2Left99 = m2 - conf99_2; double c2Right95 = m2 + conf95_2; double c2Right99 = m2 + conf99_2; System.out.println(method1 + " tgt-src: MeanDist=" + String.valueOf(m1) + " " + "StdDist=" + String.valueOf(s1)); System.out.println(method2 + " tgt-tfm: MeanDist=" + String.valueOf(m2) + " " + "StdDist=" + String.valueOf(s2)); System.out.println("Distance reduction=" + String.valueOf(m1 - m2)); System.out.println("Confidence intervals reference-method1 %95: " + String.valueOf(conf95_1) + " --> [" + String.valueOf(c1Left95) + "," + String.valueOf(c1Right95) + "]"); System.out.println("Confidence intervals reference-method1 %99: " + String.valueOf(conf99_1) + " --> [" + String.valueOf(c1Left99) + "," + String.valueOf(c1Right99) + "]"); System.out.println("Confidence intervals reference-method2 %95: " + String.valueOf(conf95_2) + " --> [" + String.valueOf(c2Left95) + "," + String.valueOf(c2Right95) + "]"); System.out.println("Confidence intervals reference-method2 %99: " + String.valueOf(conf99_2) + " --> [" + String.valueOf(c2Left99) + "," + String.valueOf(c2Right99) + "]"); System.out.println("---------------------------------"); } public static void mainHmmVoiceConversion() throws IOException { String baseInputFolder = "D:/Oytun/DFKI/voices/hmmVoiceConversionTest2/output/final/"; String baseOutputFolder = "D:/Oytun/DFKI/voices/hmmVoiceConversionTest2/objective_test/"; boolean isBark = true; String method1, method2, folder1, folder2, referenceFolder, outputFile; referenceFolder = "D:/Oytun/DFKI/voices/hmmVoiceConversionTest2/output/final/origTarget"; // No-GV vs GV method1 = "NOGV"; method2 = "GV"; folder1 = baseInputFolder + "hmmSource_nogv"; folder2 = baseInputFolder + "hmmSource_gv"; outputFile = baseOutputFolder + "lsf_" + method1 + "_" + method2 + ".txt"; mainHmmVoiceConversion(method1, method2, folder1, folder2, referenceFolder, outputFile, isBark); // No-GV vs SC method1 = "NOGV"; method2 = "NOGV+SC"; folder1 = baseInputFolder + "hmmSource_nogv"; folder2 = baseInputFolder + "tfm_nogv_1092files_128mixes"; outputFile = baseOutputFolder + "lsf_" + method1 + "_" + method2 + ".txt"; mainHmmVoiceConversion(method1, method2, folder1, folder2, referenceFolder, outputFile, isBark); // GV vs SC method1 = "GV"; method2 = "GV+SC"; folder1 = baseInputFolder + "hmmSource_gv"; folder2 = baseInputFolder + "tfm_gv_1092files_128mixes"; outputFile = baseOutputFolder + "lsf_" + method1 + "_" + method2 + ".txt"; mainHmmVoiceConversion(method1, method2, folder1, folder2, referenceFolder, outputFile, isBark); System.out.println("Objective test completed..."); } /** * Compare distances between two folders; each folder is expected to contain wav files with the same names and accompanying * lab files. * * @param folder1 * first folder * @param folder2 * second folder * @throws IOException * if any file names don't match. */ public void mainDistancesPerFile(String folder1, String folder2) throws IOException { long startTime = System.currentTimeMillis(); RmsLsfDistortionComputer r = new RmsLsfDistortionComputer(); folder1 = StringUtils.checkLastSlash(folder1); folder2 = StringUtils.checkLastSlash(folder2); BaselineAdaptationSet set1 = new BaselineAdaptationSet(folder1); BaselineAdaptationSet set2 = new BaselineAdaptationSet(folder2); boolean isBark = true; double upperFreqInHz = 8000; int[] map = new int[set1.items.length]; for (int i = 0; i < map.length; i++) { if (!StringUtils.getFileName(set1.items[i].audioFile).equals(StringUtils.getFileName(set2.items[i].audioFile))) { // Non-matching audio file names -- I will not have this throw new IOException("Audio files in folders do not match:\n" + set1.items[i].audioFile + " doesn't match " + set2.items[i].audioFile); } map[i] = i; } double[][] allDistances = r.getDistancesPerFile(set1, set2, isBark, upperFreqInHz, map); assert allDistances.length == map.length; System.out.println("RMSE Bark-scaled LSF distances between " + folder1 + " and " + folder2); // For memory efficiency and computational precision, we compute mean and standard deviation incrementally, // using the following formulae: // mean[n] = mean[n-1] + (1/n) * (x[n] - mean[n-1]) // variance[n] = variance[n-1] + (x[n] - mean[n-1]) * (x[n] - mean[n]) // stddev[n] = sqrt(variance[n] / n) // Mean and variance accumulated across all files: double allMean = 0; double allPrevMean = 0; double allVariance = 0; long allN = 0; for (int i = 0; i < map.length; i++) { // Mean and variance for one file: double oneMean = 0; double onePrevMean = 0; double oneVariance = 0; int oneN = 0; for (int j = 0; j < allDistances[i].length; j++) { double x = allDistances[i][j]; allN++; allPrevMean = allMean; allMean += (x - allPrevMean) / allN; allVariance += (x - allPrevMean) * (x - allMean); oneN++; onePrevMean = oneMean; oneMean += (x - onePrevMean) / oneN; oneVariance += (x - onePrevMean) * (x - oneMean); } double oneStddev = Math.sqrt(oneVariance / oneN); System.out.println(StringUtils.getFileName(set1.items[i].audioFile) + " mean " + oneMean + " stddev " + oneStddev); } double allStddev = Math.sqrt(allVariance / allN); System.out.println("Global mean " + allMean + " stddev " + allStddev); long timeNeeded = System.currentTimeMillis() - startTime; System.err.println("Computed distances between " + map.length + " files in " + timeNeeded + " ms"); } public static void main(String[] args) throws Exception { // mainInterspeech2008(); // mainHmmVoiceConversion(); /* * RmsLsfDistortionComputer d = new RmsLsfDistortionComputer(); BaselineAdaptationItem item1 = new * BaselineAdaptationItem(); BaselineAdaptationItem item2 = new BaselineAdaptationItem(); * item1.setFromWavFilename(args[0]); item2.setFromWavFilename(args[1]); double[] frameDistances = * d.getItemDistances(item1, item2, true, 8000); double meanDist = MathUtils.mean(frameDistances); double stdDist = * MathUtils.standardDeviation(frameDistances, meanDist); * System.out.println(item1.audioFile+"-"+item2.audioFile+" distance: "+meanDist+" (std "+stdDist+")"); */ new RmsLsfDistortionComputer().mainDistancesPerFile(args[0], args[1]); } }