/** * Copyright 2000-2009 DFKI GmbH. * All Rights Reserved. Use is subject to license terms. * * This file is part of MARY TTS. * * MARY TTS is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, version 3 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * */ package marytts.util.signal; import java.io.File; import java.io.IOException; import java.util.Arrays; import java.util.Collections; import java.util.Vector; import javax.sound.sampled.AudioFormat; import javax.sound.sampled.AudioInputStream; import javax.sound.sampled.AudioSystem; import javax.sound.sampled.UnsupportedAudioFileException; import marytts.signalproc.analysis.Labels; import marytts.signalproc.analysis.LpcAnalyser; import marytts.signalproc.analysis.PitchMarks; import marytts.signalproc.filter.BandPassFilter; import marytts.signalproc.filter.FIRFilter; import marytts.signalproc.filter.HighPassFilter; import marytts.signalproc.filter.LowPassFilter; import marytts.signalproc.filter.RecursiveFilter; import marytts.signalproc.window.BartlettWindow; import marytts.signalproc.window.BlackmanWindow; import marytts.signalproc.window.FlattopWindow; import marytts.signalproc.window.GaussWindow; import marytts.signalproc.window.HammingWindow; import marytts.signalproc.window.HanningWindow; import marytts.signalproc.window.RectWindow; import marytts.signalproc.window.Window; import marytts.util.data.AlignLabelsUtils; import marytts.util.data.audio.AudioDoubleDataSource; import marytts.util.data.audio.MaryAudioUtils; import marytts.util.display.DisplayUtils; import marytts.util.math.ArrayUtils; import marytts.util.math.ComplexArray; import marytts.util.math.FFT; import marytts.util.math.FFTMixedRadix; import marytts.util.math.MathUtils; import marytts.util.string.StringUtils; public class SignalProcUtils { public static int getLPOrder(int fs) { int P = (int) (fs / 1000.0f + 2); if (P % 2 == 1) P += 1; return P; } public static int getDFTSize(int fs) { int dftSize; if (fs <= 8000) dftSize = 128; else if (fs <= 16000) dftSize = 256; else if (fs <= 22050) dftSize = 512; else if (fs <= 32000) dftSize = 1024; else if (fs <= 44100) dftSize = 2048; else dftSize = 4096; return dftSize; } // Returns an odd filter order depending on the sampling rate for FIR filter design public static int getFIRFilterOrder(int fs) { int oddFilterOrder = getDFTSize(fs) - 1; if (oddFilterOrder % 2 == 0) oddFilterOrder++; return oddFilterOrder; } public static int getLifterOrder(int fs) { int lifterOrder = 2 * (int) (fs / 1000.0f + 2); if (lifterOrder % 2 == 1) lifterOrder += 1; return lifterOrder; } public static int halfSpectrumSize(int fftSize) { return (int) (Math.floor(fftSize / 2.0 + 1.5)); } public static int fullSpectrumSize(int maxFreq) { return 2 * (maxFreq - 1); } public static double getEnergydB(double x) { double[] y = new double[1]; y[0] = x; return getEnergydB(y); } public static double getEnergydB(double[] x) { return getEnergydB(x, x.length); } public static double getEnergydB(double[] x, int len) { return getEnergydB(x, len, 0); } public static double getEnergydB(double[] x, int len, int start) { return 10 * Math.log10(getEnergy(x, len, start)); } public static double getEnergy(double[] x, int len, int start) { if (start < 0) start = 0; if (start > x.length - 1) start = x.length - 1; if (start + len > x.length) len = x.length - start - 1; double en = 0.0; for (int i = start; i < start + len; i++) en += x[i] * x[i]; en = Math.sqrt(en); en = Math.max(en, 1e-100); // Put a minimum floor to avoid -Ininity in log based computations return en; } public static double getEnergy(double[] x, int len) { return getEnergy(x, len, 0); } public static double getEnergy(double[] x) { return getEnergy(x, x.length, 0); } public static double getAverageSampleEnergy(double[] x, int len, int start) { if (start < 0) start = 0; if (start > x.length - 1) start = x.length - 1; if (start + len > x.length) len = x.length - start - 1; double avgSampEn = 0.0; for (int i = start; i < start + len; i++) avgSampEn += x[i] * x[i]; avgSampEn = Math.sqrt(avgSampEn); avgSampEn /= len; return avgSampEn; } public static double getAverageSampleEnergy(double[] x, int len) { return getAverageSampleEnergy(x, len, 0); } public static double getAverageSampleEnergy(double[] x) { return getAverageSampleEnergy(x, x.length, 0); } public static double[] normalizeAverageSampleEnergy(double[] x, double newAverageSampleEnergy) { double gain = newAverageSampleEnergy / (1e-20 + getAverageSampleEnergy(x)); return MathUtils.multiply(x, gain); } public static double[] getEnergyContourRms(double[] x, double windowSizeInSeconds, double skipSizeInSeconds, int samplingRate) { int ws = (int) Math.floor(windowSizeInSeconds * samplingRate + 0.5); int ss = (int) Math.floor(skipSizeInSeconds * samplingRate + 0.5); int numfrm = (int) Math.floor((x.length - (double) ws) / ss + 0.5); double[] energies = null; if (numfrm > 0) { energies = new double[numfrm]; double[] frm = new double[ws]; int i, j; for (i = 0; i < numfrm; i++) { Arrays.fill(frm, 0.0); System.arraycopy(x, i * ss, frm, 0, Math.min(ws, x.length - i * ss)); energies[i] = 0.0; for (j = 0; j < ws; j++) energies[i] += frm[j] * frm[j]; energies[i] /= ws; energies[i] = Math.sqrt(energies[i]); energies[i] = MathUtils.amp2db(energies[i] + 1e-20); } } return energies; } public static float[] getAverageSampleEnergyContour(double[] x, double windowSizeInSeconds, double skipSizeInSeconds, int samplingRate) { int ws = (int) Math.floor(windowSizeInSeconds * samplingRate + 0.5); int ss = (int) Math.floor(skipSizeInSeconds * samplingRate + 0.5); int numfrm = (int) Math.floor((x.length - (double) ws) / ss + 0.5); float[] averageSampleEnergies = null; if (numfrm > 0) { averageSampleEnergies = new float[numfrm]; double[] frm = new double[ws]; int i, j; for (i = 0; i < numfrm; i++) { Arrays.fill(frm, 0.0); System.arraycopy(x, i * ss, frm, 0, Math.min(ws, x.length - i * ss)); averageSampleEnergies[i] = (float) SignalProcUtils.getAverageSampleEnergy(frm); } } return averageSampleEnergies; } // Returns the average sample energy contour around times using analysis windows of site windowDurationInSeconds public static float[] getAverageSampleEnergyContour(double[] x, float[] times, int samplingRateInHz, float windowDurationInSeconds) { float[] averageSampleEnergies = null; if (x != null && times != null) { int startInd, endInd; int len; double[] frm; averageSampleEnergies = new float[times.length]; for (int i = 0; i < times.length; i++) { averageSampleEnergies[i] = 0.0f; if (times[i] > -1.0f) { startInd = SignalProcUtils.time2sample(Math.max(0.0f, times[i] - 0.5f * windowDurationInSeconds), samplingRateInHz); endInd = SignalProcUtils.time2sample(times[i] + 0.5 * windowDurationInSeconds, samplingRateInHz); if (endInd > x.length - 1) endInd = x.length - 1; len = endInd - startInd + 1; if (len > 0) { frm = new double[len]; System.arraycopy(x, startInd, frm, 0, len); averageSampleEnergies[i] = (float) SignalProcUtils.getAverageSampleEnergy(frm); } } } } return averageSampleEnergies; } public static double[] normalizeAverageSampleEnergyContour(double[] x, float[] times, float[] currentContour, float[] targetContour, int samplingRateInHz, float windowDurationInSeconds) { float[] averageSampleEnergies = null; double[] y = null; if (x != null && times != null) { y = ArrayUtils.copy(x); int n; float t; int ind; float gain; for (n = 0; n < y.length; n++) { t = SignalProcUtils.sample2time(n, samplingRateInHz); ind = MathUtils.findClosest(times, t); gain = 1.0f; if (currentContour[ind] > 0.0f && times[ind] > -1.0f) { if (t < times[ind] && ind > 0 && currentContour[ind - 1] > 0.0f && times[ind - 1] > -1.0f) gain = MathUtils.linearMap(t, times[ind - 1], times[ind], targetContour[ind - 1] / currentContour[ind - 1], targetContour[ind] / currentContour[ind]); else if (t > times[ind] && ind < times.length - 1 && currentContour[ind + 1] > 0.0f && times[ind + 1] > -1.0f) gain = MathUtils.linearMap(t, times[ind], times[ind + 1], targetContour[ind] / currentContour[ind], targetContour[ind + 1] / currentContour[ind + 1]); else gain = targetContour[ind] / currentContour[ind]; } y[n] *= gain; } } return y; } // Returns the reversed version of the input array public static double[] reverse(double[] x) { double[] y = new double[x.length]; for (int i = 0; i < x.length; i++) y[i] = x[x.length - i - 1]; return y; } // Returns voiced/unvoiced information for each pitch frame public static boolean[] getVuvs(double[] f0s) { if (f0s != null) { boolean[] vuvs = new boolean[f0s.length]; for (int i = 0; i < vuvs.length; i++) { if (f0s[i] < 10.0) vuvs[i] = false; else vuvs[i] = true; } return vuvs; } else return null; } /* * Extracts pitch marks from a given pitch contour. // It is not very optimized, only inserts a pitch mark and waits for * sufficient samples before inserting a new one // in order not to insert more than one pitch mark within one pitch period // * // f0s: pitch contour vector // fs: sampling rate in Hz // len: total samples in original speech signal // ws: window size * used for pitch extraction in seconds // ss: skip size used for pitch extraction in seconds // // offset: applies a fixed * offset to the first pitch mark so that all pitch marks are shifted by same amount // // To do: // Perform marking on the * residual by trying to locate glottal closure instants (might be implemented as a separate function indeed) // This may * improve PSOLA performance also */ public static PitchMarks pitchContour2pitchMarks(double[] f0s, int fs, int len, double ws, double ss, boolean bPaddZerosForFinalPitchMark, int offset) { // Interpolate unvoiced segments double[] interpf0s = interpolate_pitch_uv(f0s); int numfrm = f0s.length; int maxTotalPitchMarks = len; int[] tmpPitchMarks = MathUtils.zerosInt(maxTotalPitchMarks); float[] tmpF0s = new float[maxTotalPitchMarks]; int count = 0; int prevInd = 1; int ind; double T0; assert offset >= 0; for (int i = 1; i <= len; i++) { ind = (int) (Math.floor(((i - 1.0) / fs - 0.5 * ws) / ss + 0.5) + 1); if (ind < 1) ind = 1; if (ind > numfrm) ind = numfrm; if (interpf0s[ind - 1] > 10.0) T0 = (fs / interpf0s[ind - 1]); else T0 = (fs / 100.0f); if (i == 1 || i - T0 >= prevInd) // Insert new pitch mark { count++; tmpPitchMarks[count - 1] = i - 1 + offset; prevInd = i; if (i > 1) tmpF0s[count - 2] = (float) f0s[ind - 1]; } } PitchMarks pm = null; if (count > 1) { // Check if last pitch mark corresponds to the end of signal, otherwise put an additional pitch mark to match the last // period and note to padd sufficient zeros if (bPaddZerosForFinalPitchMark && tmpPitchMarks[count - 1] != len - 1) { pm = new PitchMarks(count + 1, tmpPitchMarks, tmpF0s, 0); pm.pitchMarks[pm.pitchMarks.length - 1] = pm.pitchMarks[pm.pitchMarks.length - 2] + (pm.pitchMarks[pm.pitchMarks.length - 2] - pm.pitchMarks[pm.pitchMarks.length - 3]); pm.totalZerosToPadd = pm.pitchMarks[pm.pitchMarks.length - 1] - (len - 1); } else pm = new PitchMarks(count, tmpPitchMarks, tmpF0s, 0); } return pm; } // Convert pitch marks to pitch contour values in Hz using a fixed analysis rate // Note that this function might result in inaccurate f0 values if the pitch marks are created from an f0 contour // The inaccuracy is due to conversion from float/double f0 values to integer pitch marks public static double[] pitchMarks2PitchContour(int[] pitchMarks, float ws, float ss, int samplingRate) { double[] f0s = null; float[] times = samples2times(pitchMarks, samplingRate); int numfrm = (int) Math.floor((times[times.length - 1] - 0.5 * ws) / ss + 0.5); if (numfrm > 0) { f0s = new double[numfrm]; int currentInd; float currentTime; float T0; for (int i = 0; i < numfrm; i++) { currentTime = i * ss + 0.5f * ws; currentInd = MathUtils.findClosest(times, currentTime); if (currentInd > 0) f0s[i] = 1.0 / (times[currentInd] - times[currentInd - 1]); else f0s[i] = 1.0 / times[currentInd]; } } return f0s; } public static double[] fixedRateF0Values(PitchMarks pm, double wsFixedInSeconds, double ssFixedInSeconds, int numfrm, int samplingRate) { double[] f0s = new double[numfrm]; int i, ind, sample; boolean isVoiced; for (i = 0; i < numfrm; i++) { sample = SignalProcUtils.time2sample((float) (i * ssFixedInSeconds + 0.5 * wsFixedInSeconds), samplingRate); ind = MathUtils.findClosest(pm.pitchMarks, sample); f0s[i] = 0.0; if (ind < 0) { if (sample > pm.pitchMarks[pm.pitchMarks.length - 1]) ind = pm.pitchMarks.length - 1; else ind = 1; } isVoiced = pm.f0s[ind - 1] > 10.0 ? true : false; if (isVoiced) { if (ind > 0) f0s[i] = ((double) samplingRate) / (pm.pitchMarks[ind] - pm.pitchMarks[ind - 1]); else f0s[i] = ((double) samplingRate) / (pm.pitchMarks[ind + 1] - pm.pitchMarks[ind]); } } return f0s; } public static double[] interpolate_pitch_uv(double[] f0s) { return interpolate_pitch_uv(f0s, 10.0); } // Interpolates unvoiced parts of the f0 contour // using the neighbouring voiced parts // Linear interpolation is used public static double[] interpolate_pitch_uv(double[] f0s, double minVoicedVal) { int[] ind_v = MathUtils.find(f0s, MathUtils.GREATER_THAN, minVoicedVal); double[] new_f0s = null; if (ind_v == null) { new_f0s = new double[f0s.length]; System.arraycopy(f0s, 0, new_f0s, 0, f0s.length); } else { double[] tmp_f0s = new double[f0s.length]; System.arraycopy(f0s, 0, tmp_f0s, 0, f0s.length); int[] ind_v2 = null; if (ind_v[0] != 0) { tmp_f0s[0] = MathUtils.mean(f0s, ind_v); ind_v2 = new int[ind_v.length + 1]; ind_v2[0] = 0; System.arraycopy(ind_v, 0, ind_v2, 1, ind_v.length); } else { ind_v2 = new int[ind_v.length]; System.arraycopy(ind_v, 0, ind_v2, 0, ind_v.length); } int[] ind_v3 = null; if (ind_v2[ind_v2.length - 1] != tmp_f0s.length - 1) { tmp_f0s[tmp_f0s.length - 1] = tmp_f0s[ind_v2[ind_v2.length - 1]]; ind_v3 = new int[ind_v2.length + 1]; System.arraycopy(ind_v2, 0, ind_v3, 0, ind_v2.length); ind_v3[ind_v2.length] = f0s.length - 1; } else { ind_v3 = new int[ind_v2.length]; System.arraycopy(ind_v2, 0, ind_v3, 0, ind_v2.length); } int i; double[] y = new double[ind_v3.length]; for (i = 0; i < ind_v3.length; i++) y[i] = tmp_f0s[ind_v3[i]]; int[] xi = new int[f0s.length]; for (i = 0; i < f0s.length; i++) xi[i] = i; new_f0s = MathUtils.interpolate_linear(ind_v3, y, xi); } return new_f0s; } // A least squares line is fit to the given contour // and the parameters of the line are returned, i.e. line[0]=intercept and line[1]=slope public static double[] getContourLSFit(double[] contour, boolean isPitchUVInterpolation) { double[] line = null; if (contour != null) { double[] newContour = new double[contour.length]; System.arraycopy(contour, 0, newContour, 0, contour.length); if (isPitchUVInterpolation) newContour = SignalProcUtils.interpolate_pitch_uv(newContour); double[] indices = new double[contour.length]; for (int i = 0; i < contour.length; i++) indices[i] = i; line = fitLeastSquaresLine(indices, newContour); } return line; } public static double[] fitLeastSquaresLine(double[] x, double[] y) { assert x != null; assert y != null; assert x.length == y.length; double[] params = new double[2]; double sx = 0.0; double sy = 0.0; double sxx = 0.0; double sxy = 0.0; double delta; int numPoints = x.length; for (int i = 0; i < numPoints; i++) { sx += x[i]; sy += y[i]; sxx += x[i] * x[i]; sxy += x[i] * y[i]; } delta = numPoints * sxx - sx * sx; // Intercept params[0] = (sxx * sy - sx * sxy) / delta; // Slope params[1] = (numPoints * sxy - sx * sy) / delta; return params; } public static boolean getVoicing(double[] windowedSpeechFrame, int samplingRateInHz) { return getVoicing(windowedSpeechFrame, samplingRateInHz, 0.35f); } public static boolean getVoicing(double[] windowedSpeechFrame, int samplingRateInHz, double voicingThreshold) { double Pvoiced = getVoicingProbability(windowedSpeechFrame, samplingRateInHz); return Pvoiced >= voicingThreshold; } public static double getVoicingProbability(double[] windowedSpeechFrame, int samplingRateInHz) { int maxT0 = (int) ((double) samplingRateInHz / 40.0); int minT0 = (int) ((double) samplingRateInHz / 400.0); if (maxT0 > windowedSpeechFrame.length - 1) maxT0 = windowedSpeechFrame.length - 1; if (minT0 > maxT0) minT0 = maxT0; double[] R = SignalProcUtils.autocorr(windowedSpeechFrame, maxT0); double maxR = R[minT0]; for (int i = minT0 + 1; i <= maxT0; i++) { if (R[i] > maxR) maxR = R[i]; } double Pvoiced = maxR / R[0]; return Pvoiced; } public static double[] autocorr(double[] x, int LPOrder) { int N = x.length; double[] R = new double[LPOrder + 1]; int n, m; for (m = 0; m <= LPOrder; m++) { R[m] = 0.0; for (n = 0; n <= x.length - m - 1; n++) R[m] += x[n] * x[n + m]; } return R; } // Apply a 1st order highpass preemphasis filter to speech frame frm public static double[] applyPreemphasis(double[] frm, double preCoef) { double[] frmOut = new double[frm.length]; System.arraycopy(frm, 0, frmOut, 0, frm.length); if (preCoef > 0.0) { double[] coeffs = new double[2]; coeffs[0] = 1.0; coeffs[1] = -preCoef; RecursiveFilter r = new RecursiveFilter(coeffs); r.apply(frmOut); } return frmOut; } // Remove preemphasis from preemphasized frame frm (i.e. 1st order lowspass filtering) public static double[] removePreemphasis(double[] frm, double preCoef) { double[] frmOut = null; if (frm != null && frm.length > 0) { frmOut = new double[frm.length]; System.arraycopy(frm, 0, frmOut, 0, frm.length); if (preCoef > 0.0) { double[] coeffs = new double[2]; coeffs[0] = 1.0; coeffs[1] = preCoef; RecursiveFilter r = new RecursiveFilter(coeffs); r.apply(frmOut); } } return frmOut; } public static double[] freq2bark(double[] freqsInHz) { double[] barks = null; if (freqsInHz != null) { barks = new double[freqsInHz.length]; for (int i = 0; i < barks.length; i++) barks[i] = freq2bark(freqsInHz[i]); } return barks; } public static double freq2bark(double freqInHz) { return 13.0 * Math.atan(0.00076 * freqInHz) + 3.5 * Math.atan((freqInHz * freqInHz / (7500 * 7500))); } /** * Since there is no asinh in Math, here it is used its definition: asinh(x) = ln( x + sqrt(x^2+1) ) This function is used in * fft2barkmx() * * @param freqInHz * frequency In Hz * @return 6 times log of f + square root of f times t + 1 */ public static double hz2bark(double freqInHz) { // if should be: return 6 * asinh(f/600); double f = freqInHz / 600; return 6 * Math.log(f + Math.sqrt((f * f) + 1)); } public static double freq2barkNew(double freqInHz) { if (freqInHz >= 605.0) return 13.0 * Math.atan(0.00076 * freqInHz); // 5.60265754 else return 8.7 + 14.2 * Math.log10(1e-50 + freqInHz / 1000.0); // 5.60092632 } public static double barkNew2freq(double barkNew) { if (barkNew >= 5.6017) // Roughly average of the above two values for 605.0 Hz in freq2barkNew return (Math.tan(barkNew / 13.0)) / 0.00076; else return 1000.0 * Math.pow(10.0, (barkNew - 8.7) / 14.2); } public static double[] bark2freq(double[] barks, int samplingRateInHz) { double[] freqsInHz = new double[barks.length]; for (int i = 0; i < barks.length; i++) freqsInHz[i] = bark2freq(barks[i], samplingRateInHz); return freqsInHz; } public static double bark2freq(double bark, int samplingRateInHz) { double midFreqInHz = 0.25 * samplingRateInHz; double stepInHz = 0.5 * 0.25 * samplingRateInHz; double midFreqInBark = SignalProcUtils.freq2bark(midFreqInHz); while (Math.abs(midFreqInBark - bark) > 1e-10) { if (midFreqInBark < bark) midFreqInHz += stepInHz; else midFreqInHz -= stepInHz; stepInHz *= 0.5; midFreqInBark = SignalProcUtils.freq2bark(midFreqInHz); } return midFreqInHz; } public static double barkNew2radian(double bark, int samplingRateInHz) { return SignalProcUtils.hz2radian(barkNew2freq(bark), samplingRateInHz); } /*** * Java ported version of: wts = fft2barkmx(nfft, sr, nfilts, width) Generate a matrix of weights to combine FFT bins into * Bark bins. nfft defines the source FFT size at sampling rate sr. Optional nfilts specifies the number of output bands * required (else one per bark), and width is the constant width of each band in Bark (default 1). While wts has nfft columns, * the second half are all zero. Hence, Bark spectrum is fft2barkmx(nfft,sr)*abs(fft(xincols,nfft)); 2004-09-05 * dpwe@ee.columbia.edu based on rastamat/audspec.m * * @param nfft * FFT size * @param sr * sampling rate * @param nfilts * number of output bark bands * @param width * width of each band in Bark (default 1) * @param minfreq * min frequency * @param maxfreq * max frequency * @return wts */ public static double[][] fft2barkmx(int nfft, int sr, int nfilts, int width, double minfreq, double maxfreq) { int i, j, k; double min_bark = hz2bark(minfreq); double nyqbark = hz2bark(maxfreq) - min_bark; double wts[][] = new double[nfilts][nfft]; for (i = 0; i < nfilts; i++) wts[i] = MathUtils.zeros(nfft); // bark per filter double step_barks = nyqbark / (nfilts - 1); // Frequency of each FFT bin in Bark int halfNfft = (nfft / 2); double binbarks[] = new double[(halfNfft + 1)]; for (i = 0; i < (halfNfft + 1); i++) { binbarks[i] = hz2bark(i * sr / nfft); } double f_bark_mid, aux; double lof[] = new double[(halfNfft + 1)]; double hif[] = new double[(halfNfft + 1)]; for (i = 1; i <= nfilts; i++) { f_bark_mid = min_bark + (i - 1) * step_barks; // Linear slopes in log-space (i.e. dB) intersect to trapezoidal window for (j = 0; j < (halfNfft + 1); j++) { lof[j] = (binbarks[j] - f_bark_mid) / width - 0.5; hif[j] = (binbarks[j] - f_bark_mid) / width + 0.5; } for (k = 0; k < (halfNfft + 1); k++) { aux = Math.min(0, Math.min(hif[k], (-2.5 * lof[k]))); wts[i - 1][k] = Math.pow(10, aux); } } return wts; } // Convert frequency in Hz to frequency sample index // maxFreq corresponds to half sampling rate, i.e. sample no: fftSize/2+1 where freq sample indices are 0,1,...,maxFreq-1 public static int[] freq2index(double[] freqsInHz, int samplingRateInHz, int maxFreq) { int[] inds = null; if (freqsInHz != null && freqsInHz.length > 0) { inds = new int[freqsInHz.length]; for (int i = 0; i < inds.length; i++) inds[i] = freq2index(freqsInHz[i], samplingRateInHz, maxFreq); } return inds; } // maxFreqIndex: Actually we have indices from 0,...,maxFreqIndex-1 public static int freq2index(double freqInHz, double samplingRateInHz, int maxFreqIndex) { int index = (int) Math.floor(freqInHz / (0.5 * samplingRateInHz) * (maxFreqIndex - 1) + 0.5); index = (int) Math.max(0, index); index = (int) Math.min(index, maxFreqIndex); return index; } // Convert frequency in Hz to frequency sample index // maxFreq corresponds to half sampling rate, i.e. sample no: fftSize/2+1 where freq sample indices are 0,1,...,maxFreq-1 public static double[] freq2indexDouble(double[] freqsInHz, double samplingRateInHz, int maxFreq) { double[] inds = null; if (freqsInHz != null && freqsInHz.length > 0) { inds = new double[freqsInHz.length]; for (int i = 0; i < inds.length; i++) inds[i] = freq2indexDouble(freqsInHz[i], samplingRateInHz, maxFreq); } return inds; } public static double freq2indexDouble(double freqInHz, double samplingRateInHz, int maxFreqIndex) { double index = freqInHz / (0.5 * samplingRateInHz) * (maxFreqIndex - 1); index = Math.max(0, index); index = Math.min(index, maxFreqIndex); return index; } // Convert a zero based spectrum index value to frequency in Hz // If fftSize is 512, zeroBasedMaxFreqIndex=256 is the last index public static double index2freq(int zeroBasedFreqIndex, int samplingRateInHz, int zeroBasedMaxFreqIndex) { return zeroBasedFreqIndex * (0.5 * samplingRateInHz) / zeroBasedMaxFreqIndex; } // Convert a zero based spectrum index value to frequency in Hz public static double indexDouble2freq(double zeroBasedFreqIndex, int samplingRateInHz, int zeroBasedMaxFreqIndex) { return zeroBasedFreqIndex * (0.5 * samplingRateInHz) / zeroBasedMaxFreqIndex; } // Convert sample index to time value in seconds public static float sample2time(int sample, int samplingRate) { return ((float) sample) / samplingRate; } public static float sampleFloat2time(float sample, int samplingRate) { return sample / samplingRate; } public static float sample2time(long sample, int samplingRate) { return (float) (((double) sample) / ((double) samplingRate)); } public static float sample2time(float sample, int samplingRate) { return sample / samplingRate; } // Convert time value in seconds to sample index public static int time2sample(float time, int samplingRate) { return (int) Math.floor(time * samplingRate + 0.5f); } public static int[] time2sample(float[] times, int samplingRate) { int[] samples = null; if (times != null && times.length > 0) { samples = new int[times.length]; for (int i = 0; i < times.length; i++) samples[i] = time2sample(times[i], samplingRate); } return samples; } public static int time2sample(double time, int samplingRate) { return (int) Math.floor(time * samplingRate + 0.5); } public static int[] time2sample(double[] times, int samplingRate) { int[] samples = null; if (times != null && times.length > 0) { samples = new int[times.length]; for (int i = 0; i < times.length; i++) samples[i] = time2sample(times[i], samplingRate); } return samples; } public static double time2sampleDouble(double time, int samplingRate) { return time * samplingRate; } // Convert sample indices to time values in seconds public static float[] samples2times(int[] samples, int samplingRate) { float[] times = null; if (samples != null && samples.length > 0) { times = new float[samples.length]; for (int i = 0; i < samples.length; i++) times[i] = ((float) samples[i]) / samplingRate; } return times; } // Convert time values in seconds to sample indices public static int[] times2samples(float[] times, int samplingRate) { int[] samples = null; if (times != null && times.length > 0) { samples = new int[times.length]; for (int i = 0; i < samples.length; i++) samples[i] = (int) Math.floor(times[i] * samplingRate + 0.5f); } return samples; } // Find the scaled version of a given time value t by performing time varying scaling using // scales s at times given by alphas // t: instant of time which we want to convert to the new time scale // s: time scale factors at times given by alphas // alphas: time instants at which the time scale modification factor is the corresponding entry in s public static float timeScaledTime(float t, float[] scales, float[] times) { assert scales != null; if (times != null) assert scales.length == times.length; int N = scales.length; float tNew = t; int i; if (N > 0) { if (times == null || t <= times[0]) tNew = t * scales[0]; else { int ind = -1; // greatest time index that t is greater than for (i = 0; i < N; i++) { if (t > times[i]) ind = i; else break; } if (ind == -1) tNew = scales[0] * t; else { tNew = scales[0] * times[0]; for (i = 0; i < ind; i++) tNew += scales[i + 1] * (times[i + 1] - times[i]); tNew += scales[ind] * (t - times[ind]); } } } return tNew; } // Find the scaled version of a set of time values times by performing time varying scaling using // tScales at times given by tScalesTimes public static float[] timeScaledTimes(float[] times, float[] tScales, float[] tScalesTimes) { float[] newTimes = null; if (times != null && times.length > 0) { newTimes = new float[times.length]; for (int i = 0; i < times.length; i++) newTimes[i] = timeScaledTime(times[i], tScales, tScalesTimes); } return newTimes; } // Time scale a pitch contour as specified by a time varying pattern tScales and tScalesTimes // f0s: f0 values // ws: Window size in seconds in f0 analysis // ss: Skip size in seconds in f0 analysis // tScales: time scale factors at times given by tScalesTimes // tScalesTimes: time instants at which the time scale modification factor is the corresponding entry in tScales public static double[] timeScalePitchContour(double[] f0s, float ws, float ss, float[] tScales, float[] tScalesTimes) { if (tScales == null || tScalesTimes == null) return f0s; assert tScales.length == tScalesTimes.length; int i, ind; // First compute the original time axis float[] times = new float[f0s.length]; for (i = 0; i < f0s.length; i++) times[i] = i * ss + 0.5f * ws; float[] newTimes = timeScaledTimes(times, tScales, tScalesTimes); int numfrm = (int) Math.floor((newTimes[newTimes.length - 1] - 0.5 * ws) / ss + 0.5); double[] f0sNew = new double[numfrm]; for (i = 0; i < numfrm; i++) { ind = MathUtils.findClosest(newTimes, i * ss + 0.5f * ws); f0sNew[i] = f0s[ind]; } return f0sNew; } // Pitch scale a pitch contour as specified by a time varying pattern pScales and pScalesTimes // f0s: f0 values // ws: Window size in seconds in f0 analysis // ss: Skip size in seconds in f0 analysis // pScales: pitch scale factors at times given by pScalesTimes // pScalesTimes: time instants at which the pitch scale modification factor is the corresponding entry in pScales public static double[] pitchScalePitchContour(double[] f0s, float ws, float ss, float[] pScales, float[] pScalesTimes) { if (pScales == null || pScalesTimes == null) return f0s; assert pScales.length == pScalesTimes.length; int i, smallerCloseInd; float currentTime; float alpha; double[] f0sNew = new double[f0s.length]; for (i = 0; i < f0s.length; i++) { currentTime = i * ss + 0.5f * ws; smallerCloseInd = MathUtils.findClosest(pScalesTimes, currentTime); if (pScalesTimes[smallerCloseInd] > currentTime) smallerCloseInd--; if (smallerCloseInd >= 0 && smallerCloseInd < pScales.length - 1) { alpha = (pScalesTimes[smallerCloseInd + 1] - currentTime) / (pScalesTimes[smallerCloseInd + 1] - pScalesTimes[smallerCloseInd]); f0sNew[i] = (alpha * pScales[smallerCloseInd] + (1.0f - alpha) * pScales[smallerCloseInd + 1]) * f0s[i]; } else { smallerCloseInd = Math.max(0, smallerCloseInd); smallerCloseInd = Math.min(smallerCloseInd, pScales.length - 1); f0sNew[i] = pScales[smallerCloseInd] * f0s[i]; } } return f0sNew; } // Returns samples from a white noise process. The sample amplitudes are between [-0.5,0.5] public static double[] getNoise(double startFreqInHz, double endFreqInHz, double transitionBandwidthInHz, int samplingRateInHz, int len) { return getNoiseNormalizedFreqs(startFreqInHz / samplingRateInHz, endFreqInHz / samplingRateInHz, transitionBandwidthInHz / samplingRateInHz, len); } public static double[] getNoiseNormalizedFreqs(double normalizedStartFreq, double normalizedEndFreq, double normalizedTransitionBandwidth, int len) { double[] noise = null; if (len > 0) { FIRFilter f = null; int origLen = len; noise = new double[origLen]; if (normalizedStartFreq != 0.0 || normalizedEndFreq != 0.5) // else --> No filtering is required { if (normalizedStartFreq > 0.0 && normalizedEndFreq < 0.5) // Bandpass f = new BandPassFilter(normalizedStartFreq, normalizedEndFreq, normalizedTransitionBandwidth); else if (normalizedStartFreq > 0.0) f = new HighPassFilter(normalizedStartFreq, normalizedTransitionBandwidth); else if (normalizedEndFreq < 0.5f) f = new LowPassFilter(normalizedEndFreq, normalizedTransitionBandwidth); origLen = len; len += 3 * f.getImpulseResponseLength(); // This is for avoiding initial zeros in the beginning of the signal } if (f == null) { for (int i = 0; i < origLen; i++) noise[i] = Math.random() - 0.5; return noise; } else { double[] noise2 = new double[len]; for (int i = 0; i < len; i++) noise2[i] = Math.random() - 0.5; noise2 = f.apply(noise2); System.arraycopy(noise2, f.getImpulseResponseLength(), noise, 0, origLen); } } return noise; } public static float radian2hz(float rad, int samplingRate) { return (float) ((rad / MathUtils.TWOPI) * samplingRate); } public static double radian2hz(double rad, int samplingRate) { return (rad / MathUtils.TWOPI) * samplingRate; } public static float hz2radian(float hz, int samplingRate) { return (float) (hz * MathUtils.TWOPI / samplingRate); } public static double hz2radian(double hz, int samplingRate) { return hz * MathUtils.TWOPI / samplingRate; } // Median filtering: All values in x are replaced by the median of the 3-closest context neighbours (i.e. the left value, the // value itself, and the right value) // The output y[k] is the median of x[k-1], x[k], and x[k+1] // All out-of-boundary values are assumed 0.0 public static double[] medianFilter(double[] x) { return medianFilter(x, 3); } public static float[] medianFilter(float[] x) { return medianFilter(x, 3); } public static float[] medianFilter(float[] x, int N) { double[] x2 = new double[x.length]; int i; for (i = 0; i < x.length; i++) x2[i] = x[i]; x2 = medianFilter(x2, N); float[] y = new float[x.length]; for (i = 0; i < x.length; i++) y[i] = (float) (x2[i]); return y; } // Median filtering: All values in x are replaced by the median of the N closest context neighbours and the value itself // If N is odd, the output y[k] is the median of x[k-(N-1)/2],...,x[k+(N-1)/2] // If N is even, the output y[k] is the median of x[k-(N/2)+1],...,x[k+(N/2)-1], i.e. the average of the (N/2-1)th and (N/2)th // of the sorted values public static double[] medianFilter(double[] x, int N) { double[] y = new double[x.length]; Vector<Double> v = new Vector<Double>(); int k, j, midVal; if (N % 2 == 1) // Odd version { midVal = (N - 1) / 2; for (k = 0; k < x.length; k++) { /* * for (j=0; j<midVal; j++) { if (k-j>=0) v.add(x[k-j]); else v.add(leftOutOfBound); } * * for (j=midVal; j<N; j++) { if (k+j<x.length) v.add(x[k+j]); else v.add(rightOutOfBound); } */ // MS, 27.2.09: Ignore left/right out of bound; use window of size N, centered around current point // if possible but staying within the range of data. int iLeft = Math.max(0, k - midVal); int iRight = Math.min(x.length - 1, iLeft + N); for (j = iLeft; j <= iRight; j++) { v.add(x[j]); } Collections.sort(v); y[k] = ((Double) (v.get(midVal))).doubleValue(); v.clear(); } } else // Even version { midVal = N / 2 - 1; for (k = 0; k < x.length; k++) { /* * for (j=0; j<=midVal; j++) { if (k-j>=0) v.add(x[k-j]); else v.add(leftOutOfBound); } * * for (j=midVal+1; j<N; j++) { if (k+j<x.length) v.add(x[k+j]); else v.add(rightOutOfBound); } */ // MS, 27.2.09: Ignore left/right out of bound; use window of size N, centered around current point // if possible but staying within the range of data. int iLeft = Math.max(0, k - midVal); int iRight = Math.min(x.length - 1, k + midVal); for (j = iLeft; j <= iRight; j++) { v.add(x[j]); } Collections.sort(v); if (midVal + 1 < v.size()) y[k] = 0.5 * (((Double) (v.get(midVal))).doubleValue() + ((Double) (v.get(midVal + 1))).doubleValue()); else y[k] = ((Double) (v.get(midVal))).doubleValue(); v.clear(); } } return y; } public static double[] meanFilter(double[] x, int N) { return meanFilter(x, N, x[0], x[x.length - 1]); } public static float[] meanFilter(float[] x, int N) { return meanFilter(x, N, x[0], x[x.length - 1]); } public static float[] meanFilter(float[] x, int N, float leftOutOfBound, float rightOutOfBound) { double[] xd = ArrayUtils.copyFloat2Double(x); xd = meanFilter(xd, N, (double) leftOutOfBound, (double) rightOutOfBound); return ArrayUtils.copyDouble2Float(xd); } // Mean filtering: All values in x are replaced by the mean of the N closest context neighbours and the value itself // If N is odd, the output y[k] is the mean of x[k-(N-1)/2],...,x[k+(N-1)/2] // If N is even, the output y[k] is the mean of x[k-(N/2)+1],...,x[k+(N/2)-1] // The out-of-boundary values are assumed leftOutOfBound for k-i<0 and rightOutOfBound for k+i>x.length-1 public static double[] meanFilter(double[] x, int N, double leftOutOfBound, double rightOutOfBound) { double[] y = new double[x.length]; Vector<Double> v = new Vector<Double>(); int k, j, midVal; if (N % 2 == 1) // Odd version { midVal = (N - 1) / 2; for (k = 0; k < x.length; k++) { for (j = 0; j < midVal; j++) { if (k - j >= 0) v.add(x[k - j]); else v.add(leftOutOfBound); } for (j = midVal; j < N; j++) { if (k + j < x.length) v.add(x[k + j]); else v.add(rightOutOfBound); } y[k] = mean(v); v.clear(); } } else // Even version { midVal = N / 2 - 1; for (k = 0; k < x.length; k++) { for (j = 0; j <= midVal; j++) { if (k - j >= 0) v.add(x[k - j]); else v.add(leftOutOfBound); } for (j = midVal + 1; j < N; j++) { if (k + j < x.length) v.add(x[k + j]); else v.add(rightOutOfBound); } y[k] = mean(v); v.clear(); } } return y; } public static double mean(Vector<Double> v) { double m = 0.0; for (int i = 0; i < v.size(); i++) m += ((Double) (v.get(i))).doubleValue(); m /= v.size(); return m; } public static float frameIndex2Time(int zeroBasedFrameIndex, float windowSizeInSeconds, float skipSizeInSeconds) { return Math.max(0.0f, 0.5f * windowSizeInSeconds + zeroBasedFrameIndex * skipSizeInSeconds); } public static double frameIndex2Time(int zeroBasedFrameIndex, double windowSizeInSeconds, double skipSizeInSeconds) { return Math.max(0.0, 0.5 * windowSizeInSeconds + zeroBasedFrameIndex * skipSizeInSeconds); } public static int time2frameIndex(float time, float windowSizeInSeconds, float skipSizeInSeconds) { return (int) Math.max(0, Math.floor((time - 0.5f * windowSizeInSeconds) / skipSizeInSeconds + 0.5)); } public static int time2frameIndex(double time, double windowSizeInSeconds, double skipSizeInSeconds) { return (int) Math.max(0, Math.floor((time - 0.5 * windowSizeInSeconds) / skipSizeInSeconds + 0.5)); } // Center-clipping using the amount <ratio> // Valid values of ratio are in the range [0.0,1.0] // greater values result in more clipping (i.e. with 1.0 you will get all zeros at the output) public static void centerClip(double[] x, double ratio) { if (ratio < 0.0) ratio = 0.0; if (ratio > 1.0) ratio = 1.0; double positiveMax = MathUtils.getMax(x); double negativeMax = MathUtils.getMin(x); double positiveTh = positiveMax * ratio; double negativeTh = negativeMax * ratio; for (int i = 0; i < x.length; i++) { if (x[i] > positiveTh) x[i] -= positiveTh; else if (x[i] < negativeTh) x[i] -= negativeTh; else x[i] = 0.0; } } public static double[] getVoiceds(double[] f0s) { double[] voiceds = null; if (f0s != null) { int totalVoiceds = 0; int i; for (i = 0; i < f0s.length; i++) { if (f0s[i] > 10.0) totalVoiceds++; } if (totalVoiceds > 0) { voiceds = new double[totalVoiceds]; int count = 0; for (i = 0; i < f0s.length; i++) { if (f0s[i] > 10.0) voiceds[count++] = f0s[i]; if (count >= totalVoiceds) break; } } } return voiceds; } // Convert an f0 contour into a log-f0 contour by handling unvoiced parts specially // The unvoiced values (i.e. f0 values less than or equal to 10 Hz are set to 0.0 public static double[] getLogF0s(double[] f0s) { return MathUtils.log(f0s, 10.0, 0.0); } // Inverse of getLogF0s functions // i.e. log f0 values are converted to values in Hz with special handling of unvoiceds public static double[] getExpF0s(double[] logF0s) { double[] f0s = null; if (logF0s != null) { f0s = new double[logF0s.length]; for (int i = 0; i < f0s.length; i++) { if (logF0s[i] > Math.log(10.0)) f0s[i] = Math.exp(logF0s[i]); else f0s[i] = 0.0; } } return f0s; } public static double getF0Range(double[] f0s) { return getF0Range(f0s, 0.10, 0.10); } public static double getF0Range(double[] f0s, double percentileMin, double percentileMax) { double range = 0.0; double[] voiceds = SignalProcUtils.getVoiceds(f0s); if (voiceds != null) { if (percentileMin < 0.0) percentileMin = 0.0; if (percentileMin > 1.0) percentileMin = 1.0; if (percentileMax < 0.0) percentileMax = 0.0; if (percentileMax > 1.0) percentileMax = 1.0; MathUtils.quickSort(voiceds); int ind1 = (int) Math.floor(voiceds.length * percentileMin + 0.5); int ind2 = (int) Math.floor(voiceds.length * (1.0 - percentileMax) + 0.5); range = Math.max(0.0, voiceds[ind2] - voiceds[ind1]); } return range; } public static int frameIndex2LabelIndex(int zeroBasedFrameIndex, Labels labels, double windowSizeInSeconds, double skipSizeInSeconds) { double time = zeroBasedFrameIndex * skipSizeInSeconds + 0.5 * windowSizeInSeconds; return time2LabelIndex(time, labels); } public static int time2LabelIndex(double time, Labels labels) { int index = 0; for (int i = 0; i < labels.items.length; i++) { if (labels.items[i].time > time) { index = i; break; } } if (index < 0) index = 0; if (index > labels.items.length - 1) index = labels.items.length - 1; return index; } public static double getRmsDistance(double[] x, double[] y) { double rmsDist = 0.0; for (int i = 0; i < Math.min(x.length, y.length); i++) rmsDist += (x[i] - y[i]) * (x[i] - y[i]); rmsDist /= Math.min(x.length, y.length); rmsDist = Math.sqrt(rmsDist); return rmsDist; } public static int[] merge(int[] x1, int[] x2) { int[] y = null; int ylen = 0; if (x1 != null) ylen += x1.length; if (x2 != null) ylen += x2.length; y = new int[ylen]; int pos = 0; if (x1 != null) { System.arraycopy(x1, 0, y, 0, x1.length); pos += x1.length; } if (x2 != null) System.arraycopy(x2, 0, y, pos, x2.length); return y; } public static double[] merge(double[] x1, double[] x2) { double[] y = null; int ylen = 0; if (x1 != null) ylen += x1.length; if (x2 != null) ylen += x2.length; y = new double[ylen]; int pos = 0; if (x1 != null) { System.arraycopy(x1, 0, y, 0, x1.length); pos += x1.length; } if (x2 != null) System.arraycopy(x2, 0, y, pos, x2.length); return y; } // Decimate data by a factor D // For non-integer index values, linear interpolation is performed public static double[] decimate(double[] x, double D) { double[] y = null; if (x != null) { int ylen = (int) Math.floor(x.length / D + 0.5); y = new double[ylen]; double dind = 0.5 * D; int total = 0; int ind1, ind2; while (total < ylen) { ind1 = (int) Math.floor(dind); ind2 = ind1 + 1; if (ind2 > x.length - 1) ind2 = x.length - 1; ind1 = ind2 - 1; y[total++] = (ind2 - dind) * x[ind1] + (dind - ind1) * x[ind2]; dind += D; } } return y; } // Interpolate data by a factor D // For non-integer index values, linear interpolation is performed public static double[] interpolate(double[] x, double D) { double[] y = null; if (x != null) { int ylen = (int) Math.floor(x.length * D + 0.5); y = new double[ylen]; int xind; double xindDouble; for (int i = 0; i < ylen; i++) { xindDouble = i / D; xind = (int) Math.floor(xindDouble); if (xind <= 0) y[i] = (xind - xindDouble) * (2 * x[0] - x[1]) + (1 - xind + xindDouble) * x[xind]; else if (xind > x.length - 1) y[i] = (x[x.length - 1] - x[x.length - 2]) * (xindDouble - x.length + 1) + x[x.length - 1]; else y[i] = (xind - xindDouble) * x[xind - 1] + (1 - xind + xindDouble) * x[xind]; } } return y; } public static double energy(double[] x) { double e = 0.0; for (int i = 0; i < x.length; i++) e += x[i] * x[i]; return e; } public static double[] filter(double[] b, double[] x) { double[] a = new double[1]; a[0] = 1.0; return filter(b, a, x); } public static double[] filter(double[] b, double[] a, double[] x) { return filter(b, a, x, false); } public static double[] filter(double[] b, double[] x, boolean bNormalize) { double[] a = new double[1]; a[0] = 1.0; return filter(b, a, x, bNormalize); } public static double[] filter(double[] b, double[] a, double[] x, boolean bNormalize) { double[] zi = new double[Math.max(a.length, b.length) - 1]; Arrays.fill(zi, 0.0); return filter(b, a, x, bNormalize, zi); } public static double[] filter(double[] b, double[] x, boolean bNormalize, double[] zi) { double[] a = new double[1]; a[0] = 1.0; return filter(b, a, x, bNormalize, zi); } // Time domain digital filtering // a[0]*y[n] = b[0]*x[n] + b[1]*x[n-1] + ... + b[nb]*x[n-nb] // - a[1]*y[n-1] - ... - a[na]*y[n-na] // b and a are filter coefficients (impulse response of the filter) // If bNormalize is true, all the coeffs are normalized with a[0]. // The initial conditions should be specified in zi. // Setting zi to all zeroes causes no initial conditions to be used. // Length of zi should be max(a.length, b.length)-1. public static double[] filter(double[] b, double[] a, double[] x, boolean bNormalize, double[] zi) { int n; double x_terms; double y_terms; int ind; double[] y = new double[x.length]; int nb = b.length - 1; int na = a.length - 1; if (bNormalize) { // Normalize with a[0] first if (a[0] != 1.0) { for (n = 0; n < b.length; n++) b[n] /= a[0]; for (n = 0; n < a.length; n++) a[n] /= a[0]; } } for (n = 0; n < x.length; n++) { x_terms = 0.0; for (ind = n; ind > n - nb - 1; ind--) { if (ind >= 0) x_terms += b[n - ind] * x[ind]; else x_terms += b[n - ind] * zi[-ind - 1]; } y_terms = 0.0; for (ind = n - 1; ind > n - na - 1; ind--) { if (ind >= 0) y_terms += -a[n - ind] * y[ind]; } y[n] = x_terms + y_terms; } return y; } // Time domain digital filtering with phase distortion correction // using filter denumerator b. The numerator is 1.0, i.e. applies FIR filtering public static double[] filtfilt(double[] b, double[] x) { double[] a = new double[1]; a[0] = 1.0; return filtfilt(b, a, x); } // Time domain digital filtering with phase distortion correction // using filter denumerator b and numerator a public static double[] filtfilt(double[] b, double[] a, double[] x) { int nfilt; int i, j; double[] tmpb = null; double[] tmpa = null; if (b.length > a.length) { nfilt = b.length; tmpb = new double[nfilt]; tmpa = new double[nfilt]; for (i = 0; i < b.length; i++) tmpb[i] = b[i]; for (i = 0; i < a.length; i++) tmpa[i] = a[i]; for (i = a.length; i < nfilt; i++) tmpa[i] = 0.0; } else { nfilt = a.length; tmpb = new double[nfilt]; tmpa = new double[nfilt]; for (i = 0; i < a.length; i++) tmpa[i] = a[i]; for (i = 0; i < b.length; i++) tmpb[i] = b[i]; for (i = b.length; i < nfilt; i++) tmpb[i] = 0.0; } int nfact = 3 * (nfilt - 1); // Length of edge transients int rwlen = 3 * nfilt - 5; int ylen = 2 * nfact + x.length; double[] y = new double[ylen]; double[] yRet = null; if (!(x.length <= nfact)) // Input data too short! { // Solve system of linear equations for initial conditions // zi are the steady-state states of the filter b(z)/a(z) in the state-space // implementation of the 'filter' command. int[] rows = new int[rwlen]; for (i = 0; i <= nfilt - 2; i++) rows[i] = i; for (i = nfilt - 1; i <= 2 * nfilt - 4; i++) rows[i] = i - nfilt + 2; for (i = 2 * nfilt - 3; i <= 3 * nfilt - 6; i++) rows[i] = i - 2 * nfilt + 3; int[] cols = new int[rwlen]; for (i = 0; i <= nfilt - 2; i++) cols[i] = 0; for (i = nfilt - 1; i <= 2 * nfilt - 4; i++) cols[i] = i - nfilt + 2; for (i = 2 * nfilt - 3; i <= 3 * nfilt - 6; i++) cols[i] = i - 2 * nfilt + 4; double[] data = new double[rwlen]; data[0] = 1.0 + tmpa[1]; for (i = 1; i <= nfilt - 2; i++) data[i] = tmpa[i + 1]; for (i = nfilt - 1; i <= 2 * nfilt - 4; i++) data[i] = 1.0; for (i = 2 * nfilt - 3; i <= 3 * nfilt - 6; i++) data[i] = -1.0; int N = nfilt - 1; double[][] sp = new double[N][N]; for (i = 0; i < N; i++) { for (j = 0; j < N; j++) sp[i][j] = 0.0f; } for (i = 0; i < rwlen; i++) sp[rows[i]][cols[i]] = data[i]; double[] denum = new double[N]; for (i = 0; i < N; i++) denum[i] = 0.0; for (i = 2; i < nfilt + 1; i++) denum[i - 2] = b[i - 1] - tmpa[i - 1] * b[0]; double[] zi = new double[N]; for (i = 0; i < N; i++) zi[i] = 0.0; sp = MathUtils.inverse(sp); double tmp; for (i = 0; i < N; i++) { tmp = 0.0; for (j = 0; j < N; j++) tmp += sp[i][j] * denum[i]; zi[i] = tmp; } // Extrapolate beginning and end of data sequence using a "reflection // method". Slopes of original and extrapolated sequences match at // the end points. // This reduces end effects. for (i = 0; i < nfact; i++) y[i] = 2 * x[0] - x[nfact - i]; for (i = 0; i < x.length; i++) y[i + nfact] = x[i]; for (i = 0; i < nfact; i++) y[nfact + x.length + i] = 2 * x[x.length - 1] - x[x.length - 2 - i]; // Filter, reverse the data, filter again, and reverse the data again for (i = 0; i < N; i++) zi[i] = zi[i] * y[0]; y = filter(tmpb, tmpa, y, false, zi); y = SignalProcUtils.reverse(y); y = filter(tmpb, tmpa, y, false, zi); y = SignalProcUtils.reverse(y); // remove extrapolated pieces of y to write the output to x yRet = new double[x.length]; for (i = 0; i < x.length; i++) yRet[i] = y[i + nfact]; } else { yRet = filter(b, a, x); } return yRet; } public static double[] filterfd(double[] filterFFTAbsMag, double[] x, double samplingRate) { return filterfd(filterFFTAbsMag, x, samplingRate, 0.020); } public static double[] filterfd(double[] filterFFTAbsMag, double[] x, double samplingRate, double winsize) { return filterfd(filterFFTAbsMag, x, samplingRate, winsize, 0.010); } public static double[] filterfd(double[] filterFFTAbsMag, double[] x, double samplingRate, double winsize, double skipsize) { double[] y = null; if (x != null && filterFFTAbsMag != null) { int ws = (int) Math.floor(winsize * samplingRate + 0.5); int ss = (int) Math.floor(skipsize * samplingRate + 0.5); int maxFreq = filterFFTAbsMag.length; int fftSize = 2 * (maxFreq - 1); if (ws > fftSize) ws = fftSize; int numfrm = (int) Math.floor((x.length - 0.5 * ws) / ss + 0.5) + 1; HammingWindow wgt = new HammingWindow(ws); wgt.normalize(1.0f); y = new double[x.length]; Arrays.fill(y, 0.0); double[] w = new double[x.length]; Arrays.fill(w, 0.0); int i, j; ComplexArray XFRM = new ComplexArray(fftSize); double[] yfrm = new double[ws]; for (i = 1; i <= numfrm; i++) { Arrays.fill(XFRM.real, 0.0); Arrays.fill(XFRM.imag, 0.0); for (j = 1; j <= Math.min(ws, x.length - (i - 1) * ss); j++) XFRM.real[j - 1] = x[(i - 1) * ss + j - 1]; wgt.applyInline(XFRM.real, 0, ws); FFT.transform(XFRM.real, XFRM.imag, false); for (j = 0; j < maxFreq; j++) { XFRM.real[j] *= filterFFTAbsMag[j]; XFRM.imag[j] *= filterFFTAbsMag[j]; } for (j = maxFreq + 1; j <= fftSize; j++) { XFRM.real[j - 1] = XFRM.real[2 * maxFreq - 1 - j]; XFRM.imag[j - 1] = -XFRM.imag[2 * maxFreq - 1 - j]; } FFT.transform(XFRM.real, XFRM.imag, true); System.arraycopy(XFRM.real, 0, yfrm, 0, ws); for (j = (i - 1) * ss + 1; j <= Math.min(x.length, (i - 1) * ss + ws); j++) { y[j - 1] += wgt.value(j - (i - 1) * ss - 1) * yfrm[j - (i - 1) * ss - 1]; w[j - 1] += wgt.value(j - (i - 1) * ss - 1) * wgt.value(j - (i - 1) * ss - 1); } } for (i = 1; i <= x.length; i++) { if (w[i - 1] > 0.0) y[i - 1] /= w[i - 1]; } } return y; } public static void addWhiteNoise(double[] x, double level) { for (int i = 0; i < x.length; i++) x[i] += level * Math.random(); } public static double[] getWhiteNoise(int totalSamples, double maxAbsGain) { double[] n = null; if (totalSamples > 0) { n = new double[totalSamples]; for (int i = 0; i < totalSamples; i++) n[i] = 2.0 * maxAbsGain * (Math.random() - 0.5); } return n; } public static double[] getWhiteNoiseOfVariance(int totalSamples, double variance) { double[] n = getWhiteNoise(totalSamples, 1.0); MathUtils.adjustVariance(n, variance); return n; } public static double[] getWhiteNoiseOfMeanVariance(int totalSamples, double mean, double variance) { double[] n = getWhiteNoise(totalSamples, 1.0); MathUtils.adjustMean(n, mean); MathUtils.adjustVariance(n, variance); return n; } // These functions implement cepstrum computations asdescribed in van Santen et. al.´s book - Chapter 5 // (van Santen, et. al., Progress in Speech Synthesis) public static float[] specLinear2cepstrum(double[] specLinear, int cepsOrder) { int fftSizeFromLen = 2 * (specLinear.length - 1); int fftSize = 2; while (fftSize < fftSizeFromLen) fftSize *= 2; // double[] specLog = MathUtils.log10(specLinear); double[] specLog = MathUtils.log(specLinear); double[] real = new double[fftSize]; double[] imag = new double[fftSize]; Arrays.fill(real, 0.0); Arrays.fill(imag, 0.0); System.arraycopy(specLog, 0, real, 0, Math.min(specLog.length, fftSize)); FFT.transform(real, imag, true); float[] nceps = new float[cepsOrder + 1]; for (int i = 0; i < cepsOrder + 1; i++) nceps[i] = (float) real[i]; return nceps; } public static double cepstrum2linearSpecAmp(float[] ceps, float freqInRadians) { double logM = ceps[0]; for (int m = 1; m < ceps.length; m++) logM += 2 * ceps[m] * Math.cos(m * freqInRadians); // return Math.pow(10.0, logM); return Math.exp(logM); } public static double cepstrum2minimumPhase(float[] ceps, float freqInRadians) { double minPhase = 0.0; for (int m = 1; m < ceps.length; m++) minPhase -= 2 * ceps[m] * Math.sin(m * freqInRadians); return minPhase; } public static double getMaximumFreqOfVoicingInHz(double[] specAmpsLinear, int[] peakInds, int[][] freqBandInds, int samplingRate) { // freqBandInds is an Nx2 array, [0][0]: left index of first band, [0][1]: right index of first band, etc // use overlapping bands in frequency double maximumFreqOfVoicingInHz = 0.5 * samplingRate; int i; for (i = 0; i < freqBandInds.length; i++) { // TO DO: // -Find peakInds in this range // -Compute total energy of non-peaks in this range and divide by total energy in this range // -Take log of the value // -Compare with threshold and declare voicingfor this band } // 3-point Median filter voicing decisions // Find last "voiced" // Declare frequency of voicing as the end of the last voiced band return maximumFreqOfVoicingInHz; } // Converts half-left part of the spectrum into a full spectrum as follows: // fullSpectrum[0 ... maxFreq-1] = halfSpectrum[0 ... maxFreq-1] // fullSpectrum[maxFreq ... fftSize-1] = halfSpectrum[maxFreq-2 ... 1] // where maxFreq-1 = halfSpectrum.length // and fftSize = 2*(maxFreq-1); public static double[] spectralMirror(double[] halfSpectrum) { int maxFreq = halfSpectrum.length + 1; int fftSize = fullSpectrumSize(maxFreq); double[] fullSpectrum = new double[fftSize]; int k; System.arraycopy(halfSpectrum, 0, fullSpectrum, 0, maxFreq - 1); for (k = maxFreq; k <= fftSize - 1; k++) fullSpectrum[k] = halfSpectrum[2 * maxFreq - k - 2]; return fullSpectrum; } // Returns s1+s2 with length max(length(s1), length(s2)) public static double[] addSignals(double[] s1, double[] s2) { int len = 0; if (s1 != null) len = s1.length; if (s2 != null && s2.length > len) len = s2.length; double[] y = null; if (len > 0) y = new double[len]; if (s1 != null) System.arraycopy(s1, 0, y, 0, s1.length); if (s2 != null) { for (int i = 0; i < s2.length; i++) y[i] += s2[i]; } return y; } // Returns gain1.s1+gain2.s2 with length max(length(s1), length(s2)) public static double[] addSignals(double[] s1, double gain1, double[] s2, double gain2) { int len = 0; if (s1 != null) len = s1.length; if (s2 != null && s2.length > len) len = s2.length; double[] y = null; if (len > 0) y = new double[len]; if (s1 != null) System.arraycopy(s1, 0, y, 0, s1.length); if (s2 != null) { for (int i = 0; i < s2.length; i++) y[i] = gain1 * y[i] + gain2 * s2[i]; } else if (s1 != null) { for (int i = 0; i < s1.length; i++) y[i] = gain1 * y[i]; } return y; } public static double[] subtractSignals(double[] s1, double[] s2) { return addSignals(s1, 1.0, s2, -1.0); } public static double[] arFilter(double[] x, double[] a, double lpGain) { return arFilter(x, a, lpGain, null); } public static double[] arFilter(double[] x, float[] a, double lpGain) { return arFilter(x, a, lpGain, null); } public static double[] arFilter(double[] x, float[] a, double lpGain, double[] yInitial) { double[] aDouble = ArrayUtils.copyFloat2Double(a); return arFilter(x, aDouble, lpGain, yInitial); } public static double[] arFilter(double[] x, double[] a, double lpGain, double[] yInitial) { double[] y = new double[x.length]; int p = a.length; int n, k; for (n = 0; n < x.length; n++) { y[n] = lpGain * x[n]; for (k = 1; k <= Math.min(p, n); k++) y[n] += a[k - 1] * y[n - k]; if (yInitial != null) { for (k = n + 1; k <= p; k++) y[n] += a[k - 1] * yInitial[p - k + n]; } } return y; } public static double[] arFilterFreqDomain(double[] windowedFrame, double[] a, double lpGain, double startFreqInHz, double endFreqInHz, int samplingRateInHz) { int k; int fftSize = 2; while (fftSize < windowedFrame.length) fftSize *= 2; int maxFreqInd = fftSize / 2; ComplexArray X = new ComplexArray(fftSize); System.arraycopy(windowedFrame, 0, X.real, 0, windowedFrame.length); if (MathUtils.isPowerOfTwo(fftSize)) FFT.transform(X.real, X.imag, false); else X = FFTMixedRadix.fftComplex(X); double[] H = LpcAnalyser.calcSpecLinear(a, lpGain, fftSize); int startFreqInd = SignalProcUtils.freq2index(startFreqInHz, samplingRateInHz, maxFreqInd); int endFreqInd = SignalProcUtils.freq2index(endFreqInHz, samplingRateInHz, maxFreqInd); for (k = 0; k < startFreqInd; k++) H[k] = 0.0; for (k = endFreqInd + 1; k <= maxFreqInd; k++) H[k] = 0.0; ComplexArray Y = new ComplexArray(fftSize); for (k = 0; k <= maxFreqInd; k++) { Y.real[k] = X.real[k] * H[k]; Y.imag[k] = X.imag[k] * H[k]; ; } for (k = maxFreqInd + 1; k < fftSize; k++) { Y.real[k] = Y.real[fftSize - k]; Y.imag[k] = -1.0 * Y.imag[fftSize - k]; } if (MathUtils.isPowerOfTwo(fftSize)) FFT.transform(Y.real, Y.imag, true); else Y = FFTMixedRadix.ifft(Y); double[] y = new double[windowedFrame.length]; for (k = 0; k < windowedFrame.length; k++) y[k] = Y.real[k]; return y; } public static double[] fdFilter(double[] x, double[] filterFreqResponse) { int i; int maxFreqInd = filterFreqResponse.length - 1; int fftSize = 2 * maxFreqInd; ComplexArray frameDft = SignalProcUtils.getFrameDft(x, fftSize); for (i = 0; i <= maxFreqInd; i++) { frameDft.real[i] *= filterFreqResponse[i]; frameDft.imag[i] *= filterFreqResponse[i]; } for (i = maxFreqInd + 1; i < fftSize; i++) { frameDft.real[i] = frameDft.real[fftSize - i]; frameDft.imag[i] = -1.0 * frameDft.imag[fftSize - i]; } if (MathUtils.isPowerOfTwo(fftSize)) FFT.transform(frameDft.real, frameDft.imag, true); else frameDft = FFTMixedRadix.ifft(frameDft); double[] y = new double[Math.min(x.length, frameDft.real.length)]; for (i = 0; i < x.length; i++) y[i] = frameDft.real[i]; return y; } public static double[] fdFilter(double[] x, float startFreqInHz, float endFreqInHz, int samplingRateInHz, int fftSize) { while (fftSize < x.length) fftSize *= 2; ComplexArray frameDft = SignalProcUtils.getFrameDft(x, fftSize); return fdFilter(frameDft, startFreqInHz, endFreqInHz, samplingRateInHz, x.length); } public static double[] fdFilter(ComplexArray frameDft, float startFreqInHz, float endFreqInHz, int samplingRateInHz, int origLen) { int fftSize = frameDft.real.length; int maxFreqInd = fftSize / 2; int startFreqInd = SignalProcUtils.freq2index(startFreqInHz, samplingRateInHz, maxFreqInd); int endFreqInd = SignalProcUtils.freq2index(endFreqInHz, samplingRateInHz, maxFreqInd); double[] y = null; float totalRmsEnergy = 0.0f; float passbandRmsEnergy = 0.0f; int totalPassbandSamples = 0; int i; for (i = 0; i <= startFreqInd; i++) { totalRmsEnergy += frameDft.real[i] * frameDft.real[i] + frameDft.imag[i] * frameDft.imag[i]; frameDft.real[i] = 0.0; frameDft.imag[i] = 0.0; } for (i = startFreqInd + 1; i < endFreqInd; i++) { totalRmsEnergy += frameDft.real[i] * frameDft.real[i] + frameDft.imag[i] * frameDft.imag[i]; passbandRmsEnergy += frameDft.real[i] * frameDft.real[i] + frameDft.imag[i] * frameDft.imag[i]; totalPassbandSamples++; } for (i = endFreqInd; i <= maxFreqInd; i++) { totalRmsEnergy += frameDft.real[i] * frameDft.real[i] + frameDft.imag[i] * frameDft.imag[i]; frameDft.real[i] = 0.0; frameDft.imag[i] = 0.0; } for (i = maxFreqInd + 1; i < fftSize; i++) { frameDft.real[i] = frameDft.real[fftSize - i]; frameDft.imag[i] = -1.0 * frameDft.imag[fftSize - i]; } if (MathUtils.isPowerOfTwo(fftSize)) FFT.transform(frameDft.real, frameDft.imag, true); else frameDft = FFTMixedRadix.ifft(frameDft); y = new double[Math.min(origLen, frameDft.real.length)]; for (i = 0; i < origLen; i++) y[i] = frameDft.real[i]; return y; } public static void displayDFTSpectrumLinearNoWindowing(double[] frame) { int fftSize = 2; while (fftSize < frame.length) fftSize *= 2; displayDFTSpectrumLinearNoWindowing(frame, fftSize); } public static void displayDFTSpectrumLinearNoWindowing(double[] frame, int fftSize) { displayDFTSpectrumLinear(frame, fftSize, Window.RECT); } public static void displayDFTSpectrumLinear(double[] frame) { int fftSize = 2; while (fftSize < frame.length) fftSize *= 2; displayDFTSpectrumLinear(frame, fftSize); } public static void displayDFTSpectrumLinear(double[] frame, int fftSize) { displayDFTSpectrumLinear(frame, fftSize, Window.HAMMING); } public static void displayDFTSpectrumLinear(double[] frame, int fftSize, int windowType) { Window win = Window.get(windowType, frame.length); win.normalizeSquaredSum(1.0f); double[] frameW = win.apply(frame, 0); while (fftSize < frameW.length) fftSize *= 2; if (fftSize % 2 != 0) fftSize++; ComplexArray frameDft = new ComplexArray(fftSize); System.arraycopy(frameW, 0, frameDft.real, 0, frame.length); if (MathUtils.isPowerOfTwo(fftSize)) FFT.transform(frameDft.real, frameDft.imag, false); else frameDft = FFTMixedRadix.fftComplex(frameDft); DisplayUtils.plot(MathUtils.magnitudeComplex(frameDft)); } public static void displayDFTSpectrumInDBNoWindowing(double[] frame) { int fftSize = 2; while (fftSize < frame.length) fftSize *= 2; displayDFTSpectrumInDBNoWindowing(frame, fftSize); } public static void displayDFTSpectrumInDBNoWindowing(double[] frame, int fftSize) { displayDFTSpectrumInDB(frame, fftSize, Window.RECT); } public static void displayDFTSpectrumInDB(double[] frame) { int fftSize = 2; while (fftSize < frame.length) fftSize *= 2; displayDFTSpectrumInDB(frame, fftSize); } public static void displayDFTSpectrumInDB(double[] frame, int fftSize) { displayDFTSpectrumInDB(frame, fftSize, Window.HAMMING); } public static void displayDFTSpectrumInDB(double[] frame, int fftSize, int windowType) { Window win = Window.get(windowType, frame.length); if (windowType == Window.RECT) win.normalizePeakValue(1.0f); displayDFTSpectrumInDB(frame, fftSize, win.getCoeffs()); } public static void displayDFTSpectrumInDB(double[] frame, int fftSize, double[] wgt) { ComplexArray frameDft = getFrameDft(frame, fftSize, wgt); int maxFreqInd = (int) Math.floor(0.5 * fftSize + 0.5); DisplayUtils.plot(MathUtils.amp2db(MathUtils.magnitudeComplex(frameDft)), 0, maxFreqInd); } public static double[] getFrameHalfMagnitudeSpectrum(double[] frame, int fftSize) { double[] fullSpec = getFrameMagnitudeSpectrum(frame, fftSize, Window.RECT); int maxFreq = SignalProcUtils.halfSpectrumSize(fftSize); double[] halfSpec = ArrayUtils.subarray(fullSpec, 0, maxFreq); return halfSpec; } public static double[] getFrameMagnitudeSpectrum(double[] frame, int fftSize) { return getFrameMagnitudeSpectrum(frame, fftSize, Window.RECT); } public static double[] getFrameHalfMagnitudeSpectrum(double[] frame, int fftSize, int windowType) { double[] fullSpec = getFrameMagnitudeSpectrum(frame, fftSize, windowType); int maxFreq = SignalProcUtils.halfSpectrumSize(fftSize); double[] halfSpec = ArrayUtils.subarray(fullSpec, 0, maxFreq); return halfSpec; } public static double[] getFrameMagnitudeSpectrum(double[] frame, int fftSize, int windowType) { Window win = Window.get(windowType, frame.length); if (windowType == Window.RECT) win.normalizePeakValue(1.0f); return getFrameMagnitudeSpectrum(frame, fftSize, win.getCoeffs()); } public static double[] getFrameHalfMagnitudeSpectrum(double[] frame, int fftSize, double[] wgt) { double[] fullSpec = getFrameMagnitudeSpectrum(frame, fftSize, wgt); int maxFreq = SignalProcUtils.halfSpectrumSize(fftSize); double[] halfSpec = ArrayUtils.subarray(fullSpec, 0, maxFreq); return halfSpec; } public static double[] getFrameMagnitudeSpectrum(double[] frame, int fftSize, double[] wgt) { return MathUtils.magnitudeComplex(getFrameDft(frame, fftSize, wgt)); } // No windowing, i.e. rectangular window public static ComplexArray getFrameDft(double[] frame, int fftSize) { return getFrameDft(frame, fftSize, Window.RECT); } public static ComplexArray getFrameDft(double[] frame, int fftSize, int windowType) { Window win = Window.get(windowType, frame.length); if (windowType == Window.RECT) win.normalizePeakValue(1.0f); double[] wgt = win.getCoeffs(); return getFrameDft(frame, fftSize, wgt); } public static ComplexArray getFrameDft(double[] frame, int fftSize, double[] windowWgt) { double[] frameW = MathUtils.multiply(frame, windowWgt); while (fftSize < frameW.length) fftSize *= 2; if (fftSize % 2 != 0) fftSize++; ComplexArray frameDft = new ComplexArray(fftSize); System.arraycopy(frameW, 0, frameDft.real, 0, frame.length); if (MathUtils.isPowerOfTwo(fftSize)) FFT.transform(frameDft.real, frameDft.imag, false); else frameDft = FFTMixedRadix.fftComplex(frameDft); return frameDft; } public static void displayLPSpectrumLinear(double[] alpha, double lpGain, int fftSize) { double[] lpSpec = LpcAnalyser.calcSpecLinear(alpha, lpGain, fftSize); DisplayUtils.plot(lpSpec); } public static void displayLPSpectrumInDB(double[] alpha, double lpGain, int fftSize) { double[] lpSpecInDB = MathUtils.amp2db(LpcAnalyser.calcSpecLinear(alpha, lpGain, fftSize)); DisplayUtils.plot(lpSpecInDB); } // Shifts an array by N points (to right if N is positive, to left if negative) // The length of the returned array is the same as the original // Additonal end points are simply replicates of values at boundaries public static double[] shift(double[] x, int N) { if (N == 0) return x; double[] y = new double[x.length]; int i; if (N > 0) { for (i = 0; i < N; i++) y[i] = x[0]; for (i = N; i < x.length - N; i++) y[i] = x[i - N]; } else { N = -1 * N; for (i = 0; i < x.length - N; i++) y[i] = x[i + N]; for (i = x.length - N; i < x.length; i++) y[i] = x[x.length - 1]; } return y; } // If N<0, shift the sequence left // If N>0, shift the sequence right public static float[] shift(float[] x, int N) { if (N == 0) return x; float[] y = new float[x.length]; int i; if (N > 0) // Shift right { for (i = 0; i < N; i++) y[i] = x[0]; for (i = N; i < x.length; i++) y[i] = x[i - N]; } else // Shift left { N = -1 * N; for (i = 0; i < x.length - N; i++) y[i] = x[i + N]; for (i = x.length - N; i < x.length; i++) y[i] = x[x.length - 1]; } return y; } public static double[] getPeakAmplitudes(double[] sDft, double f0InHz, int numHarmonics, int fftSize, double samplingRateInHz, boolean bIncludeZerothHarmonic) { int startHarmonicIndex; if (bIncludeZerothHarmonic) startHarmonicIndex = 0; else startHarmonicIndex = 1; int endHarmonicIndex = numHarmonics; return getPeakAmplitudes(sDft, f0InHz, startHarmonicIndex, endHarmonicIndex, fftSize, samplingRateInHz, true); } public static double[] getPeakAmplitudeFrequencies(double[] sDft, double f0InHz, int numHarmonics, int fftSize, double samplingRateInHz, boolean bIncludeZerothHarmonic) { int startHarmonicIndex; if (bIncludeZerothHarmonic) startHarmonicIndex = 0; else startHarmonicIndex = 1; int endHarmonicIndex = numHarmonics; return getPeakAmplitudes(sDft, f0InHz, startHarmonicIndex, endHarmonicIndex, fftSize, samplingRateInHz, false); } /** * * @param sDft * sDtf * @param f0InHz * f0InHz * @param startHarmonicIndex * startHarmonicIndex * @param endHarmonicIndex * endHarmonicIndex * @param fftSize * fftSize * @param samplingRateInHz * samplingRateInHz * @param amplitudes * : if amplitudes true it returns the amplitude values, original function if amplitudes false it returns the * amplitude frequencies where the peaks were located * @return amps if amplitudes, ampsFreq otherwise */ public static double[] getPeakAmplitudes(double[] sDft, double f0InHz, int startHarmonicIndex, int endHarmonicIndex, int fftSize, double samplingRateInHz, boolean amplitudes) { int maxFreqIndex = (int) Math.floor(0.5 * fftSize + 0.5); int numHarmonics = endHarmonicIndex - startHarmonicIndex + 1; int numAmps = numHarmonics; double[] amps = new double[numAmps]; double[] ampsFreq = new double[numAmps]; int freqStartInd, freqEndInd; int i, k; int zeroBasedMaxFreqIndex = fftSize / 2; for (i = startHarmonicIndex; i <= endHarmonicIndex; i++) { freqStartInd = SignalProcUtils.freq2index(i * f0InHz - 0.3 * f0InHz, (int) samplingRateInHz, maxFreqIndex); freqEndInd = SignalProcUtils.freq2index(i * f0InHz + 0.3 * f0InHz, (int) samplingRateInHz, maxFreqIndex); k = MathUtils.getMaxIndex(sDft, freqStartInd, freqEndInd); amps[i - startHarmonicIndex] = sDft[k]; if (!amplitudes) ampsFreq[i - startHarmonicIndex] = SignalProcUtils.index2freq(k, (int) samplingRateInHz, zeroBasedMaxFreqIndex); } if (amplitudes) return amps; else return ampsFreq; } public static float[] getAnalysisTimes(int numfrm, double windowSizeInSeconds, double frameShiftInSeconds) { float[] analysisTimesInSeconds = null; if (numfrm > 0) { analysisTimesInSeconds = new float[numfrm]; for (int i = 0; i < numfrm; i++) analysisTimesInSeconds[i] = (float) (i * frameShiftInSeconds + 0.5 * windowSizeInSeconds); } return analysisTimesInSeconds; } public static double[][] getMapped(double[][] x, int[] mapInds) { double[][] y = null; if (mapInds != null && x != null) { y = new double[mapInds.length][]; for (int i = 0; i < mapInds.length; i++) y[i] = ArrayUtils.copy(x[mapInds[i]]); } return y; } public static float[][] getMapped(float[][] x, int[] mapInds) { float[][] y = null; if (mapInds != null && x != null) { y = new float[mapInds.length][]; for (int i = 0; i < mapInds.length; i++) y[i] = ArrayUtils.copy(x[mapInds[i]]); } return y; } public static Window getWindow(int windowType, int windowSizeInSamples) { if (windowType == Window.BARTLETT) return new BartlettWindow(windowSizeInSamples); else if (windowType == Window.BLACKMAN) return new BlackmanWindow(windowSizeInSamples); else if (windowType == Window.FLATTOP) return new FlattopWindow(windowSizeInSamples); else if (windowType == Window.GAUSS) return new GaussWindow(windowSizeInSamples); else if (windowType == Window.HAMMING) return new HammingWindow(windowSizeInSamples); else if (windowType == Window.HANNING) return new HanningWindow(windowSizeInSamples); else if (windowType == Window.RECT) return new RectWindow(windowSizeInSamples); else { System.out.println("Undefined window type!"); return null; } } public static int getTotalFrames(int totalSamples, int windowLengthInSamples, int skipSizeInSamples) { int samplingRate = 16000; // Dummy sampling rate which will be cancelled out in calculations anyway return getTotalFrames(sample2time(totalSamples, samplingRate), sample2time(windowLengthInSamples, samplingRate), sample2time(skipSizeInSamples, samplingRate)); } public static int getTotalFrames(int totalSamples, double windowSizeInSeconds, double skipSizeInSeconds) { int samplingRate = 16000; // Dummy sampling rate which will be cancelled out in calculations anyway return getTotalFrames(sample2time(totalSamples, samplingRate), windowSizeInSeconds, skipSizeInSeconds); } public static int getTotalFrames(double totalTimeInSeconds, double windowSizeInSeconds, double skipSizeInSeconds) { int numfrm = 0; if (skipSizeInSeconds > 0.0) { numfrm = (int) Math.floor((totalTimeInSeconds - windowSizeInSeconds) / skipSizeInSeconds + 0.5); if ((numfrm - 1) * skipSizeInSeconds + windowSizeInSeconds < totalTimeInSeconds) numfrm++; } return numfrm; } public static double melNonMultiplied(double freqInRadian, int samplingRateInHz) { return Math.log(1.0 + freqInRadian * samplingRateInHz / (MathUtils.TWOPI * 700)); } public static double radian2mel(double freqInRadian, int samplingRateInHz) { return Math.PI * melNonMultiplied(freqInRadian, samplingRateInHz) / melNonMultiplied(Math.PI, samplingRateInHz); } public static double hz2mel(double freqInHz, int samplingRateInHz) { return radian2mel(SignalProcUtils.hz2radian(freqInHz, samplingRateInHz), samplingRateInHz); } public static double mel2radian(double mel, int samplingRateInHz) { return MathUtils.TWOPI * 700.0 / samplingRateInHz * (-1.0 + Math.exp(mel * melNonMultiplied(Math.PI, samplingRateInHz) / Math.PI)); } public static double mel2hz(double mel, int samplingRateInHz) { return SignalProcUtils.radian2hz(mel2radian(mel, samplingRateInHz), samplingRateInHz); } public static double[] replaceNaNsWith(double[] x, double val) { double[] y = null; if (x != null && x.length > 0) { y = new double[x.length]; for (int i = 0; i < x.length; i++) { if (Double.isNaN(x[i])) y[i] = 0.0; else y[i] = x[i]; } } return y; } public static double sourceTime2targetTime(double sourceTime, Labels sourceLabels, Labels targetLabels) { int[][] map = AlignLabelsUtils.alignLabels(sourceLabels.items, targetLabels.items); return sourceTime2targetTime(sourceTime, sourceLabels, targetLabels, map); } public static double sourceTime2targetTime(double sourceTime, Labels sourceLabels, Labels targetLabels, int[][] map) { int sourceLabInd = SignalProcUtils.time2LabelIndex(sourceTime, sourceLabels); double sourceDuration, targetDuration; double locationInLabelPercent; if (sourceLabInd > 0) { sourceDuration = sourceLabels.items[sourceLabInd].time - sourceLabels.items[sourceLabInd - 1].time; locationInLabelPercent = (sourceTime - sourceLabels.items[sourceLabInd - 1].time) / sourceDuration; } else { sourceDuration = sourceLabels.items[sourceLabInd].time; locationInLabelPercent = sourceTime / sourceLabels.items[sourceLabInd].time; } int targetLabInd = StringUtils.findInMap(map, sourceLabInd); if (targetLabInd > 0) targetDuration = targetLabels.items[targetLabInd].time - targetLabels.items[targetLabInd - 1].time; else targetDuration = targetLabels.items[targetLabInd].time; double targetTime; if (targetLabInd > 0) targetTime = targetLabels.items[targetLabInd - 1].time + locationInLabelPercent * targetDuration; else targetTime = locationInLabelPercent * targetDuration; return targetTime; } public static int[] mapFrameIndices(int numfrmSource, Labels srcLabs, double srcWindowSizeInSeconds, double srcSkipSizeInSeconds, int numFrmTarget, Labels tgtLabs, double tgtWindowSizeInSeconds, double tgtSkipSizeInSeconds) { int[] mappedInds = null; int[][] mappedLabelInds = AlignLabelsUtils.alignLabels(srcLabs.items, tgtLabs.items); if (numfrmSource > 0) { mappedInds = new int[numfrmSource]; double tSource, tTarget, tFrameInd, sourceDuration, sourceLocationInLabelPercent; double sMapStart, tMapStart, sMapEnd, tMapEnd; int sourceLabInd, targetLabInd; int targetFrmInd; for (int i = 0; i < numfrmSource; i++) { tSource = i * srcSkipSizeInSeconds + 0.5 * srcWindowSizeInSeconds; sourceLabInd = SignalProcUtils.time2LabelIndex(tSource, srcLabs); targetLabInd = StringUtils.findInMap(mappedLabelInds, sourceLabInd); if (targetLabInd < 0) { sMapStart = 0.0; tMapStart = 0.0; sMapEnd = srcLabs.items[srcLabs.items.length - 1].time; tMapEnd = tgtLabs.items[tgtLabs.items.length - 1].time; for (int j = targetLabInd - 1; j >= 0; j--) { int prevSourceLabInd = StringUtils.findInMapReverse(mappedLabelInds, j); if (prevSourceLabInd > -1) { sMapStart = srcLabs.items[prevSourceLabInd].time; tMapStart = tgtLabs.items[j].time; break; } } for (int j = targetLabInd + 1; j < tgtLabs.items.length; j++) { int nextSourceLabInd = StringUtils.findInMapReverse(mappedLabelInds, j); if (nextSourceLabInd > -1) { sMapEnd = srcLabs.items[nextSourceLabInd].time; tMapEnd = tgtLabs.items[j].time; break; } } } else { sMapStart = 0.0; if (sourceLabInd > 0) sMapStart = srcLabs.items[sourceLabInd - 1].time; tMapStart = 0.0; if (targetLabInd > 0) tMapStart = tgtLabs.items[targetLabInd - 1].time; sMapEnd = srcLabs.items[sourceLabInd].time; tMapEnd = tgtLabs.items[targetLabInd].time; } tTarget = MathUtils.linearMap(tSource, sMapStart, sMapEnd, tMapStart, tMapEnd); targetFrmInd = SignalProcUtils.time2frameIndex(tTarget, tgtWindowSizeInSeconds, tgtSkipSizeInSeconds); targetFrmInd = MathUtils.CheckLimits(targetFrmInd, 0, numFrmTarget - 1); mappedInds[i] = targetFrmInd; } } return mappedInds; } // This version uses source and target labels to align speech frames public static double[] normalizeVocalTract(double[] srcSignal, double[] tgtSignal, Labels sourceLabels, Labels targetLabels, int windowType, double windowSizeInSeconds, double frameShiftInSeconds, int lpcOrder, int samplingRateInHz, float preCoef) { float[][] sourceLpcs = LpcAnalyser.signal2lpCoeffsf(srcSignal, windowType, windowSizeInSeconds, frameShiftInSeconds, samplingRateInHz, lpcOrder, preCoef); float[] sAnalysisInSeconds = SignalProcUtils .getAnalysisTimes(sourceLpcs.length, windowSizeInSeconds, frameShiftInSeconds); float[][] targetLpcs = LpcAnalyser.signal2lpCoeffsf(tgtSignal, windowType, windowSizeInSeconds, frameShiftInSeconds, samplingRateInHz, lpcOrder, preCoef); // Mapping int[] mappedInds = SignalProcUtils.mapFrameIndices(sourceLpcs.length, sourceLabels, windowSizeInSeconds, frameShiftInSeconds, targetLpcs.length, targetLabels, windowSizeInSeconds, frameShiftInSeconds); float[][] mappedTargetLpcs = SignalProcUtils.getMapped(targetLpcs, mappedInds); return normalizeVocalTract(srcSignal, sAnalysisInSeconds, mappedTargetLpcs, windowType, windowSizeInSeconds, lpcOrder, samplingRateInHz, preCoef); } public static double[] normalizeVocalTract(double[] s, float[] sAnalysisInSeconds, float[][] mappedTgtLpcs, int windowType, double windowSizeInSeconds, int lpcOrderSrc, int samplingRateInHz, float preCoef) { float[][] srcLpcs = LpcAnalyser.signal2lpCoeffsf(s, windowType, sAnalysisInSeconds, windowSizeInSeconds, samplingRateInHz, lpcOrderSrc, preCoef); return normalizeVocalTract(s, sAnalysisInSeconds, srcLpcs, mappedTgtLpcs, windowSizeInSeconds, samplingRateInHz, preCoef); } public static double[] normalizeVocalTract(double[] x, float[] tAnalysisInSeconds, float[][] srcLpcs, float[][] mappedTgtLpcs, double windowSizeInSeconds, int samplingRateInHz, float preCoef) { double[] y = null; assert tAnalysisInSeconds.length == srcLpcs.length; assert tAnalysisInSeconds.length == mappedTgtLpcs.length; int lpOrder = srcLpcs[0].length; int numfrm = tAnalysisInSeconds.length; int ws = SignalProcUtils.time2sample(windowSizeInSeconds, samplingRateInHz); int halfWs = (int) Math.floor(0.5 * ws + 0.5); Window wgt = new HammingWindow(ws); double[] winWgt = wgt.getCoeffs(); double[] frm = new double[ws]; int frmStartIndex; int frmEndIndex; int fftSize = SignalProcUtils.getDFTSize(samplingRateInHz); while (fftSize < ws) fftSize *= 2; ComplexArray expTerm = LpcAnalyser.calcExpTerm(fftSize, lpOrder); y = new double[x.length]; double[] w = new double[x.length]; Arrays.fill(y, 0.0); Arrays.fill(w, 0.0); double[] xPreemp = SignalProcUtils.applyPreemphasis(x, preCoef); int i, k; for (i = 0; i < numfrm; i++) { if (i == 0) frmStartIndex = 0; else frmStartIndex = Math.max(0, SignalProcUtils.time2sample(tAnalysisInSeconds[i] - 0.5 * windowSizeInSeconds, samplingRateInHz)); frmEndIndex = Math.min(frmStartIndex + ws - 1, xPreemp.length - 1); Arrays.fill(frm, 0.0); System.arraycopy(xPreemp, frmStartIndex, frm, 0, frmEndIndex - frmStartIndex + 1); frm = wgt.apply(frm, 0); double origEn = SignalProcUtils.energy(frm); double[] inputVocalTractSpectrum = LpcAnalyser.calcSpecLinearf(srcLpcs[i], 1.0f, fftSize, expTerm); double[] outputVocalTractSpectrum = LpcAnalyser.calcSpecLinearf(mappedTgtLpcs[i], 1.0f, fftSize, expTerm); ComplexArray inputDft = new ComplexArray(fftSize); int maxFreq = fftSize / 2 + 1; Arrays.fill(inputDft.real, 0.0); Arrays.fill(inputDft.imag, 0.0); System.arraycopy(frm, 0, inputDft.real, 0, ws); inputDft = FFTMixedRadix.fftComplex(inputDft); // MaryUtils.plot(MathUtils.amp2db(MathUtils.abs(inputDft))); // MaryUtils.plot(MathUtils.amp2db(inputVocalTractSpectrum)); // MaryUtils.plot(MathUtils.amp2db(outputVocalTractSpectrum)); for (k = 1; k <= maxFreq; k++) { inputDft.real[k - 1] = inputDft.real[k - 1] * outputVocalTractSpectrum[k - 1] / inputVocalTractSpectrum[k - 1]; inputDft.imag[k - 1] = inputDft.imag[k - 1] * outputVocalTractSpectrum[k - 1] / inputVocalTractSpectrum[k - 1]; } for (k = maxFreq + 1; k <= fftSize; k++) { inputDft.real[k - 1] = inputDft.real[2 * maxFreq - 1 - k]; inputDft.imag[k - 1] = -inputDft.imag[2 * maxFreq - 1 - k]; } // MaryUtils.plot(MathUtils.amp2db(MathUtils.abs(inputDft))); inputDft = FFTMixedRadix.ifft(inputDft); // MaryUtils.plot(frm); System.arraycopy(inputDft.real, 0, frm, 0, ws); double newEn = SignalProcUtils.energy(frm); frm = MathUtils.multiply(frm, Math.sqrt(origEn) / Math.sqrt(newEn)); // MaryUtils.plot(frm); for (k = 0; k < ws; k++) { if (frmStartIndex + k > y.length - 1) break; if (i == 0) { if (k < halfWs) { y[frmStartIndex + k] += frm[k] * winWgt[k]; w[frmStartIndex + k] += 1.0; } else { y[frmStartIndex + k] += frm[k] * winWgt[k]; w[frmStartIndex + k] += winWgt[k] * winWgt[k]; } } else if (i == numfrm - 1) { if (k > halfWs) { y[frmStartIndex + k] += frm[k] * winWgt[k]; w[frmStartIndex + k] = 1.0; } else { y[frmStartIndex + k] += frm[k] * winWgt[k]; w[frmStartIndex + k] += winWgt[k] * winWgt[k]; } } else { y[frmStartIndex + k] += frm[k] * winWgt[k]; w[frmStartIndex + k] += winWgt[k] * winWgt[k]; } } System.out.println(String.valueOf(frmStartIndex) + "-" + String.valueOf(frmEndIndex) + " Normalized vocal tract spectrum for frame " + String.valueOf(i + 1) + " of " + String.valueOf(numfrm)); } for (k = 0; k < y.length; k++) { if (w[k] > 0.0) y[k] /= w[k]; } y = SignalProcUtils.removePreemphasis(y, preCoef); // MaryUtils.plot(x); // MaryUtils.plot(y); return y; } public static void test_normalizeVocalTract() throws UnsupportedAudioFileException, IOException { String sourceWavFile = "d:\\src.wav"; String sourceLabFile = "d:\\src.lab"; String targetWavFile = "d:\\tgt.wav"; String targetLabFile = "d:\\tgt.lab"; String outputWavFile = "d:\\srcResidual_tgtVocalTract.wav"; int windowType = Window.HAMMING; double windowSizeInSeconds = 0.020; double frameShiftInSeconds = 0.010; float preCoef = 0.97f; // File input source and LPC analysis AudioInputStream inputAudio = AudioSystem.getAudioInputStream(new File(sourceWavFile)); AudioFormat format = inputAudio.getFormat(); int fsSrc = (int) inputAudio.getFormat().getSampleRate(); int lpcOrderSrc = SignalProcUtils.getLPOrder(fsSrc); AudioDoubleDataSource signal = new AudioDoubleDataSource(inputAudio); double[] s = signal.getAllData(); Labels sourceLabels = new Labels(sourceLabFile); // // File input target and LPC analysis inputAudio = AudioSystem.getAudioInputStream(new File(targetWavFile)); int fsTgt = (int) inputAudio.getFormat().getSampleRate(); int lpcOrderTgt = SignalProcUtils.getLPOrder(fsTgt); signal = new AudioDoubleDataSource(inputAudio); double[] t = signal.getAllData(); Labels targetLabels = new Labels(targetLabFile); // double[] sNorm = normalizeVocalTract(s, t, sourceLabels, targetLabels, windowType, windowSizeInSeconds, frameShiftInSeconds, lpcOrderSrc, fsSrc, preCoef); MaryAudioUtils.writeWavFile(sNorm, outputWavFile, format); } // This version does linear mapping between the whole source and target signals public static double[] normalizeVocalTract(double[] srcSignal, double[] tgtSignal, int windowType, double windowSizeInSeconds, double frameShiftInSeconds, int lpcOrder, int samplingRateInHz, float preCoef) { float[][] sourceLpcs = LpcAnalyser.signal2lpCoeffsf(srcSignal, windowType, windowSizeInSeconds, frameShiftInSeconds, samplingRateInHz, lpcOrder, preCoef); float[] sAnalysisInSeconds = SignalProcUtils .getAnalysisTimes(sourceLpcs.length, windowSizeInSeconds, frameShiftInSeconds); float[][] targetLpcs = LpcAnalyser.signal2lpCoeffsf(tgtSignal, windowType, windowSizeInSeconds, frameShiftInSeconds, samplingRateInHz, lpcOrder, preCoef); // Mapping int[] mappedInds = new int[sourceLpcs.length]; for (int i = 0; i < sourceLpcs.length; i++) mappedInds[i] = MathUtils.linearMap(i, 0, sourceLpcs.length - 1, 0, targetLpcs.length - 1); float[][] mappedTargetLpcs = SignalProcUtils.getMapped(targetLpcs, mappedInds); return normalizeVocalTract(srcSignal, sAnalysisInSeconds, mappedTargetLpcs, windowType, windowSizeInSeconds, lpcOrder, samplingRateInHz, preCoef); } public static void main(String[] args) throws UnsupportedAudioFileException, IOException { /* * LowPassFilter f = new LowPassFilter(0.25, 11); * * double[] b = f.getDenumeratorCoefficients(); * * double[] a = new double[1]; a[0] = 1.0; * * double[] x; double[] y; * * int i; String str; * * x = new double[100]; for (i=0; i<x.length; i++) x[i] = i; * * str = ""; for (i=0; i<x.length; i++) str += String.valueOf(x[i]) + " "; System.out.println(str); * * y = filter(b, a, x); str = "filtered="; for (i=0; i<y.length; i++) str += String.valueOf(y[i]) + " "; * System.out.println(str); * * y = filtfilt(b, a, x); str = "filtfilted="; for (i=0; i<y.length; i++) str += String.valueOf(y[i]) + " "; * System.out.println(str); */ test_normalizeVocalTract(); } }