/**
 * Copyright 2007 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.signalproc.sinusoidal;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Vector;

import marytts.signalproc.sinusoidal.hntm.analysis.HntmAnalyzerParams;
import marytts.signalproc.sinusoidal.hntm.analysis.HntmSpeechSignal;
import marytts.util.math.ComplexArray;
import marytts.util.math.MathUtils;
import marytts.util.signal.SignalProcUtils;

/**
 * A growable collection of {@link SinusoidalTrack}s together with per-frame analysis data (voicings, analysis times,
 * system amplitudes/phases/cepstra and frame DFTs) and a few global values of the original waveform.
 * <p>
 * {@link #totalTracks} is the allocated capacity of {@link #tracks}; {@link #currentIndex} is the index of the last slot
 * filled through {@link #add(SinusoidalTrack)} or {@link #copy(SinusoidalTracks, int, int)} (-1 after
 * {@link #initialize(int, int)}).
 *
 * @author Oytun Türk
 *
 */
public class SinusoidalTracks {
    public SinusoidalTrack[] tracks;
    public int totalTracks;
    public int currentIndex;
    public int fs; // Sampling rate in Hz, you can change this using setSamplingRate to synthesize speech at a different
                   // sampling rate
    public float origDur; // Original duration of the signal modeled by sinusoidal tracks in seconds
    public float[] voicings; // Voicing probabilities
    public float absMaxOriginal; // Absolute maximum of the original waveform
    public float totalEnergy; // Total energy of the original waveform

    public Vector<double[]> sysAmps; // System amplitudes for each speech frame
    public Vector<double[]> sysPhases; // System phases for each speech frame
    public Vector<float[]> sysCeps; // System cepstral coeffs for each speech frame
    public Vector<ComplexArray> frameDfts; // Frame DFTs for each speech frame
    public float[] times; // Analysis time instants for each speech frame

    /**
     * Creates a track set with room for <code>len</code> tracks.
     *
     * @param len
     *            number of track slots to allocate; values &lt;= 0 allocate nothing
     * @param samplingRate
     *            sampling rate in Hz
     */
    public SinusoidalTracks(int len, int samplingRate) {
        initialize(len, samplingRate);
    }

    /**
     * Creates a copy of all track slots of <code>sinTrks</code>.
     *
     * @param sinTrks
     *            track set to copy from
     */
    public SinusoidalTracks(SinusoidalTracks sinTrks) {
        this(sinTrks, 0, sinTrks.totalTracks - 1);
    }

    /**
     * Creates a copy of the slots <code>startIndex..endIndex</code> (both inclusive) of <code>sinTrks</code>.
     *
     * @param sinTrks
     *            track set to copy from
     * @param startIndex
     *            first source slot to copy
     * @param endIndex
     *            last source slot to copy
     */
    public SinusoidalTracks(SinusoidalTracks sinTrks, int startIndex, int endIndex) {
        copy(sinTrks, startIndex, endIndex);
    }

    /**
     * Sets the sampling rate.
     *
     * @param samplingRate
     *            sampling rate in Hz
     */
    public void setSamplingRate(int samplingRate) {
        fs = samplingRate;
    }

    /**
     * (Re)allocates the track storage and resets <code>currentIndex</code>, <code>origDur</code> and
     * <code>voicings</code>. Any previously stored tracks are discarded.
     *
     * @param len
     *            number of track slots to allocate; values &lt;= 0 leave <code>tracks</code> null
     * @param samplingRate
     *            sampling rate in Hz
     */
    public void initialize(int len, int samplingRate) {
        if (len > 0) {
            totalTracks = len;
            tracks = new SinusoidalTrack[totalTracks];
        } else {
            totalTracks = 0;
            tracks = null;
        }

        currentIndex = -1;
        origDur = 0.0f;
        setSamplingRate(samplingRate);
        voicings = null;
    }

    /**
     * Copies the source slots <code>startTrackIndex..endTrackIndex</code> (both inclusive, clamped to the valid range of
     * <code>srcTracks</code>) into slots <code>0..endTrackIndex-startTrackIndex</code> of this object, and copies the
     * global values and per-frame data of <code>srcTracks</code>.
     * <p>
     * The storage of this object is re-initialised (using the sampling rate of <code>srcTracks</code>) only when it is
     * too small for the requested range. Every source slot in the range is dereferenced, so it must be non-null. If
     * <code>srcTracks</code> holds no tracks, no track is copied; an object without any storage is then initialised
     * empty with the source sampling rate.
     *
     * @param srcTracks
     *            track set to copy from
     * @param startTrackIndex
     *            first source slot to copy
     * @param endTrackIndex
     *            last source slot to copy
     */
    public void copy(SinusoidalTracks srcTracks, int startTrackIndex, int endTrackIndex) {
        absMaxOriginal = srcTracks.absMaxOriginal;
        totalEnergy = srcTracks.totalEnergy;

        if (srcTracks.totalTracks > 0 && srcTracks.tracks != null) {
            int lastSourceIndex = srcTracks.totalTracks - 1;

            if (startTrackIndex < 0) {
                startTrackIndex = 0;
            }
            if (endTrackIndex < 0) {
                endTrackIndex = 0;
            }
            if (endTrackIndex > lastSourceIndex) {
                endTrackIndex = lastSourceIndex;
            }
            if (startTrackIndex > endTrackIndex) {
                startTrackIndex = endTrackIndex;
            }

            int numTracksToCopy = endTrackIndex - startTrackIndex + 1;
            if (tracks == null || totalTracks < numTracksToCopy) {
                initialize(numTracksToCopy, srcTracks.fs);
            }

            // Destination slots are numbered from 0, independent of where the range starts in the source;
            // this keeps them consistent with currentIndex below and within the allocated capacity.
            for (int i = startTrackIndex; i <= endTrackIndex; i++) {
                SinusoidalTrack source = srcTracks.tracks[i];
                int target = i - startTrackIndex;
                tracks[target] = new SinusoidalTrack(source.totalSins);
                tracks[target].copy(source);
            }

            currentIndex = numTracksToCopy - 1;

            if (srcTracks.origDur > origDur) {
                origDur = srcTracks.origDur;
            }
        } else if (tracks == null) {
            // Nothing to copy and nothing allocated here: make sure currentIndex and fs are in a defined state
            initialize(0, srcTracks.fs);
        }

        setVoicings(srcTracks.voicings);
        setTimes(srcTracks.times);
        setSystemAmps(srcTracks.sysAmps);
        setSystemPhases(srcTracks.sysPhases);
        setSystemCeps(srcTracks.sysCeps);
        setFrameDfts(srcTracks.frameDfts);
    }

    /**
     * Copies all track slots of <code>srcTracks</code> into this object.
     *
     * @param srcTracks
     *            track set to copy from
     */
    public void copy(SinusoidalTracks srcTracks) {
        copy(srcTracks, 0, srcTracks.totalTracks - 1);
    }

    /**
     * Appends a copy of <code>track</code>, enlarging the storage first when it is full, and raises
     * <code>origDur</code> to the last time instant of the track if that is larger.
     *
     * @param track
     *            track to append
     */
    public void add(SinusoidalTrack track) {
        if (currentIndex + 1 >= totalTracks) {
            if (totalTracks > 0) {
                // initialize() discards the stored tracks, so take a snapshot first and copy it back afterwards
                SinusoidalTracks tmpTracks = new SinusoidalTracks(this);
                initialize(enlargedCapacity(tmpTracks.totalTracks), fs);
                this.copy(tmpTracks);
            } else {
                initialize(1, fs);
            }
        }

        currentIndex++;

        tracks[currentIndex] = new SinusoidalTrack(1);
        tracks[currentIndex].copy(track);

        float lastTime = track.times[track.totalSins - 1];
        if (origDur < lastTime) {
            origDur = lastTime;
        }
    }

    // Capacity to grow to from a full storage of the given size: doubled below 10 slots, otherwise a fixed increment
    private static int enlargedCapacity(int capacity) {
        if (capacity < 10) {
            return 2 * capacity;
        } else if (capacity < 100) {
            return capacity + 20;
        } else if (capacity < 1000) {
            return capacity + 200;
        } else {
            return capacity + 2000;
        }
    }

    /**
     * Appends one new track per sinusoid in <code>sins</code>, each created from the given time, voicing frequency and
     * state, and raises <code>origDur</code> to <code>time</code> if that is larger.
     *
     * @param time
     *            time instant passed to each new track
     * @param sins
     *            sinusoids, one new track is created for each
     * @param maxFreqOfVoicing
     *            maximum frequency of voicing passed to each new track
     * @param state
     *            state passed to each new track
     */
    public void add(float time, Sinusoid[] sins, float maxFreqOfVoicing, int state) {
        for (int i = 0; i < sins.length; i++) {
            SinusoidalTrack tmpTrack = new SinusoidalTrack(time, sins[i], maxFreqOfVoicing, state);
            add(tmpTrack);

            if (time > origDur) {
                origDur = time;
            }
        }
    }

    /**
     * Overwrites the parameters of the <code>index</code>-th track with those of <code>track</code>. Indices outside
     * <code>0..totalTracks-1</code> are ignored.
     *
     * @param index
     *            slot to update
     * @param track
     *            track to copy the parameters from
     */
    public void update(int index, SinusoidalTrack track) {
        if (index >= 0 && index < totalTracks) {
            tracks[index].copy(track);
        }
    }

    /**
     * Prints track statistics without durations in seconds.
     */
    public void getTrackStatistics() {
        getTrackStatistics(-1.0f, -1.0f);
    }

    /**
     * Prints to standard output the number of tracks, the longest and mean track length (in sinusoids), the number of
     * tracks shorter than 5 and longer than 15 frames, and then the statistics of every track. Lengths are additionally
     * converted to seconds when both arguments are positive.
     *
     * @param windowSizeInSeconds
     *            analysis window size in seconds, or a non-positive value to omit durations
     * @param skipSizeInSeconds
     *            analysis skip size in seconds, or a non-positive value to omit durations
     */
    public void getTrackStatistics(float windowSizeInSeconds, float skipSizeInSeconds) {
        int shortLim = 5;
        int longLim = 15;

        int longest = 0;
        int numShorts = 0;
        int numLongs = 0;
        double average = 0.0;

        for (int i = 0; i < totalTracks; i++) {
            int length = tracks[i].totalSins;

            if (length > longest) {
                longest = length;
            }
            if (length < shortLim) {
                numShorts++;
            }
            if (length > longLim) {
                numLongs++;
            }

            average += length;
        }

        average /= totalTracks;

        boolean printSeconds = windowSizeInSeconds > 0 && skipSizeInSeconds > 0;

        System.out.println("Total tracks = " + String.valueOf(totalTracks));

        if (printSeconds) {
            System.out.println("Longest track = " + String.valueOf(longest) + " ("
                    + String.valueOf(longest * skipSizeInSeconds + 0.5 * windowSizeInSeconds) + " sec.)");
        } else {
            System.out.println("Longest track = " + String.valueOf(longest));
        }

        if (printSeconds) {
            System.out.println("Mean track length = " + String.valueOf(average) + " ("
                    + String.valueOf(average * skipSizeInSeconds + 0.5 * windowSizeInSeconds) + " sec.)");
        } else {
            System.out.println("Mean track length = " + String.valueOf(average));
        }

        System.out.println("Total tracks shorter than " + String.valueOf(shortLim) + " speech frames = "
                + String.valueOf(numShorts));
        System.out.println("Total tracks longer than " + String.valueOf(longLim) + " speech frames = "
                + String.valueOf(numLongs));

        for (int i = 0; i < totalTracks; i++) {
            tracks[i].getStatistics(true, true, fs, i);
        }
    }

    /**
     * @return the original duration in seconds as currently stored
     */
    public float getOriginalDuration() {
        return origDur;
    }

    /**
     * Raises <code>origDur</code> to the largest time found at the current index of any track that has a time array.
     */
    public void setOriginalDurationAuto() {
        for (int i = 0; i < totalTracks; i++) {
            SinusoidalTrack track = tracks[i];
            if (track.times != null && origDur < track.times[track.currentIndex]) {
                origDur = track.times[track.currentIndex];
            }
        }
    }

    /**
     * Sets the original duration explicitly.
     *
     * @param origDurIn
     *            original duration in seconds
     */
    public void setOriginalDurationManual(float origDurIn) {
        origDur = origDurIn;
    }

    /**
     * Stores a copy of the given voicing values; null or empty input clears them.
     *
     * @param voicingsIn
     *            voicing values, may be null
     */
    public void setVoicings(float[] voicingsIn) {
        if (voicingsIn != null && voicingsIn.length > 0) {
            voicings = new float[voicingsIn.length];
            System.arraycopy(voicingsIn, 0, voicings, 0, voicingsIn.length);
        } else {
            voicings = null;
        }
    }

    /**
     * Stores a copy of the given analysis time instants; null or empty input clears them.
     *
     * @param timesIn
     *            analysis time instants, may be null
     */
    public void setTimes(float[] timesIn) {
        if (timesIn != null && timesIn.length > 0) {
            times = new float[timesIn.length];
            System.arraycopy(timesIn, 0, times, 0, timesIn.length);
        } else {
            times = null;
        }
    }

    /**
     * Sets the system amplitudes. The vector is stored by reference, not copied.
     *
     * @param sysAmpsIn
     *            system amplitudes for each speech frame
     */
    public void setSystemAmps(Vector<double[]> sysAmpsIn) {
        sysAmps = sysAmpsIn;
    }

    /**
     * Sets the system phases. The vector is stored by reference, not copied.
     *
     * @param sysPhasesIn
     *            system phases for each speech frame
     */
    public void setSystemPhases(Vector<double[]> sysPhasesIn) {
        sysPhases = sysPhasesIn;
    }

    /**
     * Sets the system cepstral coefficients. The vector is stored by reference, not copied.
     *
     * @param sysCepsIn
     *            system cepstral coefficients for each speech frame
     */
    public void setSystemCeps(Vector<float[]> sysCepsIn) {
        sysCeps = sysCepsIn;
    }

    /**
     * Sets the frame DFTs. The vector is stored by reference, not copied.
     *
     * @param frameDftsIn
     *            frame DFTs for each speech frame
     */
    public void setFrameDfts(Vector<ComplexArray> frameDftsIn) {
        frameDfts = frameDftsIn;
    }

    /**
     * Writes all tracks to a tab-separated text file: for each track a header followed by one line per sinusoid with
     * amplitude, frequency in Hz, phase in radians, phase in degrees and time.
     *
     * @param filename
     *            path of the file to write
     * @throws IOException
     *             if the file cannot be opened or written
     */
    public void writeToTextFile(String filename) throws IOException {
        File outFile = new File(filename);
        FileWriter out = new FileWriter(outFile);

        // Close the writer even when a write fails, so the file handle is not leaked
        try {
            String str;
            for (int i = 0; i < this.totalTracks; i++) {
                str = "*** Track index= " + String.valueOf(i) + "\r\n" + "AMP(lin)\tFREQ(Hz)\tPHASE(rad)\tPHASE(°)\tTIME(sec)"
                        + "\r\n";
                out.write(str);

                for (int j = 0; j < tracks[i].totalSins; j++) {
                    str = String.format("%1$f", tracks[i].amps[j])
                            + "\t"
                            + String.format("%1$f", SignalProcUtils.radian2hz(tracks[i].freqs[j], fs))
                            + "\t"
                            + String.format("%1$f", tracks[i].phases[j])
                            + "\t"
                            + String.format("%1$f",
                                    MathUtils.unwrapToRange(MathUtils.radian2degrees(tracks[i].phases[j]), -180.0f)) + "\t"
                            + String.format("%1$f", tracks[i].times[j]) + "\r\n";
                    out.write(str);
                }

                str = "********************************************************" + "\r\n";
                out.write(str);
            }
        } finally {
            out.close();
        }
    }

    /**
     * Replaces the per-frame data (system amplitudes, phases, cepstra, frame DFTs and times) with the values of the
     * given frames; null or empty input clears all of them. The per-frame arrays are stored by reference.
     *
     * @param framesSins
     *            analysed speech frames, may be null
     */
    public void setSysAmpsAndTimes(NonharmonicSinusoidalSpeechFrame[] framesSins) {
        if (framesSins == null || framesSins.length <= 0) {
            sysAmps = null;
            sysPhases = null;
            sysCeps = null;
            frameDfts = null;
            times = null;
        } else {
            sysAmps = new Vector<double[]>();
            sysPhases = new Vector<double[]>();
            sysCeps = new Vector<float[]>();
            frameDfts = new Vector<ComplexArray>();
            times = new float[framesSins.length];

            for (int i = 0; i < framesSins.length; i++) {
                sysAmps.add(framesSins[i].systemAmps);
                sysPhases.add(framesSins[i].systemPhases);
                sysCeps.add(framesSins[i].systemCeps);
                frameDfts.add(framesSins[i].frameDfts);
                times[i] = framesSins[i].time;
            }
        }
    }

    /**
     * Replaces the per-frame cepstra and times with values taken from the frames of <code>hntmSignal</code>; system
     * amplitudes, system phases and frame DFTs are always cleared. A null signal or a signal without frames clears the
     * cepstra and times as well.
     *
     * @param hntmSignal
     *            analysed signal, may be null
     * @param params
     *            analysis parameters passed on when the cepstra of each frame are requested
     */
    public void setSysAmpsAndTimes(HntmSpeechSignal hntmSignal, HntmAnalyzerParams params) {
        sysAmps = null;
        sysPhases = null;
        frameDfts = null;

        if (hntmSignal == null || hntmSignal.frames == null || hntmSignal.frames.length <= 0) {
            sysCeps = null;
            times = null;
        } else {
            sysCeps = new Vector<float[]>();
            times = new float[hntmSignal.frames.length];

            for (int i = 0; i < hntmSignal.frames.length; i++) {
                sysCeps.add(hntmSignal.frames[i].h.getCeps(hntmSignal.frames[i].f0InHz, hntmSignal.samplingRateInHz,
                        params));
                times[i] = hntmSignal.frames[i].tAnalysisInSeconds;
            }
        }
    }
}