// SinusoidalTracks.java example
//
// Explorer
// marytts-master
/**
 * Copyright 2007 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.signalproc.sinusoidal;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.Vector;

import marytts.signalproc.sinusoidal.hntm.analysis.HntmAnalyzerParams;
import marytts.signalproc.sinusoidal.hntm.analysis.HntmSpeechSignal;
import marytts.util.math.ComplexArray;
import marytts.util.math.MathUtils;
import marytts.util.signal.SignalProcUtils;

/**
 * A growable collection of {@link SinusoidalTrack}s plus the per-frame analysis data that accompanies them (voicings,
 * analysis times, system amplitudes/phases/cepstra and frame DFTs).
 * <p>
 * The {@code tracks} array is a capacity buffer: {@link #add(SinusoidalTrack)} enlarges it in steps, so slots after
 * {@code currentIndex} may be {@code null}. All methods of this class that iterate over the tracks skip such empty slots.
 *
 * @author Oytun Türk
 *
 */
public class SinusoidalTracks {
	public SinusoidalTrack[] tracks; // Track storage; slots after currentIndex may be null
	public int totalTracks; // Number of slots allocated in tracks (capacity), not the number of stored tracks
	public int currentIndex; // Index of the most recently stored track, -1 when nothing is stored
	public int fs; // Sampling rate in Hz, you can change this using setSamplingRate to synthesize speech at a different sampling
					// rate
	public float origDur; // Original duration of the signal modeled by sinusoidal tracks in seconds
	public float[] voicings; // Voicing probabilities
	public float absMaxOriginal; // Absolute maximum of the original waveform
	public float totalEnergy; // Total energy of the original waveform

	public Vector<double[]> sysAmps; // System amplitudes for each speech frame
	public Vector<double[]> sysPhases; // System phases for each speech frame
	public Vector<float[]> sysCeps; // System cepstral coeffs for each speech frame
	public Vector<ComplexArray> frameDfts; // Frame DFTs taken from the analysis frames, one entry per speech frame
	public float[] times; // Analysis time instants for each speech frame

	/**
	 * Creates an empty collection with room for {@code len} tracks.
	 *
	 * @param len
	 *            number of track slots to allocate; values &lt;= 0 allocate nothing
	 * @param samplingRate
	 *            sampling rate in Hz
	 */
	public SinusoidalTracks(int len, int samplingRate) {
		initialize(len, samplingRate);
	}

	/**
	 * Creates a copy of all track slots of {@code sinTrks}.
	 *
	 * @param sinTrks
	 *            source collection, must not be null
	 */
	public SinusoidalTracks(SinusoidalTracks sinTrks) {
		this(sinTrks, 0, sinTrks.totalTracks - 1);
	}

	/**
	 * Creates a copy of the tracks {@code startIndex..endIndex} (both inclusive) of {@code sinTrks}.
	 *
	 * @param sinTrks
	 *            source collection, must not be null
	 * @param startIndex
	 *            first source track index, clamped to the valid range
	 * @param endIndex
	 *            last source track index, clamped to the valid range
	 */
	public SinusoidalTracks(SinusoidalTracks sinTrks, int startIndex, int endIndex) {
		copy(sinTrks, startIndex, endIndex);
	}

	public void setSamplingRate(int samplingRate) {
		fs = samplingRate;
	}

	/**
	 * (Re)allocates the track storage and resets {@code currentIndex}, {@code origDur} and {@code voicings}. Previously
	 * stored tracks are discarded; the remaining fields are left untouched.
	 *
	 * @param len
	 *            number of track slots to allocate; values &lt;= 0 leave {@code tracks} null
	 * @param samplingRate
	 *            sampling rate in Hz
	 */
	public void initialize(int len, int samplingRate) {
		if (len > 0) {
			totalTracks = len;
			tracks = new SinusoidalTrack[totalTracks];
		} else {
			totalTracks = 0;
			tracks = null;
		}

		currentIndex = -1;
		origDur = 0.0f;

		setSamplingRate(samplingRate);

		voicings = null;
	}

	/**
	 * Copies the tracks {@code startTrackIndex..endTrackIndex} (both inclusive) of {@code srcTracks} into slots
	 * {@code 0..n-1} of this object, where {@code n} is the number of tracks in the (clamped) range.
	 * <p>
	 * Both indices are clamped to the valid source range. The storage of this object is re-initialized (which also takes
	 * over the source sampling rate) only when it is missing or too small. Each track is duplicated through
	 * {@link SinusoidalTrack#copy}; empty (null) source slots stay empty. {@code voicings} and {@code times} are copied,
	 * whereas the system amplitude/phase/cepstrum and frame DFT vectors are shared with the source by reference.
	 * <p>
	 * After the call {@code currentIndex} is the index of the last track that was actually copied, or -1 if none was.
	 *
	 * @param srcTracks
	 *            source collection, must not be null
	 * @param startTrackIndex
	 *            first source track index
	 * @param endTrackIndex
	 *            last source track index
	 */
	public void copy(SinusoidalTracks srcTracks, int startTrackIndex, int endTrackIndex) {
		absMaxOriginal = srcTracks.absMaxOriginal;
		totalEnergy = srcTracks.totalEnergy;

		int available = srcTracks.numSlots();
		int numToCopy = 0;
		if (available > 0) {
			if (startTrackIndex < 0)
				startTrackIndex = 0;
			if (endTrackIndex < 0)
				endTrackIndex = 0;

			if (endTrackIndex > available - 1)
				endTrackIndex = available - 1;
			if (startTrackIndex > endTrackIndex)
				startTrackIndex = endTrackIndex;

			numToCopy = endTrackIndex - startTrackIndex + 1;
		}
		// else: an empty source has no valid index range, so nothing is copied

		if (tracks == null || numSlots() < numToCopy)
			initialize(numToCopy, srcTracks.fs);

		// Destination slots are numbered from 0, independent of where the range starts in the source
		int lastCopied = -1;
		for (int k = 0; k < numToCopy; k++) {
			SinusoidalTrack src = srcTracks.tracks[startTrackIndex + k];
			if (src == null) {
				tracks[k] = null;
				continue;
			}

			tracks[k] = new SinusoidalTrack(src.totalSins);
			tracks[k].copy(src);
			lastCopied = k;
		}

		currentIndex = lastCopied;

		if (srcTracks.origDur > origDur)
			origDur = srcTracks.origDur;

		setVoicings(srcTracks.voicings);
		setTimes(srcTracks.times);
		setSystemAmps(srcTracks.sysAmps);
		setSystemPhases(srcTracks.sysPhases);
		setSystemCeps(srcTracks.sysCeps);
		setFrameDfts(srcTracks.frameDfts);
	}

	// Copy existing tracks (srcTracks) into the current tracks
	public void copy(SinusoidalTracks srcTracks) {
		copy(srcTracks, 0, srcTracks.totalTracks - 1);
	}

	/**
	 * Appends a copy of {@code track}, enlarging the storage first when it is full. {@code origDur} is raised to the last
	 * time instant of the track if that is larger.
	 *
	 * @param track
	 *            track to append, must not be null
	 */
	public void add(SinusoidalTrack track) {
		if (currentIndex + 1 >= totalTracks) {
			if (totalTracks > 0) {
				// initialize() discards the stored tracks, so keep a snapshot and copy it back afterwards
				SinusoidalTracks tmpTracks = new SinusoidalTracks(this);
				initialize(grownCapacity(tmpTracks.totalTracks), fs);
				this.copy(tmpTracks);
			} else
				initialize(1, fs);
		}

		currentIndex++;

		tracks[currentIndex] = new SinusoidalTrack(1);
		tracks[currentIndex].copy(track);

		// A track without any sinusoid has no last time instant to compare against
		if (track.times != null && track.totalSins > 0 && track.totalSins <= track.times.length) {
			if (origDur < track.times[track.totalSins - 1])
				origDur = track.times[track.totalSins - 1];
		}
	}

	// Capacity to grow to when the storage is full: doubled while small, then enlarged in fixed steps
	private static int grownCapacity(int capacity) {
		if (capacity < 10)
			return 2 * capacity;
		if (capacity < 100)
			return capacity + 20;
		if (capacity < 1000)
			return capacity + 200;
		return capacity + 2000;
	}

	// Number of slots that can safely be indexed in tracks
	private int numSlots() {
		return tracks == null ? 0 : Math.min(totalTracks, tracks.length);
	}

	/**
	 * Starts one new track per sinusoid in {@code sins}, all at time instant {@code time}.
	 *
	 * @param time
	 *            time instant handed to each new track; {@code origDur} is raised to it if it is larger
	 * @param sins
	 *            sinusoids to start tracks from; null or empty adds nothing
	 * @param maxFreqOfVoicing
	 *            passed through to the {@link SinusoidalTrack} constructor
	 * @param state
	 *            passed through to the {@link SinusoidalTrack} constructor
	 */
	public void add(float time, Sinusoid[] sins, float maxFreqOfVoicing, int state) {
		if (sins == null)
			return;

		for (int i = 0; i < sins.length; i++) {
			add(new SinusoidalTrack(time, sins[i], maxFreqOfVoicing, state));

			if (time > origDur)
				origDur = time;
		}
	}

	// Update parameters of <index>th track; the call is ignored when index does not refer to a stored track
	public void update(int index, SinusoidalTrack track) {
		if (index >= 0 && index < numSlots() && tracks[index] != null)
			tracks[index].copy(track);
	}

	public void getTrackStatistics() {
		getTrackStatistics(-1.0f, -1.0f);
	}

	/**
	 * Prints track-length statistics to standard output and then calls {@code getStatistics} on every stored track.
	 * Lengths are counted in sinusoids per track; when both arguments are positive the longest and mean lengths are
	 * additionally converted to seconds. Empty slots are skipped and do not enter the mean.
	 *
	 * @param windowSizeInSeconds
	 *            analysis window size in seconds, or a non-positive value to omit the conversion
	 * @param skipSizeInSeconds
	 *            analysis skip size in seconds, or a non-positive value to omit the conversion
	 */
	public void getTrackStatistics(float windowSizeInSeconds, float skipSizeInSeconds) {
		final int shortLim = 5;
		final int longLim = 15;
		final int slots = numSlots();
		final boolean showSeconds = windowSizeInSeconds > 0 && skipSizeInSeconds > 0;

		int longest = 0;
		int numShorts = 0;
		int numLongs = 0;
		int numStored = 0;
		double average = 0.0;

		for (int i = 0; i < slots; i++) {
			SinusoidalTrack trk = tracks[i];
			if (trk == null)
				continue;

			numStored++;

			if (trk.totalSins > longest)
				longest = trk.totalSins;

			if (trk.totalSins < shortLim)
				numShorts++;

			if (trk.totalSins > longLim)
				numLongs++;

			average += trk.totalSins;
		}

		// Floating point division: yields NaN (not an exception) when there is no track at all
		average /= numStored;

		System.out.println("Total tracks = " + String.valueOf(totalTracks));
		if (showSeconds)
			System.out.println("Longest track = " + String.valueOf(longest) + " ("
					+ String.valueOf(longest * skipSizeInSeconds + 0.5 * windowSizeInSeconds) + " sec.)");
		else
			System.out.println("Longest track = " + String.valueOf(longest));

		if (showSeconds)
			System.out.println("Mean track length = " + String.valueOf(average) + " ("
					+ String.valueOf(average * skipSizeInSeconds + 0.5 * windowSizeInSeconds) + " sec.)");
		else
			System.out.println("Mean track length = " + String.valueOf(average));

		System.out.println("Total tracks shorter than " + String.valueOf(shortLim) + " speech frames = "
				+ String.valueOf(numShorts));
		System.out
				.println("Total tracks longer than " + String.valueOf(longLim) + " speech frames = " + String.valueOf(numLongs));

		for (int i = 0; i < slots; i++) {
			if (tracks[i] != null)
				tracks[i].getStatistics(true, true, fs, i);
		}
	}

	public float getOriginalDuration() {
		return origDur;
	}

	/**
	 * Raises {@code origDur} to the largest "current" time instant found among the stored tracks, i.e.
	 * {@code times[currentIndex]} of each track. Empty slots and tracks without a valid current entry are skipped.
	 */
	public void setOriginalDurationAuto() {
		final int slots = numSlots();
		for (int i = 0; i < slots; i++) {
			SinusoidalTrack trk = tracks[i];
			if (trk == null || trk.times == null)
				continue;

			int last = trk.currentIndex;
			if (last < 0 || last >= trk.times.length)
				continue;

			if (origDur < trk.times[last])
				origDur = trk.times[last];
		}
	}

	public void setOriginalDurationManual(float origDurIn) {
		origDur = origDurIn;
	}

	// Stores a private copy of voicingsIn; null or empty input clears the field
	public void setVoicings(float[] voicingsIn) {
		voicings = (voicingsIn != null && voicingsIn.length > 0) ? voicingsIn.clone() : null;
	}

	// Stores a private copy of timesIn; null or empty input clears the field
	public void setTimes(float[] timesIn) {
		times = (timesIn != null && timesIn.length > 0) ? timesIn.clone() : null;
	}

	// The following setters keep the given vector by reference (no copy is made)
	public void setSystemAmps(Vector<double[]> sysAmpsIn) {
		sysAmps = sysAmpsIn;
	}

	public void setSystemPhases(Vector<double[]> sysPhasesIn) {
		sysPhases = sysPhasesIn;
	}

	public void setSystemCeps(Vector<float[]> sysCepsIn) {
		sysCeps = sysCepsIn;
	}

	public void setFrameDfts(Vector<ComplexArray> frameDftsIn) {
		frameDfts = frameDftsIn;
	}

	/**
	 * Writes all stored tracks to a tab-separated text file: per track a header, one line per sinusoid (amplitude,
	 * frequency in Hz, phase in radians, phase in degrees, time) and a separator line. Empty slots are skipped. The file
	 * is written with the platform default encoding of {@link FileWriter}.
	 *
	 * @param filename
	 *            path of the file to create or overwrite
	 * @throws IOException
	 *             if the file cannot be opened or written
	 */
	public void writeToTextFile(String filename) throws IOException {
		final String eol = "\r\n";
		final int slots = numSlots();

		FileWriter out = new FileWriter(new File(filename));
		try {
			for (int i = 0; i < slots; i++) {
				SinusoidalTrack trk = tracks[i];
				if (trk == null)
					continue;

				out.write("*** Track index= " + String.valueOf(i) + eol + "AMP(lin)\tFREQ(Hz)\tPHASE(rad)\tPHASE(°)\tTIME(sec)"
						+ eol);

				for (int j = 0; j < trk.totalSins; j++) {
					String str = String.format("%1$f", trk.amps[j]) + "\t"
							+ String.format("%1$f", SignalProcUtils.radian2hz(trk.freqs[j], fs)) + "\t"
							+ String.format("%1$f", trk.phases[j]) + "\t"
							+ String.format("%1$f", MathUtils.unwrapToRange(MathUtils.radian2degrees(trk.phases[j]), -180.0f))
							+ "\t" + String.format("%1$f", trk.times[j]) + eol;

					out.write(str);
				}

				out.write("********************************************************" + eol);
			}
		} finally {
			// Release the file handle even when a write fails
			out.close();
		}
	}

	/**
	 * Replaces the per-frame system data and analysis times with the values found in {@code framesSins}. The arrays and
	 * DFT objects of the frames are stored by reference. A null or empty argument clears all five fields.
	 *
	 * @param framesSins
	 *            analysis frames, one per speech frame; individual entries must not be null
	 */
	public void setSysAmpsAndTimes(NonharmonicSinusoidalSpeechFrame[] framesSins) {
		if (framesSins == null || framesSins.length <= 0) {
			sysAmps = null;
			sysPhases = null;
			sysCeps = null;
			frameDfts = null;
			times = null;
			return;
		}

		final int numFrames = framesSins.length;
		sysAmps = new Vector<double[]>(numFrames);
		sysPhases = new Vector<double[]>(numFrames);
		sysCeps = new Vector<float[]>(numFrames);
		frameDfts = new Vector<ComplexArray>(numFrames);
		times = new float[numFrames];

		for (int i = 0; i < numFrames; i++) {
			NonharmonicSinusoidalSpeechFrame frame = framesSins[i];
			sysAmps.add(frame.systemAmps);
			sysPhases.add(frame.systemPhases);
			sysCeps.add(frame.systemCeps);
			frameDfts.add(frame.frameDfts);
			times[i] = frame.time;
		}
	}

	/**
	 * Replaces the cepstral coefficients and analysis times with values derived from {@code hntmSignal}. System
	 * amplitudes, system phases and frame DFTs are always cleared by this overload. A null signal, or one without frames,
	 * also clears the cepstra and times.
	 *
	 * @param hntmSignal
	 *            analysed signal providing the frames
	 * @param params
	 *            analyzer parameters passed on to {@code getCeps}
	 */
	public void setSysAmpsAndTimes(HntmSpeechSignal hntmSignal, HntmAnalyzerParams params) {
		sysAmps = null;
		sysPhases = null;
		frameDfts = null;

		if (hntmSignal == null || hntmSignal.frames == null || hntmSignal.frames.length <= 0) {
			sysCeps = null;
			times = null;
			return;
		}

		final int numFrames = hntmSignal.frames.length;
		sysCeps = new Vector<float[]>(numFrames);
		times = new float[numFrames];

		for (int i = 0; i < numFrames; i++) {
			sysCeps.add(hntmSignal.frames[i].h.getCeps(hntmSignal.frames[i].f0InHz, hntmSignal.samplingRateInHz, params));
			times[i] = hntmSignal.frames[i].tAnalysisInSeconds;
		}
	}
}