OctaveVoiceQualityProcessor.java example

Explorer
marytts-master
/**
 * Copyright 2010 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.tools.voiceimport;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;
import java.util.SortedMap;
import java.util.StringTokenizer;
import java.util.TreeMap;

import marytts.signalproc.analysis.VoiceQuality;
import marytts.util.io.StreamGobbler;
import marytts.util.MaryUtils;

public class OctaveVoiceQualityProcessor extends VoiceImportComponent {

	/** Database layout of the voice being built; assigned in {@link #getDefaultProps(DatabaseLayout)}. */
	protected DatabaseLayout db;
	/** Component name returned by {@link #getName()}; the property keys below all start with this text. */
	private String name = "OctaveVoiceQualityProcessor";
	/** Suffix appended to the basename for the intermediate snack output (f0 and formants) in the VQDIR directory. */
	protected String snackExtension = ".snack";
	/** Suffix appended to the basename for the intermediate octave output in the VQDIR directory. */
	protected String octaveExtension = ".octave";
	/** Suffix appended to the basename for the final voice quality file in the VQDIR directory. */
	protected String voiceQualityExtension = ".vq";
	/** Path of the generated Tcl script that drives snack; set in {@link #initialiseComp()}. */
	protected String scriptSnackFileName;
	/** Path of the generated octave script; set in {@link #initialiseComp()}. */
	protected String scriptOctaveFileName;

	int numVqParams = 5; // number of voice quality parameters extracted from the sound files:
							// OQG, GOG, SKG, RCG, IC

	/** Progress of {@link #compute()} in percent, as reported by {@link #getProgress()}. */
	private int percent = 0;
	// private final String FRAMELENGTH = "0.01"; // Default for snack
	// private final String WINDOWLENGTH = "0.025"; // Default for f0 snack ( formants uses a bigger window)

	// Keys of this component's configurable properties. The help text for each key is registered in setupHelp() and
	// its default value in getDefaultProps().
	public final String SAMPLINGRATE = "OctaveVoiceQualityProcessor.samplingRate";
	public final String MINPITCH = "OctaveVoiceQualityProcessor.minPitch";
	public final String MAXPITCH = "OctaveVoiceQualityProcessor.maxPitch";
	public final String FRAMELENGTH = "OctaveVoiceQualityProcessor.frameLength";
	public final String WINDOWLENGTH = "OctaveVoiceQualityProcessor.windowLength";
	public final String NUMFORMANTS = "OctaveVoiceQualityProcessor.numFormants";
	public final String LPCORDER = "OctaveVoiceQualityProcessor.lpcOrder";
	public final String VQDIR = "OctaveVoiceQualityProcessor.vqDir";
	public final String OCTAVEPATH = "OctaveVoiceQualityProcessor.octavePath";

	/**
	 * Registers a short help text for every property key of this component in {@code props2Help}.
	 * <p>
	 * The map is built only once; if it already exists the method returns without touching it. The defaults quoted in
	 * the texts mirror the values installed by {@link #getDefaultProps(DatabaseLayout)}, which uses the same pitch range
	 * for every gender.
	 */
	protected void setupHelp() {
		if (props2Help != null) {
			return;
		}
		props2Help = new TreeMap<String, String>();
		props2Help.put(SAMPLINGRATE, "Sampling frequency in Hertz. Default: 16000");
		props2Help.put(MINPITCH, "minimum value for the pitch (in Hz). Default: 60");
		props2Help.put(MAXPITCH, "maximum value for the pitch (in Hz). Default: 400");
		props2Help.put(FRAMELENGTH, "frame length (in seconds) for VQ calculation Default: 0.005 sec.");
		props2Help.put(WINDOWLENGTH, "window length (in seconds) for VQ calculation Default: 0.025 sec.");
		props2Help.put(NUMFORMANTS, "Default 4, maximum 7");
		props2Help.put(LPCORDER, "Default 12, if NUMFORMANTS=4 min LPCORDER=12\n" + "if NUMFORMANTS=5 min LPCORDER=14\n"
				+ "if NUMFORMANTS=6 min LPCORDER=16\n" + "if NUMFORMANTS=7 min LPCORDER=18\n");
		props2Help.put(VQDIR, "directory containing the voice quality files. Will be created if it does not exist");
		props2Help.put(OCTAVEPATH, "octave executable path");
	}

	/**
	 * Returns the name of this component.
	 *
	 * @return the value of the {@code name} field
	 */
	public final String getName() {
		return this.name;
	}

	/**
	 * Derives the locations of the two generated helper scripts (Tcl script for snack, m-file for octave) from the
	 * database's temporary directory.
	 */
	@Override
	protected void initialiseComp() {
		// TEMPDIR is used as a plain prefix, so it is expected to end with a file separator
		final String tempDir = db.getProp(db.TEMPDIR);
		this.scriptSnackFileName = tempDir + "snack_call.tcl";
		this.scriptOctaveFileName = tempDir + "octave_call.m";
	}

	/**
	 * Remembers the database layout and, on first call, creates the property map of this component with its default
	 * values.
	 *
	 * @param db
	 *            the database layout of the voice being built; stored in {@link #db}
	 * @return the property map of this component (created on the first call, reused afterwards)
	 */
	public SortedMap getDefaultProps(DatabaseLayout db) {
		this.db = db;
		if (props == null) {
			props = new TreeMap<String, String>();
			props.put(SAMPLINGRATE, "16000");
			// The pitch range is deliberately the same for every gender: per the note in compute(), min 60 / max 400
			// must be kept constant so that snack yields the same number of frames for f0 and formants.
			props.put(MINPITCH, "60");
			props.put(MAXPITCH, "400");
			props.put(FRAMELENGTH, "0.005");
			props.put(WINDOWLENGTH, "0.025");
			props.put(NUMFORMANTS, "4");
			props.put(LPCORDER, "12");
			props.put(VQDIR, db.getProp(db.ROOTDIR) + "vq" + File.separator);
			props.put(OCTAVEPATH, "/usr/bin/octave");
		}
		return props;
	}

	/**
	 * The standard compute() method of the VoiceImportComponent interface.
	 * <p>
	 * Writes the snack (Tcl) and octave helper scripts, makes sure the VQDIR directory exists and then, for every
	 * basename in the basename list: runs snack to extract f0 and formants, runs octave to calculate the voice quality
	 * parameters, reads the octave output and writes it as a voice quality file into VQDIR.
	 * 
	 * @return always true
	 * @throws Exception
	 *             if a helper script or the VQDIR directory cannot be created, an external tool cannot be started, or
	 *             waiting for an external tool is interrupted
	 */
	public boolean compute() throws Exception {

		/*
		 * In order to get the same number of frames when calculating f0 and formants with snack, we should keep constant the
		 * following variables: -maxpitch 400 for F0 calculation -minpitch 60 for F0 calculation -windowlength 0.03 for formants
		 * calculation -framelength should be the same for f0, formants and this voice quality processor, this value can be
		 * changed, ex: 0.005, 0.01 etc.
		 */
		writeSnackScriptFile();
		writeOctaveScriptFile();

		String[] baseNameArray = bnl.getListAsArray();
		// to test String[] baseNameArray = {"curious", "u"};
		System.out.println("Computing voice quality for " + baseNameArray.length + " utterances.");

		/* Ensure the existence of the target voice quality directory (including missing parent directories) */
		File dir = new File(getProp(VQDIR));
		if (!dir.exists()) {
			System.out.println("Creating the directory [" + getProp(VQDIR) + "].");
			if (!dir.mkdirs() && !dir.isDirectory()) {
				throw new IOException("Could not create the directory [" + getProp(VQDIR) + "]");
			}
		}

		// Some general parameters that apply to all the sound files
		int samplingRate = Integer.parseInt(getProp(SAMPLINGRATE));
		// frameLength and windowLength in samples
		int frameLength = Math.round(Float.parseFloat(getProp(FRAMELENGTH)) * samplingRate);
		int windowLength = Math.round(Float.parseFloat(getProp(WINDOWLENGTH)) * samplingRate);

		/* execute octave and voice quality parameters extraction */
		for (int i = 0; i < baseNameArray.length; i++) {
			percent = 100 * i / baseNameArray.length;

			String wavFile = db.getProp(db.WAVDIR) + baseNameArray[i] + db.getProp(db.WAVEXT);
			String octaveFile = getProp(VQDIR) + baseNameArray[i] + octaveExtension;
			String snackFile = getProp(VQDIR) + baseNameArray[i] + snackExtension;
			String vqFile = getProp(VQDIR) + baseNameArray[i] + voiceQualityExtension;

			/* call snack for calculating f0 and formants */
			System.out.println("Writing (snack) f0+formants+bandWidths to " + snackFile);
			// The command is passed as an argument list, not as one string, so that paths containing blanks are
			// not split into several arguments.
			List<String> snackCommand = new ArrayList<String>();
			if (MaryUtils.isWindows()) {
				snackCommand.add("cmd.exe");
				snackCommand.add("/c");
			}
			snackCommand.add(db.getExternal(db.TCLPATH) + "/tclsh");
			snackCommand.add(scriptSnackFileName);
			snackCommand.add(wavFile);
			snackCommand.add(snackFile);
			snackCommand.add(getProp(MAXPITCH));
			snackCommand.add(getProp(MINPITCH));
			snackCommand.add(getProp(FRAMELENGTH));
			snackCommand.add(getProp(NUMFORMANTS));
			snackCommand.add(getProp(LPCORDER));
			runExternalCommand("snack", snackCommand);

			/* call octave for calculating VQ parameters (OQG GOG SKG RCG IC) */
			List<String> octaveCommand = new ArrayList<String>();
			octaveCommand.add(getProp(OCTAVEPATH));
			octaveCommand.add("--silent");
			octaveCommand.add(scriptOctaveFileName);
			octaveCommand.add(wavFile);
			octaveCommand.add(snackFile);
			// The gender is a database property (see getDefaultProps), not a property of this component, so it has to
			// be looked up through the database layout.
			octaveCommand.add(db.getProp(db.GENDER));
			octaveCommand.add(octaveFile);
			runExternalCommand("octave", octaveCommand);

			// Read the sound file
			WavReader soundFile = new WavReader(wavFile);
			// Check sampling rate of sound file
			assert samplingRate == soundFile.getSampleRate();

			// get a wrapper voice quality class for this file; frame and window length are passed in seconds
			VoiceQuality vq = new VoiceQuality(numVqParams, samplingRate, frameLength / (float) samplingRate, windowLength
					/ (float) samplingRate);

			readOctaveData(vq, octaveFile);

			System.out.println("Writing (octave) vq parameters to " + vqFile);
			vq.writeVqFile(vqFile);

		}
		// all utterances processed
		percent = 100;
		return true;
	}

	/**
	 * Writes the Tcl script which lets snack extract f0 and formants of one sound file. Script arguments: wav file, output
	 * file, max pitch, min pitch, frame length, number of formants, LPC order.
	 */
	private void writeSnackScriptFile() throws IOException {
		File scriptSnack = new File(scriptSnackFileName);
		if (scriptSnack.exists())
			scriptSnack.delete();
		PrintWriter toScriptSnack = new PrintWriter(new FileWriter(scriptSnack));
		try {
			toScriptSnack.println("# extracting pitch anf formants using snack");
			toScriptSnack.println("package require snack");
			toScriptSnack.println("snack::sound s");
			toScriptSnack.println("s read [lindex $argv 0]");
			toScriptSnack.println("set fd [open [lindex $argv 1] w]");
			toScriptSnack
					.println("set f0 [s pitch -method esps -maxpitch [lindex $argv 2] -minpitch [lindex $argv 3] -framelength [lindex $argv 4] ]");
			toScriptSnack.println("set f0_length [llength $f0]");
			toScriptSnack
					.println("set formants [s formant -numformants [lindex $argv 5] -lpcorder [lindex $argv 6] -framelength [lindex $argv 4] -windowlength 0.03]");
			toScriptSnack.println("set formants_length [llength $formants]");
			toScriptSnack.println("set n 0");
			toScriptSnack.println("foreach line $f0 {");
			toScriptSnack.println("puts -nonewline $fd \"[lindex $line 0] \"");
			toScriptSnack.println("puts $fd [lindex $formants $n]");
			toScriptSnack.println("incr n");
			toScriptSnack.println("}");
			toScriptSnack.println("close $fd");
			toScriptSnack.println("exit");
			// PrintWriter never throws on write errors, so ask explicitly
			if (toScriptSnack.checkError()) {
				throw new IOException("Could not write the snack script " + scriptSnackFileName);
			}
		} finally {
			toScriptSnack.close();
		}
	}

	/**
	 * Writes the octave script which changes to the temporary directory and calls calculateVoiceQuality with the first
	 * four command line arguments.
	 */
	private void writeOctaveScriptFile() throws IOException {
		File scriptOctave = new File(scriptOctaveFileName);
		if (scriptOctave.exists())
			scriptOctave.delete();
		PrintWriter toScriptOctave = new PrintWriter(new FileWriter(scriptOctave));
		try {
			toScriptOctave.println("arg_list = argv ();");
			toScriptOctave.println("cd " + db.getProp(db.TEMPDIR));
			// calculateVoiceQuality(filename, filesnack, gender, par_name, debug);
			toScriptOctave.println("calculateVoiceQuality(arg_list{1}, arg_list{2}, arg_list{3}, arg_list{4});");
			// PrintWriter never throws on write errors, so ask explicitly
			if (toScriptOctave.checkError()) {
				throw new IOException("Could not write the octave script " + scriptOctaveFileName);
			}
		} finally {
			toScriptOctave.close();
		}
	}

	/**
	 * Starts an external command, hands its error and output streams to StreamGobbler instances, waits for the command to
	 * finish and prints a warning if it ends with a non-zero exit code.
	 * 
	 * @param toolName
	 *            name of the tool, used in the warning only
	 * @param command
	 *            program and arguments, one list element each
	 * @return the exit code of the command
	 */
	private int runExternalCommand(String toolName, List<String> command) throws IOException, InterruptedException {
		Process process = Runtime.getRuntime().exec(command.toArray(new String[command.size()]));
		StreamGobbler errorGobbler = new StreamGobbler(process.getErrorStream(), "err");
		StreamGobbler outputGobbler = new StreamGobbler(process.getInputStream(), "out");
		// start reading from the streams
		errorGobbler.start();
		outputGobbler.start();
		int exitCode = process.waitFor();
		if (exitCode != 0) {
			// only warn: the following steps are still attempted, as before
			System.err.println("Warning: " + toolName + " exited with code " + exitCode + " for command " + command);
		}
		return exitCode;
	}

	/**
	 * Reads the text file produced by octave and stores its content in the given voice quality object.
	 * <p>
	 * Every line of the file is one frame; its whitespace separated numbers are the parameter values of that frame. At
	 * most {@code vq.params.dimension} values are read per line; values missing on a line stay 0. The data is handed to
	 * {@code vq.allocate} as an array indexed [parameter][frame].
	 * <p>
	 * Failures while reading or parsing are reported on standard error and are not rethrown; in that case
	 * {@code vq.allocate} is not called. The reader is closed in every case.
	 * 
	 * @param vq
	 *            voice quality object receiving the data
	 * @param octaveFile
	 *            path of the octave output file
	 * @throws IOException
	 *             if the file cannot be opened
	 */
	private void readOctaveData(VoiceQuality vq, String octaveFile) throws IOException {
		BufferedReader reader = new BufferedReader(new FileReader(octaveFile));
		try {
			// the number of frames is only known once all lines are read
			List<String> lines = new ArrayList<String>();
			String line;
			while ((line = reader.readLine()) != null) {
				lines.add(line);
			}
			int numLines = lines.size();
			int numData = vq.params.dimension;
			double[][] octaveData = new double[numData][numLines];
			for (int i = 0; i < numLines; i++) {
				StringTokenizer tokens = new StringTokenizer(lines.get(i));
				for (int j = 0; j < numData && tokens.hasMoreTokens(); j++) {
					octaveData[j][i] = Double.parseDouble(tokens.nextToken());
				}
			}
			vq.allocate(numLines, octaveData);

		} catch (IOException ioe) {
			System.err.println("Could not read octave data from " + octaveFile);
			ioe.printStackTrace();
		} catch (NumberFormatException nfe) {
			System.err.println("Malformed number in octave data file " + octaveFile);
			nfe.printStackTrace();
		} finally {
			// release the file handle; a failure to close is reported like the other read problems
			try {
				reader.close();
			} catch (IOException closeFailure) {
				closeFailure.printStackTrace();
			}
		}
	}

	/**
	 * Reports how far {@link #compute()} has got.
	 * 
	 * @return the current value of the progress counter in percent; compute() sets it from the index of the utterance
	 *         being processed
	 */
	public int getProgress() {
		return this.percent;
	}

	/**
	 * Test helper to compare the vq values of several files: reads the whisper, modal, creak and harsh voice quality
	 * files from a fixed directory, one after the other, and prints mean and standard deviation of each.
	 * 
	 * @param args
	 *            not used
	 * @throws Exception
	 *             if reading a file fails
	 */
	public static void main1(String[] args) throws Exception {
		final String vqDirectory = "/project/mary/marcela/HMM-voices/arctic_test/vq-octave/";
		final String[] vqFileNames = { "whisper.vq", "modal.vq", "creak.vq", "harsh.vq" };

		for (String vqFileName : vqFileNames) {
			String vqPath = vqDirectory + vqFileName;
			VoiceQuality quality = new VoiceQuality();
			System.out.println("Reading: " + vqPath);
			quality.readVqFile(vqPath);
			quality.printMeanStd();
		}
	}

	/**
	 * Test entry point: reads one fixed voice quality file and prints its parameters followed by their mean and standard
	 * deviation.
	 * 
	 * @param args
	 *            not used
	 * @throws Exception
	 *             if reading the file fails
	 */
	public static void main(String[] args) throws Exception {
		// To compare several files instead, call main1(args).
		final String vqPath = "/project/mary/marcela/UnitSel-voices/slt-arctic/vq/curious.vq";

		VoiceQuality quality = new VoiceQuality();
		System.out.println("Reading: " + vqPath);
		quality.readVqFile(vqPath);

		quality.printPar();
		quality.printMeanStd();
	}

}