// VoiceDataDumper.java example
//
// Explorer
// marytts-master
/**
 * Copyright 2010 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

package marytts.unitselection.analysis;

import java.io.BufferedOutputStream;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.BufferUnderflowException;

import marytts.exceptions.MaryConfigurationException;
import marytts.features.FeatureDefinition;
import marytts.features.FeatureVector;
import marytts.server.MaryProperties;
import marytts.unitselection.data.FeatureFileReader;
import marytts.unitselection.data.TimelineReader;
import marytts.unitselection.data.Unit;
import marytts.unitselection.data.UnitDatabase;
import marytts.unitselection.data.UnitFileReader;
import marytts.util.data.Datagram;
import marytts.util.data.text.PraatInterval;
import marytts.util.data.text.PraatIntervalTier;
import marytts.util.data.text.PraatTextGrid;

/**
 * Convenience class to dump relevant data from a unit selection voice to a Praat TextGrid and a wav file for inspection of
 * timeline data in external tools (e.g. Praat, WaveSurfer, etc.)
 * <p>
 * The entry point is {@link #dumpData(String)}, which loads the unit database and feature file of a voice, writes the
 * TextGrid via {@link #dumpTextGrid(String)} and then the audio via {@link #dumpAudio(String)}. The audio dump relies on the
 * byte count collected while the TextGrid is written, so the TextGrid must be dumped first.
 * 
 * @author steiner
 * 
 */
public class VoiceDataDumper {
	/** Number of header bytes that follow the RIFF chunk size field in the WAV header written by {@link #dumpAudio(String)}. */
	private static final int WAV_HEADER_REMAINDER = 36;

	/** Largest value that can be stored in the unsigned 32-bit size fields of a RIFF/WAV header. */
	private static final long MAX_RIFF_SIZE = 0xFFFFFFFFL;

	/** File name extension of the audio timeline file, replaced to derive the output file names. */
	private static final String TIMELINE_EXTENSION = ".mry";

	/** Unit database, populated by {@link #loadUnitDatabase(String, String, String)}. */
	protected UnitDatabase unitDB;

	/** Reader for the unit feature file, populated by {@link #loadFeatureFile(String)}. */
	protected FeatureFileReader featureFileReader;

	/**
	 * Total number of raw audio <em>bytes</em> (despite the name) of all non-edge units, as counted by
	 * {@link #dumpTextGrid(String)}. It is written into the size fields of the WAV header by {@link #dumpAudio(String)}.
	 */
	protected long numSamples = 0;

	/** Feature definition taken from the feature file. */
	protected FeatureDefinition featureDefinition;

	/** Index of the "phone" feature in {@link #featureDefinition}. */
	protected int phoneFeatureIndex;

	/** Index of the "halfphone_lr" feature in {@link #featureDefinition}. */
	protected int halfphoneLRFeatureIndex;

	public VoiceDataDumper() {

	}

	/**
	 * Reverse the byte order of a 32-bit value (big-endian to little-endian and vice versa).
	 * 
	 * @see marytts.util.data.audio.WavWriter#byteswap(int)
	 * @param val
	 *            value whose bytes to swap
	 * @return val with its four bytes in reverse order
	 */
	protected int byteswap(int val) {
		return Integer.reverseBytes(val);
	}

	/**
	 * Reverse the byte order of a 16-bit value (big-endian to little-endian and vice versa).
	 * 
	 * @see marytts.util.data.audio.WavWriter#byteswap(short)
	 * @param val
	 *            value whose bytes to swap
	 * @return val with its two bytes exchanged
	 */
	protected short byteswap(short val) {
		return Short.reverseBytes(val);
	}

	/**
	 * Load audio timeline from file
	 * 
	 * @param fileName
	 *            to load
	 * @return TimelineReader
	 * @throws IOException
	 *             IOException
	 * @throws MaryConfigurationException
	 *             MaryConfigurationException
	 */
	protected TimelineReader loadAudioTimeline(String fileName) throws IOException, MaryConfigurationException {
		return new TimelineReader(fileName);
	}

	/**
	 * Load unit database from various relevant files
	 * 
	 * @param audioTimelineFileName
	 *            to load
	 * @param basenameTimelineFileName
	 *            to load
	 * @param unitFileName
	 *            to load
	 * @throws IOException
	 *             IOException
	 * @throws MaryConfigurationException
	 *             MaryConfigurationException
	 */
	protected void loadUnitDatabase(String audioTimelineFileName, String basenameTimelineFileName, String unitFileName)
			throws IOException, MaryConfigurationException {
		unitDB = new UnitDatabase();
		UnitFileReader unitFileReader = new UnitFileReader(unitFileName);
		// go through loadAudioTimeline() so that subclasses can supply a different timeline reader:
		TimelineReader audioTimelineReader = loadAudioTimeline(audioTimelineFileName);
		TimelineReader basenameTimelineReader = new TimelineReader(basenameTimelineFileName);
		unitDB.load(null, null, unitFileReader, null, audioTimelineReader, basenameTimelineReader, 0);
	}

	/**
	 * Load unit feature file from file, and look up the indices of the "phone" and "halfphone_lr" features.
	 * 
	 * @param fileName
	 *            to load
	 * @throws IOException
	 *             IOException
	 * @throws MaryConfigurationException
	 *             MaryConfigurationException
	 */
	protected void loadFeatureFile(String fileName) throws IOException, MaryConfigurationException {
		featureFileReader = new FeatureFileReader(fileName);
		featureDefinition = featureFileReader.getFeatureDefinition();
		phoneFeatureIndex = featureDefinition.getFeatureIndex("phone");
		halfphoneLRFeatureIndex = featureDefinition.getFeatureIndex("halfphone_lr");
	}

	/**
	 * Get total duration of a Datagram array
	 * 
	 * @param datagrams
	 *            whose duration to get
	 * @return total duration in seconds
	 */
	protected double getDuration(Datagram[] datagrams) {
		// add up the durations in samples first and convert once, in double precision, so that no rounding error is
		// accumulated per datagram:
		long totalDurationInSamples = 0;
		for (Datagram datagram : datagrams) {
			totalDurationInSamples += datagram.getDuration();
		}
		return totalDurationInSamples / (double) unitDB.getAudioTimeline().getSampleRate();
	}

	/**
	 * Get raw samples from all Datagrams in an array
	 * 
	 * @param datagrams
	 *            whose samples to get
	 * @return raw samples as stored in the Datagrams, concatenated in array order
	 * @throws IOException
	 *             IOException
	 */
	protected byte[] getSamples(Datagram[] datagrams) throws IOException {
		ByteArrayOutputStream baos = new ByteArrayOutputStream();
		for (Datagram datagram : datagrams) {
			baos.write(datagram.getData());
		}
		return baos.toByteArray();
	}

	/**
	 * Dump units to Praat TextGrid. This will have three tiers:
	 * <ol>
	 * <li>halfphone units, labeled with unit indices;</li>
	 * <li>phone units, labeled with allophones;</li>
	 * <li>basenames, labeled with basename of original utterance.</li>
	 * </ol>
	 * As a side effect, {@link #numSamples} is set to the number of audio bytes of all non-edge units, which
	 * {@link #dumpAudio(String)} needs for the wav file header.
	 * 
	 * @param fileName
	 *            of new TextGrid
	 * @throws IOException
	 *             if data files cannot be read, or TextGrid cannot be written
	 */
	protected void dumpTextGrid(String fileName) throws IOException {
		// init the tiers:
		PraatIntervalTier unitTier = new PraatIntervalTier("unitindex");
		// NOTE(review): this tier holds the phone intervals although it is named "halfphone"; the name is left
		// unchanged because existing TextGrid consumers may rely on it -- confirm.
		PraatIntervalTier phoneTier = new PraatIntervalTier("halfphone");
		PraatIntervalTier basenameTier = new PraatIntervalTier("basename");

		// init some variables:
		UnitFileReader unitFileReader = unitDB.getUnitFileReader();
		TimelineReader audioTimeline = unitDB.getAudioTimeline();
		int sampleRate = audioTimeline.getSampleRate();
		int numberOfUnits = unitFileReader.getNumberOfUnits();
		double prevHalfPhoneUnitDurationInSeconds = 0;
		double basenameDurationInSeconds = 0;
		String basenameLabel = null;
		// counted locally, so that calling this method more than once does not inflate the total:
		long audioByteCount = 0;

		// iterate over all units:
		for (int unitIndex = 0; unitIndex < numberOfUnits; unitIndex++) {
			Unit unit = unitFileReader.getUnit(unitIndex);
			if (unit.isEdgeUnit()) {
				// if this is the left edge, basenameDurationInSeconds will be 0
				if (basenameDurationInSeconds > 0) {
					// add basename interval
					basenameTier.appendInterval(new PraatInterval(basenameDurationInSeconds, basenameLabel));
					basenameDurationInSeconds = 0;
				}
				continue; // ignore edge units (also, avoid ticket:335)
			}

			// iterate over datagrams to get exact duration:
			Datagram[] datagrams = audioTimeline.getDatagrams(unit, sampleRate);
			double halfPhoneUnitDurationInSeconds = getDuration(datagrams);
			// cumulative byte count for wav file header; getSamples() is used (rather than the datagrams directly)
			// so that the count matches what dumpAudio() will write, also if getSamples() is overridden:
			audioByteCount += getSamples(datagrams).length;

			// keep track of basename duration and label:
			basenameDurationInSeconds += halfPhoneUnitDurationInSeconds;
			basenameLabel = unitDB.getFilename(unit);

			// halfphone unit interval (labeled with unit index):
			unitTier.appendInterval(new PraatInterval(halfPhoneUnitDurationInSeconds, Integer.toString(unit.index)));

			// lazy way of checking that we have both halves of the phone:
			FeatureVector features = featureFileReader.getFeatureVector(unit);
			String halfphoneLR = features.getFeatureAsString(halfphoneLRFeatureIndex, featureDefinition);
			if (halfphoneLR.equals("R")) {
				// phone interval, spanning this right half and the preceding unit:
				double phoneUnitDurationInSeconds = halfPhoneUnitDurationInSeconds + prevHalfPhoneUnitDurationInSeconds;
				String phoneLabel = features.getFeatureAsString(phoneFeatureIndex, featureDefinition);
				phoneTier.appendInterval(new PraatInterval(phoneUnitDurationInSeconds, phoneLabel));
			}
			prevHalfPhoneUnitDurationInSeconds = halfPhoneUnitDurationInSeconds;
		}

		// if the unit file does not end with an edge unit, the last basename interval is still pending:
		if (basenameDurationInSeconds > 0) {
			basenameTier.appendInterval(new PraatInterval(basenameDurationInSeconds, basenameLabel));
		}

		numSamples = audioByteCount;

		// update time domains:
		unitTier.updateBoundaries();
		phoneTier.updateBoundaries();
		basenameTier.updateBoundaries();

		// create TextGrid:
		PraatTextGrid textGrid = new PraatTextGrid();
		textGrid.appendTier(unitTier);
		textGrid.appendTier(phoneTier);
		textGrid.appendTier(basenameTier);

		// write to text file; the writer is closed even if writing fails:
		try (PrintWriter output = new PrintWriter(fileName)) {
			output.write(textGrid.toString());
			// PrintWriter never throws on write errors, it only records them, so check explicitly:
			if (output.checkError()) {
				throw new IOException("Could not write TextGrid to " + fileName);
			}
		}
	}

	/**
	 * Write the audio of all non-edge units to a mono, 16 bit PCM wav file. Nothing is written unless
	 * {@link #dumpTextGrid(String)} has been run before and has found audio data, because the header needs the total size.
	 * <p>
	 * Adapted from {@link marytts.util.data.audio.WavWriter#export(String, int, byte[])} and
	 * {@link marytts.util.data.audio.WavWriter#doWrite(String, int)}
	 * 
	 * @param fileName
	 *            of new wav file
	 * @throws IOException
	 *             if the audio data is too large for a wav file header, or the file cannot be written
	 */
	protected void dumpAudio(String fileName) throws IOException {
		// refuse to write wav file if we don't know how many samples there are:
		if (numSamples <= 0) {
			return;
		}
		// the header stores sizes as unsigned 32-bit values; refuse to write a header that would wrap around:
		long riffChunkSize = WAV_HEADER_REMAINDER + numSamples;
		if (riffChunkSize > MAX_RIFF_SIZE) {
			throw new IOException("Audio data (" + numSamples + " bytes) is too large for a wav file: " + fileName);
		}

		UnitFileReader unitFileReader = unitDB.getUnitFileReader();
		TimelineReader audioTimeline = unitDB.getAudioTimeline();
		int sampleRate = audioTimeline.getSampleRate();
		int numberOfUnits = unitFileReader.getNumberOfUnits();
		int nBytesPerSample = 2;

		// open wav file (closed again even if writing fails), and write header; DataOutputStream writes
		// big-endian, so all header values are byte-swapped:
		try (DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(fileName)))) {
			dos.writeBytes("RIFF"); // "RIFF" in ascii
			dos.writeInt(byteswap((int) riffChunkSize)); // Chunk size
			dos.writeBytes("WAVEfmt ");
			dos.writeInt(byteswap(16)); // chunk size, 16 for PCM
			dos.writeShort(byteswap((short) 1)); // PCM format
			dos.writeShort(byteswap((short) 1)); // Mono, one channel
			dos.writeInt(byteswap(sampleRate)); // Samplerate
			dos.writeInt(byteswap(sampleRate * nBytesPerSample)); // Byte-rate
			dos.writeShort(byteswap((short) (nBytesPerSample))); // Nbr of bytes per samples x nbr of channels
			dos.writeShort(byteswap((short) (nBytesPerSample * 8))); // nbr of bits per sample
			dos.writeBytes("data");
			dos.writeInt(byteswap((int) numSamples));

			// implicitly unit-wise buffered writing of samples:
			for (int unitIndex = 0; unitIndex < numberOfUnits; unitIndex++) {
				Unit unit = unitFileReader.getUnit(unitIndex);
				if (unit.isEdgeUnit()) {
					continue; // ignore edge units (also, avoid ticket:335)
				}

				byte[] buf = getSamples(audioTimeline.getDatagrams(unit, sampleRate));
				swapBytePairs(buf);
				dos.write(buf);
			}
		}
	}

	/**
	 * Exchange the two bytes of every 16-bit sample in place. A trailing single byte is left untouched.
	 * 
	 * @param buf
	 *            raw samples to byte-swap
	 */
	private static void swapBytePairs(byte[] buf) {
		for (int j = 0; j < buf.length - 1; j += 2) {
			byte first = buf[j];
			buf[j] = buf[j + 1];
			buf[j + 1] = first;
		}
	}

	/**
	 * Derive an output file name from the audio timeline file name by exchanging its extension. Only a trailing
	 * ".mry" is replaced; if there is none, the new extension is appended, so that the result can never be the
	 * timeline file itself.
	 * 
	 * @param timelineFileName
	 *            name of the audio timeline file
	 * @param newExtension
	 *            extension (including the dot) of the output file
	 * @return the output file name
	 */
	private static String replaceTimelineExtension(String timelineFileName, String newExtension) {
		if (timelineFileName.endsWith(TIMELINE_EXTENSION)) {
			int stemLength = timelineFileName.length() - TIMELINE_EXTENSION.length();
			return timelineFileName.substring(0, stemLength) + newExtension;
		}
		return timelineFileName + newExtension;
	}

	/**
	 * Get file names from voice config file. Dump relevant data from audio timeline, unit file, etc. to Praat TextGrid and wav
	 * file. Both output files are written next to the audio timeline file, with the extensions ".TextGrid" and ".wav".
	 * 
	 * @param voiceName
	 *            for config file to read (e.g. "bits3")
	 * @throws Exception
	 *             Exception
	 */
	protected void dumpData(String voiceName) throws Exception {
		String propertyPrefix = "voice." + voiceName;
		String audioTimelineFileName = MaryProperties.needFilename(propertyPrefix + ".audioTimelineFile");
		String basenameTimelineFileName = MaryProperties.needFilename(propertyPrefix + ".basenameTimeline");
		String unitFileName = MaryProperties.needFilename(propertyPrefix + ".unitsFile");
		String featureFileName = MaryProperties.needFilename(propertyPrefix + ".featureFile");
		String textGridFilename = replaceTimelineExtension(audioTimelineFileName, ".TextGrid");
		String wavFilename = replaceTimelineExtension(audioTimelineFileName, ".wav");

		loadUnitDatabase(audioTimelineFileName, basenameTimelineFileName, unitFileName);
		loadFeatureFile(featureFileName);
		System.out.println("All files loaded.");
		dumpTextGrid(textGridFilename);
		System.out.println("Dumped TextGrid to " + textGridFilename);
		dumpAudio(wavFilename);
		// dumpAudio() silently writes nothing when no audio data was counted; do not claim otherwise:
		if (numSamples > 0) {
			System.out.println("Dumped audio to " + wavFilename);
		} else {
			System.out.println("No audio data found, nothing written to " + wavFilename);
		}
	}

	/**
	 * Main method. Add VOICE jar to classpath, then call with
	 * 
	 * <pre>
	 * -ea -Xmx1g -Dmary.base=$MARYBASE VOICE
	 * </pre>
	 * 
	 * or something similar
	 * 
	 * @param args
	 *            voice name (without the Locale) of voice to dump data from
	 * @throws IllegalArgumentException
	 *             if no voice name is given
	 * @throws Exception
	 *             Exception
	 */
	public static void main(String[] args) throws Exception {
		if (args == null || args.length < 1) {
			throw new IllegalArgumentException("Usage: java " + VoiceDataDumper.class.getName() + " VOICE");
		}
		new VoiceDataDumper().dumpData(args[0]);
	}

}