/**
 * Copyright 2010 DFKI GmbH.
 * All Rights Reserved. Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.unitselection.analysis;

import java.io.BufferedOutputStream;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.BufferUnderflowException;

import marytts.exceptions.MaryConfigurationException;
import marytts.features.FeatureDefinition;
import marytts.features.FeatureVector;
import marytts.server.MaryProperties;
import marytts.unitselection.data.FeatureFileReader;
import marytts.unitselection.data.TimelineReader;
import marytts.unitselection.data.Unit;
import marytts.unitselection.data.UnitDatabase;
import marytts.unitselection.data.UnitFileReader;
import marytts.util.data.Datagram;
import marytts.util.data.text.PraatInterval;
import marytts.util.data.text.PraatIntervalTier;
import marytts.util.data.text.PraatTextGrid;

/**
 * Convenience class to dump relevant data from a unit selection voice to a Praat TextGrid and a wav file for inspection of
 * timeline data in external tools (e.g. Praat, WaveSurfer, etc.)
 * <p>
 * Typical use is {@link #dumpData(String)}, which loads the voice files, writes the TextGrid and then writes the wav file.
 * The wav file can only be written after the TextGrid, because {@link #dumpTextGrid(String)} is what counts the audio bytes
 * needed for the wav header.
 *
 * @author steiner
 *
 */
public class VoiceDataDumper {

    /** Suffix of the audio timeline file name that is swapped for the output file suffixes. */
    private static final String MARY_FILE_SUFFIX = ".mry";

    protected UnitDatabase unitDB;

    protected FeatureFileReader featureFileReader;

    /**
     * Total number of audio <em>bytes</em> (not sample frames) of all non-edge units, as counted by the most recent call to
     * {@link #dumpTextGrid(String)}. Used for the size fields of the wav header.
     */
    protected long numSamples = 0;

    protected FeatureDefinition featureDefinition;

    protected int phoneFeatureIndex;

    protected int halfphoneLRFeatureIndex;

    public VoiceDataDumper() {
    }

    /**
     * Reverse the byte order of a 32 bit value (big-endian to little-endian and vice versa).
     *
     * @see marytts.util.data.audio.WavWriter#byteswap(int)
     * @param val
     *            value whose bytes to swap
     * @return val with its four bytes in reversed order
     */
    protected int byteswap(int val) {
        return Integer.reverseBytes(val);
    }

    /**
     * Reverse the byte order of a 16 bit value (big-endian to little-endian and vice versa).
     *
     * @see marytts.util.data.audio.WavWriter#byteswap(short)
     * @param val
     *            value whose bytes to swap
     * @return val with its two bytes exchanged
     */
    protected short byteswap(short val) {
        return Short.reverseBytes(val);
    }

    /**
     * Load audio timeline from file
     *
     * @param fileName
     *            to load
     * @return TimelineReader
     * @throws IOException
     *             IOException
     * @throws MaryConfigurationException
     *             MaryConfigurationException
     */
    protected TimelineReader loadAudioTimeline(String fileName) throws IOException, MaryConfigurationException {
        return new TimelineReader(fileName);
    }

    /**
     * Load unit database from various relevant files. Only the unit file and the two timelines are handed to the database;
     * all other arguments of {@code UnitDatabase.load} are passed as {@code null} / {@code 0}.
     *
     * @param audioTimelineFileName
     *            to load
     * @param basenameTimelineFileName
     *            to load
     * @param unitFileName
     *            to load
     * @throws IOException
     *             IOException
     * @throws MaryConfigurationException
     *             MaryConfigurationException
     */
    protected void loadUnitDatabase(String audioTimelineFileName, String basenameTimelineFileName, String unitFileName)
            throws IOException, MaryConfigurationException {
        unitDB = new UnitDatabase();
        UnitFileReader unitFileReader = new UnitFileReader(unitFileName);
        TimelineReader audioTimelineReader = loadAudioTimeline(audioTimelineFileName);
        TimelineReader basenameTimelineReader = new TimelineReader(basenameTimelineFileName);
        unitDB.load(null, null, unitFileReader, null, audioTimelineReader, basenameTimelineReader, 0);
    }

    /**
     * Load unit feature file from file, and look up the indices of the "phone" and "halfphone_lr" features.
     *
     * @param fileName
     *            to load
     * @throws IOException
     *             IOException
     * @throws MaryConfigurationException
     *             MaryConfigurationException
     */
    protected void loadFeatureFile(String fileName) throws IOException, MaryConfigurationException {
        featureFileReader = new FeatureFileReader(fileName);
        featureDefinition = featureFileReader.getFeatureDefinition();
        phoneFeatureIndex = featureDefinition.getFeatureIndex("phone");
        halfphoneLRFeatureIndex = featureDefinition.getFeatureIndex("halfphone_lr");
    }

    /**
     * Get total duration of a Datagram array. Requires the unit database to be loaded, since the sample rate of its audio
     * timeline is used to convert datagram durations to seconds.
     *
     * @param datagrams
     *            whose duration to get
     * @return total duration in seconds
     */
    protected double getDuration(Datagram[] datagrams) {
        // divide in double precision (not float), and look the sample rate up only once:
        final double sampleRate = unitDB.getAudioTimeline().getSampleRate();
        double totalDuration = 0;
        for (Datagram datagram : datagrams) {
            totalDuration += datagram.getDuration() / sampleRate;
        }
        return totalDuration;
    }

    /**
     * Get raw samples from all Datagrams in an array
     *
     * @param datagrams
     *            whose samples to get
     * @return raw samples as stored in the Datagrams, concatenated in array order
     * @throws IOException
     *             IOException
     */
    protected byte[] getSamples(Datagram[] datagrams) throws IOException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        for (Datagram datagram : datagrams) {
            baos.write(datagram.getData());
        }
        return baos.toByteArray();
    }

    /**
     * Dump units to Praat TextGrid. This will have three tiers:
     * <ol>
     * <li>halfphone units, labeled with unit indices;</li>
     * <li>phone units, labeled with allophones;</li>
     * <li>basenames, labeled with basename of original utterance.</li>
     * </ol>
     * As a side effect, {@link #numSamples} is reset and then set to the number of audio bytes of all non-edge units, which
     * {@link #dumpAudio(String)} needs for the wav header. A {@link BufferUnderflowException} raised while reading
     * datagrams propagates unchanged.
     *
     * @param fileName
     *            of new TextGrid
     * @throws IOException
     *             if data files cannot be read, or TextGrid cannot be written
     */
    protected void dumpTextGrid(String fileName) throws IOException {
        // init the tiers:
        PraatIntervalTier unitTier = new PraatIntervalTier("unitindex");
        // NOTE(review): this tier holds whole-phone intervals although it is named "halfphone";
        // the name is kept as-is because existing consumers of the TextGrid may rely on it.
        PraatIntervalTier phoneTier = new PraatIntervalTier("halfphone");
        PraatIntervalTier basenameTier = new PraatIntervalTier("basename");

        // start counting from scratch, so that calling this method twice does not double the wav header sizes:
        numSamples = 0;

        // init some variables:
        final int sampleRate = unitDB.getAudioTimeline().getSampleRate();
        double prevHalfPhoneUnitDurationInSeconds = 0;
        double basenameDurationInSeconds = 0;
        String basenameLabel = null;

        // iterate over all units:
        for (int unitIndex = 0; unitIndex < unitDB.getUnitFileReader().getNumberOfUnits(); unitIndex++) {
            Unit unit = unitDB.getUnitFileReader().getUnit(unitIndex);
            if (unit.isEdgeUnit()) {
                // if this is the left edge, basenameDurationInSeconds will be 0
                if (basenameDurationInSeconds > 0) {
                    // add basename interval
                    basenameTier.appendInterval(new PraatInterval(basenameDurationInSeconds, basenameLabel));
                    basenameDurationInSeconds = 0;
                }
                continue; // ignore edge units (also, avoid ticket:335)
            }

            // iterate over datagrams to get exact duration:
            Datagram[] datagrams = unitDB.getAudioTimeline().getDatagrams(unit, sampleRate);
            double halfPhoneUnitDurationInSeconds = getDuration(datagrams);

            // cumulative byte count for wav file header:
            numSamples += getSamples(datagrams).length;

            // keep track of basename duration and label:
            basenameDurationInSeconds += halfPhoneUnitDurationInSeconds;
            basenameLabel = unitDB.getFilename(unit);

            // halfphone unit interval (labeled with unit index):
            unitTier.appendInterval(new PraatInterval(halfPhoneUnitDurationInSeconds, Integer.toString(unit.index)));

            // lazy way of checking that we have both halves of the phone:
            FeatureVector features = featureFileReader.getFeatureVector(unit);
            String halfphoneLR = features.getFeatureAsString(halfphoneLRFeatureIndex, featureDefinition);
            if (halfphoneLR.equals("R")) {
                // phone interval spans this right half plus the preceding (left) half:
                double phoneUnitDurationInSeconds = halfPhoneUnitDurationInSeconds + prevHalfPhoneUnitDurationInSeconds;
                String phoneLabel = features.getFeatureAsString(phoneFeatureIndex, featureDefinition);
                phoneTier.appendInterval(new PraatInterval(phoneUnitDurationInSeconds, phoneLabel));
            }
            prevHalfPhoneUnitDurationInSeconds = halfPhoneUnitDurationInSeconds;
        }

        // if the unit file does not end on an edge unit, the last utterance has not been flushed yet:
        if (basenameDurationInSeconds > 0) {
            basenameTier.appendInterval(new PraatInterval(basenameDurationInSeconds, basenameLabel));
        }

        // update time domains:
        unitTier.updateBoundaries();
        phoneTier.updateBoundaries();
        basenameTier.updateBoundaries();

        // create TextGrid:
        PraatTextGrid textGrid = new PraatTextGrid();
        textGrid.appendTier(unitTier);
        textGrid.appendTier(phoneTier);
        textGrid.appendTier(basenameTier);

        // write to text file, with an explicit encoding instead of the platform default,
        // and make sure the file handle is released even if writing fails:
        BufferedWriter output = new BufferedWriter(new PrintWriter(fileName, "UTF-8"));
        try {
            output.write(textGrid.toString());
        } finally {
            output.close();
        }
    }

    /**
     * Write the audio of all non-edge units to a 16 bit mono PCM wav file. Does nothing (and creates no file) unless
     * {@link #dumpTextGrid(String)} has been called before and has counted at least one audio byte.
     * <p>
     * Adapted from {@link marytts.util.data.audio.WavWriter#export(String, int, byte[])} and
     * {@link marytts.util.data.audio.WavWriter#doWrite(String, int)}
     *
     * @param fileName
     *            fileName
     * @throws IOException
     *             IOException
     */
    protected void dumpAudio(String fileName) throws IOException {
        // refuse to write wav file if we don't know how many samples there are:
        if (numSamples <= 0) {
            return;
        }

        final int sampleRate = unitDB.getAudioTimeline().getSampleRate();
        final int nBytesPerSample = 2;

        // open wav file, and write header; DataOutputStream writes big-endian, wav wants little-endian, hence byteswap:
        DataOutputStream dos = new DataOutputStream(new BufferedOutputStream(new FileOutputStream(fileName)));
        try {
            dos.writeBytes("RIFF"); // "RIFF" in ascii
            dos.writeInt(byteswap((int) (36 + numSamples))); // Chunk size
            dos.writeBytes("WAVEfmt ");
            dos.writeInt(byteswap(16)); // chunk size, 16 for PCM
            dos.writeShort(byteswap((short) 1)); // PCM format
            dos.writeShort(byteswap((short) 1)); // Mono, one channel
            dos.writeInt(byteswap(sampleRate)); // Samplerate
            dos.writeInt(byteswap(sampleRate * nBytesPerSample)); // Byte-rate
            dos.writeShort(byteswap((short) (nBytesPerSample))); // Nbr of bytes per samples x nbr of channels
            dos.writeShort(byteswap((short) (nBytesPerSample * 8))); // nbr of bits per sample
            dos.writeBytes("data");
            dos.writeInt(byteswap((int) numSamples));

            // implicitly unit-wise buffered writing of samples:
            for (int unitIndex = 0; unitIndex < unitDB.getUnitFileReader().getNumberOfUnits(); unitIndex++) {
                Unit unit = unitDB.getUnitFileReader().getUnit(unitIndex);
                if (unit.isEdgeUnit()) {
                    continue; // ignore edge units (also, avoid ticket:335)
                }
                Datagram[] datagrams = unitDB.getAudioTimeline().getDatagrams(unit, sampleRate);
                byte[] buf = getSamples(datagrams);

                // Byte-swap the samples pairwise (presumably the timeline stores them big-endian);
                // a trailing odd byte, if any, is left untouched:
                for (int j = 0; j < buf.length - 1; j += 2) {
                    byte b = buf[j];
                    buf[j] = buf[j + 1];
                    buf[j + 1] = b;
                }

                // write buffer to file:
                dos.write(buf);
            }
        } finally {
            dos.close();
        }
    }

    /**
     * Derive an output file name from the audio timeline file name by swapping a trailing ".mry" for the given suffix. If
     * the name does not end in ".mry", the suffix is appended instead, so that the result can never be the input file
     * itself.
     *
     * @param maryFileName
     *            file name to derive from
     * @param newSuffix
     *            suffix of the derived file name, including the dot
     * @return the derived file name
     */
    private static String deriveFileName(String maryFileName, String newSuffix) {
        if (maryFileName.endsWith(MARY_FILE_SUFFIX)) {
            return maryFileName.substring(0, maryFileName.length() - MARY_FILE_SUFFIX.length()) + newSuffix;
        }
        return maryFileName + newSuffix;
    }

    /**
     * Get file names from voice config file. Dump relevant data from audio timeline, unit file, etc. to Praat TextGrid and wav
     * file. Both output files are written next to the audio timeline file, with its ".mry" suffix replaced by ".TextGrid"
     * and ".wav", respectively.
     *
     * @param voiceName
     *            for config file to read (e.g. "bits3")
     * @throws Exception
     *             Exception
     */
    protected void dumpData(String voiceName) throws Exception {
        String propertyPrefix = "voice." + voiceName;
        String audioTimelineFileName = MaryProperties.needFilename(propertyPrefix + ".audioTimelineFile");
        String basenameTimelineFileName = MaryProperties.needFilename(propertyPrefix + ".basenameTimeline");
        String unitFileName = MaryProperties.needFilename(propertyPrefix + ".unitsFile");
        String featureFileName = MaryProperties.needFilename(propertyPrefix + ".featureFile");

        String textGridFilename = deriveFileName(audioTimelineFileName, ".TextGrid");
        String wavFilename = deriveFileName(audioTimelineFileName, ".wav");

        loadUnitDatabase(audioTimelineFileName, basenameTimelineFileName, unitFileName);
        loadFeatureFile(featureFileName);
        System.out.println("All files loaded.");

        // the TextGrid must be dumped first, since that pass counts the audio bytes for the wav header:
        dumpTextGrid(textGridFilename);
        System.out.println("Dumped TextGrid to " + textGridFilename);
        dumpAudio(wavFilename);
        System.out.println("Dumped audio to " + wavFilename);
    }

    /**
     * Main method. Add VOICE jar to classpath, then call with
     *
     * <pre>
     * -ea -Xmx1g -Dmary.base=$MARYBASE VOICE
     * </pre>
     *
     * or something similar
     *
     * @param args
     *            voice name (without the Locale) of voice to dump data from
     * @throws IllegalArgumentException
     *             if no voice name is given
     * @throws Exception
     *             Exception
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            throw new IllegalArgumentException(
                    "Missing argument: voice name (without the Locale) of voice to dump data from");
        }
        new VoiceDataDumper().dumpData(args[0]);
    }
}