/** * Copyright 2009 DFKI GmbH. * All Rights Reserved. Use is subject to license terms. * * This file is part of MARY TTS. * * MARY TTS is free software: you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as published by * the Free Software Foundation, version 3 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * */ package marytts.tools.analysis; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.util.Collections; import java.util.HashMap; import java.util.Map; import java.util.regex.Pattern; import marytts.modules.phonemiser.Allophone; import marytts.modules.phonemiser.AllophoneSet; import marytts.signalproc.analysis.AlignedLabels; import marytts.signalproc.analysis.Labels; import marytts.util.data.text.XwavesLabelfileReader; import marytts.util.string.StringUtils; /** * This class aligns a label file with an XML file in MARY ALLOPHONES format, modifying the structure of the XML file as needed to * match the label file. After calling alignXMLTranscriptions(), it is guaranteed that an iteration through all PHONE and BOUNDARY * nodes of the XML file matches the label file. * * @author marc * */ public class TranscriptionAligner { protected Map<String, Integer> aligncost; protected int defaultcost; protected int defaultBoundaryCost; protected int skipcost; protected AllophoneSet allophoneSet; // String for a boundary protected String possibleBnd; protected String entrySeparator; protected boolean ensureInitialBoundary = false; public TranscriptionAligner() { this(null); } public TranscriptionAligner(AllophoneSet allophoneSet) { this(allophoneSet, null); } public TranscriptionAligner(AllophoneSet allophoneSet, String entrySeparator) { this.aligncost = new HashMap<String, Integer>(); this.defaultcost = 10; // phone set is used for splitting the sampa strings and setting the costs this.allophoneSet = allophoneSet; if (allophoneSet != null) { possibleBnd = allophoneSet.getSilence().name(); } else { possibleBnd = "_"; } if (entrySeparator != null) { this.entrySeparator = entrySeparator; } else { this.entrySeparator = "|"; } this.setDistance(); defaultcost = this.getMaxCost(); // align boundaries only to itself defaultBoundaryCost = 20 * defaultcost; // distance between pauses is zero, with slight conservative bias aligncost.put(possibleBnd + " " + possibleBnd, 0); skipcost = defaultcost * 1 / 10; // 0.25 / 0.3 /0.33 seem all fine } public void SetEnsureInitialBoundary(boolean value) { this.ensureInitialBoundary = value; } public boolean getEnsureInitialBoundary() { return ensureInitialBoundary; } public String getEntrySeparator() { return entrySeparator; } /** * This reads in a label file and returns a String of the phonetic symbols, separated by the entry separator character * entrySeparator. * * @param entrySeparator * entry separator * @param ensureInitialBoundary * ensure initial boundary * @param trfname * trf name * @throws IOException * if something goes wrong with opening/reading the file * @return result * */ public static String readLabelFile(String entrySeparator, boolean ensureInitialBoundary, String trfname) throws IOException { // reader for label file. BufferedReader lab = new BufferedReader(new FileReader(trfname)); try { // get XwavesLabelfileDataSouce to parse Xwaves label file and store times and labels: XwavesLabelfileReader xlds = new XwavesLabelfileReader(trfname); // join them to a string, with entrySeparator as glue: String result = StringUtils.join(entrySeparator, xlds.getLabelSymbols()); // if Label File does not start with pause symbol, insert it // as well as a pause duration of zero (...) if (ensureInitialBoundary && result.charAt(0) != '_') { result = "_" + entrySeparator + result; } return result; } finally { lab.close(); } } /** * This sets the distance by using the phone set of the aligner object. Phone set must already be specified. */ private void setDistance() { if (null == this.allophoneSet) { System.err.println("No allophone set -- cannot use intelligent distance metrics"); return; } for (String fromSym : this.allophoneSet.getAllophoneNames()) { for (String toSym : this.allophoneSet.getAllophoneNames()) { int diff = 0; Allophone fromPh = this.allophoneSet.getAllophone(fromSym); Allophone toPh = this.allophoneSet.getAllophone(toSym); // for each difference increase distance diff += (!fromSym.equals(toSym)) ? 2 : 0; diff += (fromPh.isFricative() != toPh.isFricative()) ? 2 : 0; diff += (fromPh.isGlide() != toPh.isGlide()) ? 2 : 0; diff += (fromPh.isLiquid() != toPh.isLiquid()) ? 2 : 0; diff += (fromPh.isNasal() != toPh.isNasal()) ? 2 : 0; diff += (fromPh.isPlosive() != toPh.isPlosive()) ? 1 : 0; diff += (fromPh.isSonorant() != toPh.isSonorant()) ? 2 : 0; diff += (fromPh.isSyllabic() != toPh.isSyllabic()) ? 1 : 0; diff += (fromPh.isVoiced() != toPh.isVoiced()) ? 1 : 0; diff += (fromPh.isVowel() != toPh.isVowel()) ? 2 : 0; diff += Math.abs(fromPh.sonority() - toPh.sonority()); String key = fromSym + " " + toSym; this.aligncost.put(key, diff); } } } /** * * This computes the alignment that has the lowest distance between two Strings. * * There are three differences to the normal Levenshtein-distance: * * 1. Only insertions and deletions are allowed, no replacements (i.e. no "diagonal" transitions) 2. insertion costs are * dependent on a particular phone on the input side (the one they are aligned to) 3. deletion is equivalent to a symbol on * the input side that is not aligned. There are costs associated with that. * * The method returns the output string with alignment boundaries ('#') inserted. * * @param in * in * @param out * out * @return p_al[ostr.length] */ protected String distanceAlign(String in, String out) { String[] istr = in.split(Pattern.quote(entrySeparator)); String[] ostr = out.split(Pattern.quote(entrySeparator)); String delim = "#"; // distances: // 1. previous distance (= previous column in matrix) int[] p_d = new int[ostr.length + 1]; // 2. current distance int[] d = new int[ostr.length + 1]; // 3. dummy array for swapping, when switching to new column int[] _d; // array indicating if a skip was performed (= if current character has not been aligned) // same arrays as for distances boolean[] p_sk = new boolean[ostr.length + 1]; boolean[] sk = new boolean[ostr.length + 1]; boolean[] _sk; // arrays storing the alignments corresponding to distances String[] p_al = new String[ostr.length + 1]; String[] al = new String[ostr.length + 1]; String[] _al; // initialize values p_d[0] = 0; p_al[0] = ""; p_sk[0] = true; // ... still initializing for (int j = 1; j < ostr.length + 1; j++) { // only possibility first is to align the first letter // of the input string to everything p_al[j] = p_al[j - 1] + " " + ostr[j - 1]; p_d[j] = p_d[j - 1] + symDist(istr[0], ostr[j - 1]); p_sk[j] = false; } // constant penalty for not aligning a character int skConst = this.skipcost; // align // can start at 1, since 0 has been treated in initialization for (int i = 1; i < istr.length; i++) { // zero'st row stands for skipping from the beginning on d[0] = p_d[0] + skConst; al[0] = p_al[0] + " " + delim; sk[0] = true; for (int j = 1; j < ostr.length + 1; j++) { // translation cost between symbols ( j-1, because 0 row // inserted for not aligning at beginning) int tr_cost = symDist(istr[i], ostr[j - 1]); // skipping cost greater zero if not yet aligned int sk_cost = p_sk[j] ? skConst : 0; if (sk_cost + p_d[j] < tr_cost + d[j - 1]) { // skipping cheaper // cost is cost from previous input char + skipping d[j] = sk_cost + p_d[j]; // alignment is from prev. input + delimiter al[j] = p_al[j] + " " + delim; // yes, we skipped sk[j] = true; } else { // aligning cheaper // cost is that from previously aligned output + distance d[j] = tr_cost + d[j - 1]; // alignment continues from previously aligned al[j] = al[j - 1] + " " + ostr[j - 1]; // nope, didn't skip sk[j] = false; } } // swapping _d = p_d; p_d = d; d = _d; _sk = p_sk; p_sk = sk; sk = _sk; _al = p_al; p_al = al; al = _al; } return p_al[ostr.length]; } /** * Align the two given sequences of labels and return a mapping array indicating which index in first should be aligned to * which index in second. * * @param first * first * @param second * second * @return an array m of integers -- for each index i in first, m[i] gives the (rightmost) corresponding index in second. */ public AlignedLabels alignLabels(Labels first, Labels second) { String firstLabels = StringUtils.join(entrySeparator, first.getLabelSymbols()); String secondLabels = StringUtils.join(entrySeparator, second.getLabelSymbols()); String aligned = distanceAlign(firstLabels, secondLabels); // Now, in aligned, the hash signs separate fields corresponding to first; // the field contains the label symbols of second (space-separated) // that match this index in first. if (aligned.endsWith("#")) { aligned = aligned + " "; // make sure that the split operation does not discard a final empty field } String[] fields = aligned.split("#"); assert fields.length == first.items.length; int iSecond = -1; // start before first item int[] map = new int[fields.length]; for (int i = 0; i < fields.length; i++) { int numLabels; String f = fields[i].trim(); if (f.equals("")) { numLabels = 0; } else { numLabels = f.split(" ").length; } iSecond += numLabels; map[i] = Math.max(iSecond, 0); // if first elements in second are skipped, still map to 0, not to -1. } return new AlignedLabels(first, second, map); } private int getMaxCost() { if (aligncost.isEmpty()) return defaultcost; int maxMapping = Collections.max(aligncost.values()); return (maxMapping > defaultcost) ? maxMapping : defaultcost; } private int symDist(String aString1, String aString2) { String key = aString1 + " " + aString2; // if a value is stored, return it if (this.aligncost.containsKey(key)) { return aligncost.get(key); } else if (aString1.equals(aString2)) { return 0; } else if (aString1.equals(possibleBnd) || aString2.equals(possibleBnd)) { // one but not the other is a possible boundary: return defaultBoundaryCost; } return defaultcost; } }