/**
 * Copyright 2002 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.language.de.preprocess;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import marytts.util.MaryUtils;
import marytts.util.dom.MaryDomUtils;

import org.w3c.dom.Document;
import org.w3c.dom.Element;

/**
 * An expansion pattern implementation for measure patterns.
 * <p>
 * A measure is a number immediately followed by one of the unit symbols listed in this class (for example
 * <code>5km</code>). It is expanded into the spoken number followed by the unit name.
 *
 * @author Marc Schröder
 */
public class MeasureEP extends ExpansionPattern {
    /** Degree sign (U+00B0), used to build the degree Celsius / Fahrenheit symbols. */
    private static final String DEGREE_SIGN = "\u00B0";
    /** Ring above (U+02DA), accepted as a look-alike of the degree sign. */
    private static final String RING_ABOVE = "\u02DA";
    /** Superscript two (U+00B2), as in square metres. */
    private static final String SUPERSCRIPT_TWO = "\u00B2";
    /** Superscript three (U+00B3), as in cubic metres. */
    private static final String SUPERSCRIPT_THREE = "\u00B3";
    /** Opening of the non-capturing group that wraps all unit symbol alternatives. */
    private static final String GROUP_OPEN = "(?:";

    private final String[] _knownTypes = { "measure" };

    /**
     * Every subclass has its own list knownTypes, an internal string representation of known types. These are possible
     * values of the <code>type</code> attribute to the <code>say-as</code> element, as defined in MaryXML.dtd. If there is
     * more than one known type, the first type (<code>knownTypes[0]</code>) is expected to be the most general one, of
     * which the others are specializations.
     */
    private final List<String> knownTypes = Arrays.asList(_knownTypes);

    /**
     * @return the list of type names handled by this expansion pattern (here only <code>"measure"</code>)
     */
    public List<String> knownTypes() {
        return knownTypes;
    }

    // Number-dependent feminine measure symbols, stored as alternating (symbol, name) pairs.
    // For these units the article is "eine" and an "n" is appended to the name when the amount is not "1".
    private final String[] _nuDeFeMeasureSymbolNames = {
            "s", "Sekunde",
            "sec", "Sekunde",
            "ms", "Millisekunde",
            "msec", "Millisekunde",
            "min", "Minute",
            "kcal", "Kilokalorie",
            "oz.", "Unze",
            "oz", "Unze",
    };

    // (Number-independent) masculine (or neuter) measure symbols, stored as alternating (symbol, name) pairs.
    // For these units the article is "ein" and the name is never inflected.
    // NOTE(review): the bracketed suffixes look like phonetic transcriptions consumed further down the
    // pipeline -- confirm against makeNewTokens().
    private final String[] _maMeasureSymbolNames = {
            "km", "Kilometer",
            "dm", "Dezimeter",
            "cm", "Zentimeter",
            "mm", "Millimeter",
            "g", "Gramm",
            "kg", "Kilogramm",
            "mg", "Milligramm",
            "A", "Ampere[am-'pe:6]",
            "V", "Volt",
            "K", "Kelvin['kEl-vi:n]",
            DEGREE_SIGN + "C", "Grad Celsius['tsEl-zi:-Us]",
            RING_ABOVE + "C", "Grad Celsius['tsEl-zi:-Us]",
            "\u2103", "Grad Celsius['tsEl-zi:-Us]", // ℃
            DEGREE_SIGN + "F", "Grad Fahrenheit",
            RING_ABOVE + "F", "Grad Fahrenheit",
            "\u2109", "Grad Fahrenheit", // ℉
            "Hz", "Hertz",
            "kHz", "Kilohertz",
            "MHz", "Megahertz",
            "GHz", "GigaHertz",
            "N", "Newton['nju:-t@n]",
            "Pa", "Pascal",
            "J", "Joule['dZu:l]",
            "kJ", "Kilojoule['ki:-lo:-dZu:l]",
            "W", "Watt",
            "kW", "Kilowatt",
            "MW", "Megawatt",
            "GW", "Gigawatt",
            "mW", "Milliwatt",
            "l", "Liter",
            "dl", "Deziliter",
            "cl", "Zentiliter",
            "ml", "Milliliter",
            "Bq", "Becquerel[bE-k@-'rEl]",
            "EL", "Esslöffel",
            "TL", "Teelöffel",
            "qm", "Quadratmeter",
            "m" + SUPERSCRIPT_TWO, "Quadratmeter",
            "m" + SUPERSCRIPT_THREE, "Kubikmeter",
            "ccm", "Kubikzentimeter",
            "m", "Meter",
            "%", "Prozent",
    };

    // Lookup tables symbol -> name, built from the pair arrays above.
    private final Map<String, String> nuDeFeMeasureSymbolNames = MaryUtils.arrayToMap(_nuDeFeMeasureSymbolNames);
    private final Map<String, String> maMeasureSymbolNames = MaryUtils.arrayToMap(_maMeasureSymbolNames);

    // Domain-specific primitives:
    // NOTE: must be declared after the symbol arrays, because getMeasureSymbols() reads them during
    // field initialisation.
    protected final String sMeasureSymbol = getMeasureSymbols();

    // We don't use sMatchingChars here, but override isCandidate().

    // Now the actual match patterns:
    /** Matches a bare unit symbol; group 1 is the symbol. */
    protected final Pattern reMeasureSymbol = Pattern.compile("(" + sMeasureSymbol + ")");
    /** Matches an amount directly followed by a unit symbol; group 1 is the amount, group 2 the symbol. */
    protected final Pattern reMeasure = Pattern.compile("(" + NumberEP.sInteger + "|" + NumberEP.sFloat + ")" + "("
            + sMeasureSymbol + ")");

    private final Pattern reMatchingChars = null;

    /**
     * @return always <code>null</code>: this class does not use a matching-characters pattern and overrides
     *         {@link #isCandidate(Element)} instead
     */
    public Pattern reMatchingChars() {
        return reMatchingChars;
    }

    /**
     * Builds the regular expression alternation of all known unit symbols. Only used to initialise
     * {@link #sMeasureSymbol} from the two symbol/name pair arrays.
     * <p>
     * Every symbol is quoted with {@link Pattern#quote(String)} so that symbols containing regex metacharacters
     * (such as the dot in <code>oz.</code>) match only themselves. Without quoting, a token like <code>5ozx</code>
     * would be accepted and later expanded with a missing unit name.
     *
     * @return a non-capturing group of the form <code>(?:sym1|sym2|...)</code>, feminine symbols first, each list in
     *         declaration order
     */
    private String getMeasureSymbols() {
        StringBuilder alternatives = new StringBuilder(GROUP_OPEN);
        appendSymbolAlternatives(alternatives, _nuDeFeMeasureSymbolNames);
        appendSymbolAlternatives(alternatives, _maMeasureSymbolNames);
        alternatives.append(")");
        return alternatives.toString();
    }

    /**
     * Appends the quoted symbols (the even-indexed entries) of a symbol/name pair array to the alternation under
     * construction, inserting <code>|</code> between alternatives.
     *
     * @param regex
     *            the builder, which must start with {@link #GROUP_OPEN}
     * @param symbolNamePairs
     *            alternating (symbol, name) entries
     */
    private static void appendSymbolAlternatives(StringBuilder regex, String[] symbolNamePairs) {
        for (int i = 0; i < symbolNamePairs.length; i += 2) {
            // Anything beyond the group opener means at least one alternative is already present.
            if (regex.length() > GROUP_OPEN.length()) {
                regex.append('|');
            }
            regex.append(Pattern.quote(symbolNamePairs[i]));
        }
    }

    public MeasureEP() {
        super();
    }

    /**
     * Decides whether a token may be part of a measure expression.
     *
     * @param t
     *            the token element to inspect
     * @return true if the token text is a bare unit symbol, if the number expansion pattern considers the token a
     *         candidate, or if the token text is a complete amount-plus-symbol measure
     */
    protected boolean isCandidate(Element t) {
        String s = MaryDomUtils.tokenText(t);
        return reMeasureSymbol.matcher(s).matches() || number.isCandidate(t) || reMeasure.matcher(s).matches();
    }

    /**
     * @param s
     *            the text to test
     * @param type
     *            index into {@link #knownTypes()}
     * @return the result of {@link #match(String, int)}
     */
    protected int canDealWith(String s, int type) {
        return match(s, type);
    }

    /**
     * Tests whether the whole string is a measure of the given type.
     *
     * @param s
     *            the text to test
     * @param type
     *            index into {@link #knownTypes()}; only 0 ("measure") is supported
     * @return 0 if <code>type</code> is 0 and <code>s</code> fully matches {@link #reMeasure}, -1 otherwise
     */
    protected int match(String s, int type) {
        if (type == 0 && reMeasure.matcher(s).matches()) {
            return 0;
        }
        return -1;
    }

    /**
     * Expands the given tokens and replaces them in the document via <code>replaceTokens</code>.
     *
     * @param tokens
     *            the tokens to replace; must be non-null and non-empty
     * @param s
     *            the text to expand
     * @param type
     *            one of the return values of {@link #match(String, int)}
     * @return the expanded tokens, or null if <code>type</code> is not 0 or <code>s</code> contains no measure
     * @throws NullPointerException
     *             if <code>tokens</code> is null
     * @throws IllegalArgumentException
     *             if <code>tokens</code> is empty
     */
    protected List<Element> expand(List<Element> tokens, String s, int type) {
        if (tokens == null) {
            throw new NullPointerException("Received null argument");
        }
        if (tokens.isEmpty()) {
            throw new IllegalArgumentException("Received empty list");
        }
        Document doc = tokens.get(0).getOwnerDocument();
        // we expect type to be one of the return values of match():
        List<Element> expanded = null;
        if (type == 0) {
            expanded = expandMeasure(doc, s);
        }
        replaceTokens(tokens, expanded);
        return expanded;
    }

    /**
     * Expands the first measure found in <code>s</code> into its spoken form: the amount (or the article "ein"/"eine"
     * when the amount is exactly <code>"1"</code>), a space, and the unit name. Feminine unit names get an "n"
     * appended when the amount is not <code>"1"</code>.
     *
     * @param doc
     *            the document in which the new tokens are created
     * @param s
     *            the text containing the measure
     * @return the newly created tokens, or null if <code>s</code> contains no measure
     */
    protected List<Element> expandMeasure(Document doc, String s) {
        Matcher reMatcher = reMeasure.matcher(s);
        if (!reMatcher.find()) {
            return null;
        }
        String amount = reMatcher.group(1);
        String measure = reMatcher.group(2);
        boolean isSingular = amount.equals("1");

        // A symbol missing from the feminine table is looked up in the masculine/neuter one.
        String measureName = nuDeFeMeasureSymbolNames.get(measure);
        boolean measureIsMasculine = measureName == null;
        if (measureIsMasculine) {
            measureName = maMeasureSymbolNames.get(measure);
        }

        StringBuilder sb = new StringBuilder();
        if (isSingular) {
            sb.append(measureIsMasculine ? "ein" : "eine");
        } else {
            sb.append(number.expandFloat(amount));
        }
        sb.append(" ");
        sb.append(measureName);
        if (!measureIsMasculine && !isSingular) {
            sb.append("n");
        }

        List<Element> exp = new ArrayList<Element>();
        exp.addAll(makeNewTokens(doc, sb.toString(), true, s));
        return exp;
    }
}