/**
 * Copyright 2002 DFKI GmbH.
 * All Rights Reserved.  Use is subject to license terms.
 *
 * This file is part of MARY TTS.
 *
 * MARY TTS is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, version 3 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */
package marytts.language.de.preprocess;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.w3c.dom.Document;
import org.w3c.dom.Element;

/**
 * An expansion pattern implementation for time patterns.
 * <p>
 * Two entry points select an expansion type: {@link #match(String, int)} (strict, regex based) and
 * {@link #canDealWith(String, int)} (greedy, accepts anything containing a digit).
 * {@link #expand(List, String, int)} then turns the string into German word tokens such as
 * <code>"ein Uhr dreißig"</code>.
 *
 * @author Marc Schröder
 */
public class TimeEP extends ExpansionPattern {
	private final String[] _knownTypes = { "time", "time:hms", "time:hm", "time:h", "time:hms12", "time:hms24" };

	/**
	 * Every subclass has its own list knownTypes, an internal string representation of known types. These are possible values of
	 * the <code>type</code> attribute to the <code>say-as</code> element, as defined in MaryXML.dtd. If there is more than one
	 * known type, the first type (<code>knownTypes[0]</code>) is expected to be the most general one, of which the others are
	 * specializations.
	 */
	private final List<String> knownTypes = Arrays.asList(_knownTypes);

	/**
	 * @return the list of type names handled by this expansion pattern; index 0 is the most general type.
	 */
	public List<String> knownTypes() {
		return knownTypes;
	}

	// Domain-specific primitives:
	protected final String sHour = "(?:0?[0-9]|1[0-9]|2[0-4])";
	protected final String sHour12 = "(?:0?[0-9]|1[0-2])";
	protected final String sMinute = "(?:[0-5][0-9])";
	protected final String sSecond = sMinute;
	protected final String sSep = "(?:\\:|\\.)";
	protected final String sFinal = "(?:h|Uhr)";
	protected final String sMatchingChars = "[0-9:\\.Uhr]";
	protected final String timeOfDay = "a|A|am|AM|Am|aM|p|P|pm|PM|Pm|pM";

	// Now the actual match patterns:
	protected final Pattern reHour = Pattern.compile("(" + sHour + ")" + sFinal);
	protected final Pattern reHourMinute = Pattern.compile("(" + sHour + ")" + sSep + "(" + sMinute + ")" + sFinal);
	protected final Pattern reHourMinuteSecond = Pattern.compile("(" + sHour + ")" + sSep + "(" + sMinute + ")" + sSep + "("
			+ sSecond + ")" + sFinal);

	private final Pattern reMatchingChars = Pattern.compile(sMatchingChars);

	/**
	 * @return a pattern matching any single character that can be part of a time expression.
	 */
	public Pattern reMatchingChars() {
		return reMatchingChars;
	}

	/**
	 * Every subclass has its own logger. The important point is that if several threads are accessing the variable at the same
	 * time, the logger needs to be thread-safe or it will produce rubbish.
	 */
	// private Logger logger = MaryUtils.getLogger("TimeEP");

	public TimeEP() {
		super();
	}

	/**
	 * Reluctant match: for TEXT_DE input. Only the general type (index 0) is handled here; the specialised say-as types
	 * (1 to 5) are left to {@link #canDealWith(String, int)}.
	 *
	 * @param s
	 *            the candidate string
	 * @param type
	 *            index into {@link #knownTypes()}
	 * @return 1 if <code>s</code> is an hour:minute:second time, 2 if it is an hour:minute time, -1 otherwise
	 */
	protected int match(String s, int type) {
		switch (type) {
		case 0:
			if (matchTimeHMS(s))
				return 1;
			if (matchTimeHM(s))
				return 2;
			// if (matchTimeH(s)) return 3;
			// shall not match in order to let DurationEP match:
			// -> "one hour" instead of "one o'clock"
			break;
		// cases 1 to 5 are say-as - cases and therefore left to
		// canDealWith();
		// case 1: if (matchTimeHMS(s)) return 1; break;
		// case 2: if (matchTimeHM(s)) return 2; break;
		// case 3: if (matchTimeH(s)) return 3; break;
		// case 4: if (matchTimeHMS(s)) return 1; break;
		// case 5: if (matchTimeHMS(s)) return 1; break;
		}
		return -1;
	}

	/**
	 * Greedy match: for SSML input.
	 * <p>
	 * SSML doesn't distinguish between many different kinds of say-as-time-elements, so either it matches TimeHMS12 or
	 * TimeHMS24 or none. In consideration of this class being written for German, "hms24" is the fallback if "hms12" doesn't
	 * fit (for English it should better be "hmsUnknown" or "hms12").
	 *
	 * @param s
	 *            the candidate string
	 * @param type
	 *            index into {@link #knownTypes()}; only 4 (hms12) and 5 (hms24) are handled
	 * @return <code>type</code> itself if it is 4 or 5 and <code>s</code> contains at least one digit, -1 otherwise
	 */
	protected int canDealWith(String s, int type) {
		switch (type) {
		// case 0: if (canDealWithTimeHMS12(s)) return 4; if (canDealWithTimeHMS24(s)) return 5; break;
		// case 1: if (canDealWithTimeHMS12(s)) return 4; if (canDealWithTimeHMS24(s)) return 5; break;
		case 4:
			if (canDealWithTimeHMS12(s))
				return 4;
			break;
		case 5:
			if (canDealWithTimeHMS24(s))
				return 5;
			break;
		}
		return -1;
	}

	/**
	 * Expand a time string into word tokens and hand the result to <code>replaceTokens</code>.
	 *
	 * @param tokens
	 *            the original tokens; the first one supplies the owner document
	 * @param s
	 *            the time string to expand
	 * @param type
	 *            one of the return values of {@link #match(String, int)} or {@link #canDealWith(String, int)}
	 * @return the expanded tokens, or <code>null</code> if <code>type</code> is unknown or <code>s</code> could not be expanded
	 * @throws NullPointerException
	 *             if <code>tokens</code> is null
	 * @throws IllegalArgumentException
	 *             if <code>tokens</code> is empty
	 */
	protected List<Element> expand(List<Element> tokens, String s, int type) {
		if (tokens == null)
			throw new NullPointerException("Received null argument");
		if (tokens.isEmpty())
			throw new IllegalArgumentException("Received empty list");
		Document doc = tokens.get(0).getOwnerDocument();
		// we expect type to be one of the return values of match():
		List<Element> expanded = null;
		switch (type) {
		case 1:
			expanded = expandTimeHMS(doc, s);
			break;
		case 2:
			expanded = expandTimeHM(doc, s);
			break;
		case 3:
			expanded = expandTimeH(doc, s);
			break;
		case 4:
			expanded = expandTimeHMS12(doc, s);
			break;
		case 5:
			expanded = expandTimeHMS24(doc, s);
			break;
		}
		replaceTokens(tokens, expanded);
		return expanded;
	}

	/** @return true if the whole of <code>s</code> is an hour:minute:second time followed by "h" or "Uhr". */
	protected boolean matchTimeHMS(String s) {
		return reHourMinuteSecond.matcher(s).matches();
	}

	/** @return true if the whole of <code>s</code> is an hour:minute time followed by "h" or "Uhr". */
	protected boolean matchTimeHM(String s) {
		return reHourMinute.matcher(s).matches();
	}

	/** @return true if the whole of <code>s</code> is an hour followed by "h" or "Uhr". */
	protected boolean matchTimeH(String s) {
		return reHour.matcher(s).matches();
	}

	/**
	 * Expand an hour:minute:second time. Hour and minute form one multi-token unit; non-zero seconds are appended as
	 * "und ... Sekunde(n)".
	 *
	 * @return the expanded tokens, or <code>null</code> if <code>s</code> contains no hour:minute:second time
	 */
	protected List<Element> expandTimeHMS(Document doc, String s) {
		Matcher reMatcher = reHourMinuteSecond.matcher(s);
		if (!reMatcher.find()) {
			return null;
		}
		List<Element> exp = new ArrayList<Element>();
		// Create one mtu from hour and minute:
		// !!!! (the original text for the mtu is actually not s)
		exp.addAll(makeNewTokens(doc, hourMinuteText(reMatcher.group(1), reMatcher.group(2)), true, s));
		String second = reMatcher.group(3);
		if (!second.equals("00")) {
			exp.addAll(makeNewTokens(doc, "und"));
			if (second.equals("01")) {
				exp.addAll(makeNewTokens(doc, "eine Sekunde"));
			} else {
				exp.addAll(number.expandInteger(doc, second, false));
				exp.addAll(makeNewTokens(doc, "Sekunden"));
			}
		}
		return exp;
	}

	/**
	 * Expand an hour:minute time into one multi-token unit.
	 *
	 * @return the expanded tokens, or <code>null</code> if <code>s</code> contains no hour:minute time
	 */
	protected List<Element> expandTimeHM(Document doc, String s) {
		Matcher reMatcher = reHourMinute.matcher(s);
		// Without this check, group() would throw IllegalStateException on a non-matching string.
		if (!reMatcher.find()) {
			return null;
		}
		List<Element> exp = new ArrayList<Element>();
		// Create one mtu from hour and minute:
		exp.addAll(makeNewTokens(doc, hourMinuteText(reMatcher.group(1), reMatcher.group(2)), true, s));
		return exp;
	}

	/**
	 * Expand an hour-only time into the hour word(s) followed by "Uhr".
	 *
	 * @return the expanded tokens, or <code>null</code> if <code>s</code> contains no hour expression
	 */
	protected List<Element> expandTimeH(Document doc, String s) {
		Matcher reMatcher = reHour.matcher(s);
		// Without this check, group() would throw IllegalStateException on a non-matching string.
		if (!reMatcher.find()) {
			return null;
		}
		List<Element> exp = new ArrayList<Element>();
		String hour = reMatcher.group(1); // first bracket pair in reHour: hour
		if (isOne(hour)) {
			exp.addAll(makeNewTokens(doc, "ein"));
		} else {
			exp.addAll(number.expandInteger(doc, hour, false));
		}
		exp.addAll(makeNewTokens(doc, "Uhr"));
		return exp;
	}

	/** Builds the text "&lt;hour&gt; Uhr[ &lt;minute&gt;]"; the minute is omitted when it is "00". */
	private String hourMinuteText(String hour, String minute) {
		StringBuilder sb = new StringBuilder();
		if (isOne(hour)) {
			// "ein Uhr", not the form the number expansion would produce for 1
			sb.append("ein");
		} else {
			sb.append(number.expandInteger(hour));
		}
		sb.append(" Uhr");
		if (!minute.equals("00")) {
			sb.append(" ");
			sb.append(number.expandInteger(minute));
		}
		return sb.toString();
	}

	/** Tells whether the hour string denotes one o'clock ("1" or "01"). */
	private boolean isOne(String hour) {
		return hour.equals("1") || hour.equals("01");
	}

	// this is extremely greedy in consideration of the content
	// really being expected to represent a time
	protected boolean canDealWithTimeHMS12(String s) {
		return containsOneOrMoreDigits(s);
	}

	protected boolean canDealWithTimeHMS24(String s) {
		return containsOneOrMoreDigits(s);
	}

	/**
	 * Expand a 12-hour-clock time. All non-digit characters are discarded; a 'p' or 'P' anywhere in the string marks the time
	 * as pm, in which case 12 hours are added (except for 12 pm, which already is noon).
	 *
	 * @return the expanded tokens, or <code>null</code> if <code>s</code> has no digits or does not form a valid time
	 */
	protected List<Element> expandTimeHMS12(Document doc, String s) {
		boolean isAfternoon = isAfternoon(s);
		s = extractDigits(s);
		if (s.length() == 0)
			return null;
		// alternatively it could return "Null Uhr"
		// (that means: "midnight");

		// add an initial "0" if hours are represented in one digit
		if (s.length() % 2 == 1)
			s = "0" + s;
		// from here on s has an even length >= 2, so the first two characters are the hour
		if (isAfternoon) {
			String hours = s.substring(0, 2);
			// 12 pm is noon and already the correct 24-hour value; adding 12 would yield "24 Uhr"
			if (!hours.equals("12")) {
				s = add12Hours(hours) + s.substring(2);
			}
		}
		// NOTE(review): 12 am is still read as "zwölf Uhr" rather than midnight; mapping it to hour 0
		// depends on how number.expandInteger treats "00" - confirm before changing.
		return expandTimeHMS12or24(doc, s);
	}

	/**
	 * Expand a 24-hour-clock time. All non-digit characters are discarded.
	 *
	 * @return the expanded tokens, or <code>null</code> if <code>s</code> has no digits or does not form a valid time
	 */
	protected List<Element> expandTimeHMS24(Document doc, String s) {
		s = extractDigits(s);
		if (s.length() == 0)
			return null;
		// alternatively it could return "Null Uhr"
		// (that means: "midnight");

		// add an initial "0" if hours are represented in one digit
		if (s.length() % 2 == 1)
			s = "0" + s;
		return expandTimeHMS12or24(doc, s);
	}

	/**
	 * Turns a pure digit string of even length (hh, hhmm or hhmmss) into the "hh:mm:ssh" form understood by the match patterns
	 * and dispatches to the matching expansion method.
	 */
	private List<Element> expandTimeHMS12or24(Document doc, String s) {
		// insert separators, from the right so that earlier indices stay valid:
		for (int i = s.length() - 1; i > 1; i--) {
			if (i % 2 == 0)
				s = s.substring(0, i) + ":" + s.substring(i);
		}
		// append sFinal, otherwise no matching
		s += "h";
		switch (s.length()) {
		case 3: // just hours:
			return expandTimeH(doc, s);
		case 6: // hours and minutes:
			return expandTimeHM(doc, s);
		case 9: // hours, minutes and seconds:
			return expandTimeHMS(doc, s);
		}
		// else (though it may not occur):
		return null;
	}

	// tells, whether (a) 'pm' indicating letter(s) is/are contained
	// ("a", "A", "am" and "AM" indicate pre-noon)
	private boolean isAfternoon(String s) {
		return s.indexOf('p') >= 0 || s.indexOf('P') >= 0;
	}

	/**
	 * Adds 12 to an hour given as one or two decimal digits. Only makes sense for an expedient argument: any other length
	 * yields "0", and non-digit content is not supported.
	 */
	private String add12Hours(String s) {
		int iHour = 0;
		if (s.length() == 1 || s.length() == 2) {
			iHour = Integer.parseInt(s) + 12;
		}
		return Integer.toString(iHour);
	}

	/** Returns <code>s</code> with every character other than '0'..'9' removed. */
	private String extractDigits(String s) {
		StringBuilder digits = new StringBuilder(s.length());
		for (int i = 0; i < s.length(); i++) {
			char c = s.charAt(i);
			if ('0' <= c && c <= '9')
				digits.append(c);
		}
		return digits.toString();
	}

	/** Tells whether <code>s</code> contains at least one character '0'..'9'. */
	private boolean containsOneOrMoreDigits(String s) {
		for (int i = 0; i < s.length(); i++) {
			char c = s.charAt(i);
			if ('0' <= c && c <= '9')
				return true;
		}
		// else
		return false;
	}

}