/**
* Copyright 2003 DFKI GmbH.
* All Rights Reserved. Use is subject to license terms.
*
* This file is part of MARY TTS.
*
* MARY TTS is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, version 3 of the License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package marytts.language.de.phonemiser;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import marytts.datatypes.MaryXML;
import marytts.util.MaryUtils;
import marytts.util.dom.MaryDomUtils;
import marytts.util.dom.NameNodeFilter;
import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.traversal.DocumentTraversal;
import org.w3c.dom.traversal.NodeFilter;
import org.w3c.dom.traversal.NodeIterator;
import org.w3c.dom.traversal.TreeWalker;
/**
 * Add inflection endings to expanded abbreviations and ordinals.
 * <p>
 * Tokens are selected by the presence of an <code>ending</code> attribute; the ending is appended to the token's
 * <code>sounds_like</code> attribute. Ordinals used adverbially get the suffix "ns". All other tokens are treated as
 * adjectives: the surrounding noun/prepositional phrase is scanned for a determiner, and the ending table is consulted
 * for every ending class that is still considered possible.
 * <p>
 * NOTE(review): as the code stands, nothing ever removes an entry from the set of possible ending classes, and
 * {@link #getDeterminerType(Element)} only yields "d" or <code>null</code>. With all nine classes in play the table
 * always produces more than one distinct ending, so the adjective branch never rewrites <code>sounds_like</code>; only
 * the adverbial branch does. The phrase scans are kept so that a real morphological disambiguation can be plugged in
 * again.
 *
 * @author Marc Schröder
 */
public class Inflection {
	/** All ending classes known to the ending table; the starting point before any disambiguation. */
	private static final String[] ALL_ENDING_CLASSES = { "1", "2", "3", "4", "5", "6", "7", "8", "9" };

	/**
	 * Maps an ending class, optionally followed by a determiner type ("d" = definite, "i" = indefinite), to the
	 * consonant that follows the adjectival "e". Read-only after construction.
	 */
	private final Map<String, String> endingTable;

	private final Logger logger;

	/**
	 * Set up the ending table and the logger.
	 *
	 * @throws IOException
	 *             kept in the signature for compatibility with existing callers.
	 */
	public Inflection() throws IOException {
		// The table is never modified after this point, so an unmodifiable view held in a final field
		// is sufficient; no per-lookup synchronization is required.
		endingTable = Collections.unmodifiableMap(buildEndingTable());
		logger = MaryUtils.getLogger("Inflection");
	}

	/**
	 * Build the table of adjective endings, keyed by ending class plus optional determiner type.
	 *
	 * @return a freshly created, mutable map holding all table entries.
	 */
	private static Map<String, String> buildEndingTable() {
		Map<String, String> table = new HashMap<String, String>();
		// Ending class applicable to:
		// masc singular nom
		table.put("1d", ""); // with definite determiner
		table.put("1i", "r"); // with indefinite determiner
		table.put("1", "r"); // without determiner
		// Ending class applicable to:
		// masc singular gen/acc, neutrum singular gen
		table.put("2d", "n"); // with definite determiner
		table.put("2i", "n"); // with indefinite determiner
		table.put("2", "n"); // without determiner
		// Ending class applicable to:
		// masc/neutrum singular dat
		table.put("3d", "n"); // with definite determiner
		table.put("3i", "n"); // with indefinite determiner
		table.put("3", "m"); // without determiner
		// Ending class applicable to:
		// masc/fem/neutrum plural nom/acc
		// (no entry for an indefinite determiner)
		table.put("4d", "n"); // with definite determiner
		table.put("4", ""); // without determiner
		// Ending class applicable to:
		// masc/fem/neutrum plural gen
		// (no entry for an indefinite determiner)
		table.put("5d", "n"); // with definite determiner
		table.put("5", "r"); // without determiner
		// Ending class applicable to:
		// masc/fem/neutrum plural dat
		// (no entry for an indefinite determiner)
		table.put("6d", "n"); // with definite determiner
		table.put("6", "n"); // without determiner
		// Ending class applicable to:
		// fem singular nom/acc
		table.put("7d", ""); // with definite determiner
		table.put("7i", ""); // with indefinite determiner
		table.put("7", ""); // without determiner
		// Ending class applicable to:
		// fem singular gen/dat
		table.put("8d", "n"); // with definite determiner
		table.put("8i", "n"); // with indefinite determiner
		table.put("8", "r"); // without determiner
		// Ending class applicable to:
		// neutrum singular nom/acc
		table.put("9d", ""); // with definite determiner
		table.put("9i", "s"); // with indefinite determiner
		table.put("9", "s"); // without determiner
		return table;
	}

	/**
	 * Visit every token element in the document that carries an <code>ending</code> attribute and try to append the
	 * appropriate inflection ending to its <code>sounds_like</code> attribute.
	 *
	 * @param doc
	 *            the document to process; it must also implement {@link DocumentTraversal}, because it is cast to that
	 *            interface. The document is modified in place.
	 */
	public void determineEndings(Document doc) {
		// Search for <t> tags with an "ending" attribute:
		NodeIterator ni = ((DocumentTraversal) doc).createNodeIterator(doc, NodeFilter.SHOW_ELEMENT, new NodeFilter() {
			@Override
			public short acceptNode(Node n) {
				if (!(n instanceof Element))
					return NodeFilter.FILTER_SKIP;
				Element e = (Element) n;
				if (e.getTagName().equals(MaryXML.TOKEN) && e.hasAttribute("ending"))
					return NodeFilter.FILTER_ACCEPT;
				return NodeFilter.FILTER_SKIP;
			}
		}, true);
		Element toInflect;
		while ((toInflect = (Element) ni.nextNode()) != null) {
			inflectToken(doc, toInflect);
		}
	}

	/**
	 * Determine and, where possible, apply the inflection ending for a single token.
	 *
	 * @param doc
	 *            the document containing the token.
	 * @param toInflect
	 *            a token element that has an <code>ending</code> attribute.
	 */
	private void inflectToken(Document doc, Element toInflect) {
		logger.debug("Token `" + MaryDomUtils.tokenText(toInflect) + "' needs an inflection ending.");
		// If it has an "ending" attribute, it must also have a "sounds_like"
		// attribute.
		if (!toInflect.hasAttribute("sounds_like")) {
			logger.warn("Token `" + MaryDomUtils.tokenText(toInflect)
					+ "' has an `ending' attribute, but no `sounds_like' attribute. Ignoring.");
			return;
		}
		// For adverbial use, simply append "-ns":
		if (toInflect.getAttribute("ending").equals("ordinal") && toInflect.getAttribute("pos").equals("ADV")) {
			toInflect.setAttribute("sounds_like", toInflect.getAttribute("sounds_like") + "ns");
			logger.debug("...added adverbial ending.");
			return;
		}
		// Otherwise, it is an adjective, so we need to analyse the NP/PP:
		// Start with the fullest possible set of ending classes, then
		// reduce ambiguity by means of the context.
		// NOTE(review): no statement below currently removes anything from this set.
		Set<String> endingClasses = new HashSet<String>(Arrays.asList(ALL_ENDING_CLASSES));
		TreeWalker tw = ((DocumentTraversal) doc).createTreeWalker(doc, NodeFilter.SHOW_ELEMENT, new NameNodeFilter(
				MaryXML.TOKEN), true);
		// Also need the determiner type:
		String detType = findDeterminerTypeToLeft(tw, toInflect);
		// The outcome of the rightward scan is not consumed yet; the scan is retained as the hook
		// for context-based disambiguation.
		scanRightForNoun(tw, toInflect);
		// Now the disambiguation is complete.
		Set<String> endings = collectCandidateEndings(endingClasses, detType);
		// If there is exactly one ending in the endings Set, then we can use it:
		if (endings.size() == 1) {
			String ending = endings.iterator().next();
			logger.debug("...correct ending should be `e" + ending + "'");
			StringBuilder soundsLike = new StringBuilder(toInflect.getAttribute("sounds_like"));
			// abbreviations don't have an "e" at the end, so add it:
			if (toInflect.getAttribute("ending").equals("adjadv"))
				soundsLike.append("e");
			soundsLike.append(ending);
			toInflect.setAttribute("sounds_like", soundsLike.toString());
		} else {
			logger.debug("...cannot determine right ending, using default `e'.");
		}
	}

	/**
	 * Walk leftwards from the token towards the start of its NP/PP and report the type of the first determiner found.
	 * The walk is skipped entirely when the token itself has <code>syn_attach="1"</code>, i.e. is the first token of
	 * the phrase.
	 *
	 * @param tw
	 *            a tree walker over token elements; it is repositioned onto <code>toInflect</code> first.
	 * @param toInflect
	 *            the token whose left context is inspected.
	 * @return "d" or "i" as reported by {@link #getDeterminerType(Element)}, or <code>null</code> if no determiner was
	 *         found.
	 */
	private String findDeterminerTypeToLeft(TreeWalker tw, Element toInflect) {
		tw.setCurrentNode(toInflect);
		if (toInflect.getAttribute("syn_attach").equals("1")) {
			return null;
		}
		String detType = null;
		boolean foundStart = false;
		Element t;
		while (!foundStart && (t = (Element) tw.previousNode()) != null) {
			String synAttach = t.getAttribute("syn_attach");
			if (synAttach.equals("1") || t.getAttribute("syn_phrase").equals("CNP")
					|| t.getAttribute("syn_phrase").equals("CPP")) {
				// Found the start of the NP/PP; this token is still examined below.
				foundStart = true;
			}
			// Tokens with syn_attach="2" are not considered; the first determiner found wins.
			if (!synAttach.equals("2") && detType == null) {
				detType = getDeterminerType(t);
			}
		}
		return detType;
	}

	/**
	 * Walk rightwards from the token through the remainder of its NP/PP, noting whether a noun (pos "NN") occurs.
	 * <p>
	 * The walk stops at the next token with <code>syn_attach="1"</code>, and also at a conjunction in a coordinated
	 * noun phrase (syn_phrase "CNP"/"CPP") if the left part already has its own noun (as in
	 * "der 2. Mann und die 3. Frau", as opposed to "der 2. und der 3. Mann").
	 *
	 * @param tw
	 *            a tree walker over token elements; it is repositioned onto <code>toInflect</code> first.
	 * @param toInflect
	 *            the token whose right context is inspected.
	 * @return true if a noun was seen before the walk stopped.
	 */
	private boolean scanRightForNoun(TreeWalker tw, Element toInflect) {
		tw.setCurrentNode(toInflect);
		boolean haveSeenNoun = false;
		Element t;
		while ((t = (Element) tw.nextNode()) != null) {
			if (t.getAttribute("syn_attach").equals("1")) {
				break;
			}
			String synPhrase = t.getAttribute("syn_phrase");
			if ((synPhrase.equals("CNP") || synPhrase.equals("CPP")) && haveSeenNoun) {
				break;
			}
			if (!t.getAttribute("syn_attach").equals("2") && t.getAttribute("pos").equals("NN")) {
				haveSeenNoun = true;
			}
		}
		return haveSeenNoun;
	}

	/**
	 * Look up the ending for every remaining ending class and collect the distinct results.
	 *
	 * @param endingClasses
	 *            the ending classes still considered possible.
	 * @param detType
	 *            "d", "i", or <code>null</code> if the phrase has no determiner.
	 * @return the set of distinct endings; a single element means the ending is unambiguous.
	 */
	private Set<String> collectCandidateEndings(Set<String> endingClasses, String detType) {
		Set<String> endings = new HashSet<String>();
		for (String endingClass : endingClasses) {
			String key = (detType == null ? endingClass : endingClass + detType);
			String ending = endingTable.get(key);
			// The table has no "4i", "5i" or "6i" entries; such keys are only caught when assertions are enabled.
			assert (ending != null);
			endings.add(ending);
			logger.debug("...ending class " + endingClass + " with "
					+ (detType == null ? "no" : (detType.equals("d") ? "definite" : "indefinite")) + " determiner: Ending `e"
					+ ending + "'");
		}
		return endings;
	}

	/**
	 * For a given token t, try to determine whether it is a definite or indefinite determiner.
	 * <p>
	 * This implementation only recognises the part-of-speech tag APPRART and therefore never returns "i".
	 *
	 * @param t
	 *            a token to verify.
	 * @return "d" for definite determiner, "i" for indefinite determiner, and null if the token is not a determiner.
	 */
	private String getDeterminerType(Element t) {
		// special case: APPRART (zum, hinters, ...) is a definite determiner
		// (not correctly treated in mmorph).
		if (t.getAttribute("pos").equals("APPRART"))
			return "d";
		return null;
	}
}