/**
* Copyright 2010 Voxeo Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this
* file except in compliance with the License.
*
* You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/
package com.voxeo.moho.util;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
public class NLSMLParser {
private static final List<Map<String, String>> EMPTY_RESULT = new ArrayList<Map<String, String>>(0);
public static List<Map<String, String>> parse(final String xml) throws Exception {
final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setValidating(false);
InputStream is = null;
Document d = null;
try {
is = new ByteArrayInputStream(xml.getBytes());
d = factory.newDocumentBuilder().parse(is);
}
finally {
if (is != null) {
try {
is.close();
}
catch (final IOException e) {
}
}
}
final NodeList results = d.getElementsByTagName("result");
if (results.getLength() == 0) {
return EMPTY_RESULT;
}
final List<Map<String, String>> retval = new ArrayList<Map<String, String>>(results.getLength());
for (int i = 0; i < results.getLength(); i++) {
final Element result = (Element) results.item(i);
final String fullGrammarID = result.getAttribute("grammar");
final List<Element> interpretations = getChildElementsList(result, "interpretation");
for (final Element interpretation : interpretations) {
final Map<String, String> reco = new HashMap<String, String>();
retval.add(reco);
final String localFullGrammarID = interpretation.getAttribute("grammar");
String gramid;
String gram;
if (localFullGrammarID != null && !localFullGrammarID.equals("")) {
gramid = parseGrammarID(localFullGrammarID, false);
gram = parseGrammarID(localFullGrammarID, true);
}
else {
gramid = parseGrammarID(fullGrammarID, false);
gram = parseGrammarID(fullGrammarID, true);
}
reco.put("_grammar_id", gramid);
reco.put("_grammar", gram);
final String conf = interpretation.getAttributes().getNamedItem("confidence").getNodeValue();
final float confF = Float.parseFloat(conf) / 100F;
reco.put("_confidence", Float.toString(confF));
reco.put("_interpretationconfidence", Float.toString(confF));
String tag = getTextContent(interpretation);
if (tag != null) {
tag = tag.trim();
if (tag.length() > 0) {
tag = tag.replaceAll("\\s*", "");
reco.put("_tag", tag);
}
}
Element input = getFirstChildElement(interpretation, "input");
if (input != null) {
String interp = getTextContent(input);
if (interp != null && interp.trim().length() > 0) {
reco.put("_interpretation", interp);
}
final Node modeNode = input.getAttributes().getNamedItem("mode");
if (modeNode != null) {
String inputmode = modeNode.getNodeValue().trim();
if (inputmode.equalsIgnoreCase("speech")) {
inputmode = "voice"; // per VXML
}
reco.put("_inputmode", new String(inputmode));
}
}
final Element voxeoresult = getFirstChildElement(interpretation, "voxeoresult");
if (voxeoresult != null) {
final Element concept = getFirstChildElement(voxeoresult, "concept");
if (concept != null) {
String conceptText = getTextContent(concept);
if (conceptText != null) {
conceptText = conceptText.trim();
if (conceptText.length() > 0) {
reco.put("_concept", conceptText);
}
}
}
final Element interp = getFirstChildElement(voxeoresult, "interpretation");
if (interp != null) {
String interpText = getTextContent(interp);
if (interpText != null) {
interpText = interpText.trim();
if (interpText.length() > 0) {
reco.put("_interpretation", interpText);
}
}
}
}
}
}
return retval;
}
private static Element getFirstChildElement(final Node node, final String nodeName) {
return getFirstChildElementIntern(node, new QName(nodeName));
}
private static Element getFirstChildElementIntern(final Node node, final QName nodeName) {
Element childElement = null;
final Iterator<Element> it = getChildElementsIntern(node, nodeName).iterator();
if (it.hasNext()) {
childElement = it.next();
}
return childElement;
}
private static List<Element> getChildElementsList(final Node node, final String nodeName) {
return getChildElementsIntern(node, new QName(nodeName));
}
private static List<Element> getChildElementsIntern(final Node node, final QName nodeName) {
final ArrayList<Element> list = new ArrayList<Element>();
final NodeList nlist = node.getChildNodes();
for (int i = 0; i < nlist.getLength(); i++) {
final Node child = nlist.item(i);
if (child.getNodeType() == Node.ELEMENT_NODE) {
if (nodeName == null) {
list.add((Element) child);
}
else {
QName qname;
if (nodeName.getNamespaceURI().length() > 0) {
qname = new QName(child.getNamespaceURI(), child.getLocalName());
}
else {
qname = new QName(child.getLocalName() == null ? child.getNodeName() : child.getLocalName());
}
if (qname.equals(nodeName)) {
list.add((Element) child);
}
}
}
}
return Collections.unmodifiableList(list);
}
private static String getTextContent(final Node node) {
boolean hasTextContent = false;
final StringBuffer buffer = new StringBuffer();
final NodeList nlist = node.getChildNodes();
for (int i = 0; i < nlist.getLength(); i++) {
final Node child = nlist.item(i);
if (child.getNodeType() == Node.TEXT_NODE) {
buffer.append(child.getNodeValue());
hasTextContent = true;
}
}
return hasTextContent ? buffer.toString() : null;
}
private static String parseGrammarID(final String fullGrammarID, final boolean suffix) {
// session:0@vomoto.com
final int start = fullGrammarID.indexOf(":") + 1;
final int end = fullGrammarID.indexOf("@");
if (start == -1 || end == -1) {
return null;
}
else {
String foo = null;
if (start == end) {
foo = "" + fullGrammarID.charAt(end);
}
else {
if (suffix) {
foo = fullGrammarID.substring(start);
}
else {
foo = fullGrammarID.substring(start, end);
}
}
return foo;
}
}
}