/*******************************************************************************
* Copyright (c) 2008 CWI.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* jurgen@vinju.org - initial API and implementation
*******************************************************************************/
package org.rascalmpl.value.io;
import java.io.IOException;
import java.io.Reader;
import java.util.Set;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.rascalmpl.value.IListWriter;
import org.rascalmpl.value.IMapWriter;
import org.rascalmpl.value.ISetWriter;
import org.rascalmpl.value.IValue;
import org.rascalmpl.value.IValueFactory;
import org.rascalmpl.value.exceptions.FactParseError;
import org.rascalmpl.value.exceptions.FactTypeUseException;
import org.rascalmpl.value.exceptions.UnsupportedTypeException;
import org.rascalmpl.value.type.Type;
import org.rascalmpl.value.type.TypeFactory;
import org.rascalmpl.value.type.TypeStore;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
/**
* This IValueReader parses and validates certain forms of XML and deserializes
* it as IValues. The forms of XML allowed are limited by a number of different
* value types. In particular, it allows: <ul>
* <li> TreeSortTypes and TreeNodeTypes </li>
* <li> lists, sets, relations and maps, but only when they are wrapped by a single
* ConstructorType. In other words, a container must be the only child of a tree node.
* Elements of containers are juxtaposed as children of this node.</li>
* <li> tuples, but not nested ones. Tuples of containers are not allowed either.
* Elements of tuples are juxtaposed in the XML files.</li>
* <li> basic types, such as str, int, double; with the same restriction as for
* container types, they must be the only child of a tree node.</li>
* <li> lists of tuples, sets of tuples and maps of tuples are allowed, but not
* lists of lists, tuples of tuples, lists in tuples, sets in tuples, etc.
* If such nesting is needed, it is required to use a wrapping tree node.</li>
* </ul>
* There is no support for NamedTypes yet, only TreeSortType and ConstructorType are
* allowed.
*
* The limitations of this class are governed by wanting to avoid ambiguity
* while validating XML using the pdb's type system and the inherent impedance
* mismatch between the type system of pdb and the structure of XML.
*
* Use this class to import many forms of XML data into PDB.
*
*/
public class XMLReader extends AbstractTextReader {
    // NOTE(review): the factory is used with its default configuration. If the XML
    // may come from an untrusted source, consider disabling DTDs / external entities
    // (XXE) on this factory before parsing.
    private final DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
    private IValueFactory vf;
    private static final TypeFactory TF = TypeFactory.getInstance();
    private TypeStore ts;

    /**
     * Parses the XML document on {@code stream} into a DOM tree and converts its
     * document element into a value of the given type.
     *
     * @param factory the value factory used to build all resulting values
     * @param store   the type store in which constructors are looked up by element name
     * @param type    the type expected for the document element
     * @param stream  the character stream containing the XML document
     * @return the value built from the document element
     * @throws FactTypeUseException declared by the signature; not thrown directly here
     * @throws IOException if the XML cannot be parsed, the parser cannot be configured,
     *         or a DOM failure occurs; the original exception is attached as the cause
     * @throws FactParseError if numeric content is not a valid number
     */
    public IValue read(IValueFactory factory, TypeStore store, Type type, Reader stream)
            throws FactTypeUseException, IOException {
        this.vf = factory;
        this.ts = store;

        try {
            Document doc = domFactory.newDocumentBuilder().parse(new InputSource(stream));
            return parse(doc.getDocumentElement(), type);
        } catch (SAXException se) {
            throw new IOException("Parsing of value failed because XML was invalid: " + se.getMessage(), se);
        } catch (ParserConfigurationException pce) {
            throw new IOException("Parsing of value failed because XML configuration is wrong: " + pce.getMessage(), pce);
        } catch (DOMException de) {
            throw new IOException("Parsing of value failed because of a XML document failure: " + de.getMessage(), de);
        } catch (NumberFormatException nfe) {
            throw new FactParseError("Expected a number, got something different", nfe);
        }
    }

    /**
     * Converts a single DOM node into a value of the expected type.
     *
     * Abstract data types are dispatched on the node name: a constructor wrapping a
     * single list, set, relation or map is parsed as that container, anything else
     * as an ordinary constructor. Strings, integers, reals and rationals are read
     * from the node value; external types are read as strings.
     *
     * @throws UnsupportedTypeException if the expected type is none of the above
     */
    private IValue parse(Node node, Type expected) {
        if (expected.isAbstractData()) {
            Type sort = expected;
            String name = node.getNodeName();

            if (isListWrapper(name, sort)) {
                return parseList(node, sort);
            }
            else if (isSetWrapper(name, sort)) {
                return parseSet(node, sort);
            }
            // NOTE(review): every relation wrapper also satisfies isSetWrapper, so this
            // branch looks unreachable; parseSet handles tuple elements the same way.
            else if (isRelationWrapper(name, sort)) {
                return parseRelation(node, sort);
            }
            else if (isMapWrapper(name, sort)) {
                return parseMap(node, sort);
            }
            else {
                return parseTreeSort(node, sort);
            }
        }
        else if (expected.equivalent(TF.stringType())) {
            return parseString(node);
        }
        else if (expected.equivalent(TF.integerType())) {
            return parseInt(node);
        }
        else if (expected.equivalent(TF.realType())) {
            return parseDouble(node);
        }
        else if (expected.equivalent(TF.rationalType())) {
            return parseRational(node);
        }
        else if (expected.isExternalType()) {
            // external types default to string
            return parseString(node);
        }

        throw new UnsupportedTypeException(
                "Outermost or nested tuples, lists, sets, relations or maps are not allowed.", expected);
    }

    /**
     * Looks up the constructors called {@code name} in {@code expected} and, if the
     * first one found has exactly one field, returns the type of that field.
     *
     * @return the single field type, or {@code null} if there is no such constructor
     *         or the first match does not have arity 1
     */
    private Type wrappedFieldType(String name, Type expected) {
        Set<Type> nodeTypes = ts.lookupConstructor(expected, name);
        if (nodeTypes.isEmpty()) {
            return null;
        }

        // TODO: implement overloading; only the first alternative is considered
        Type nodeType = nodeTypes.iterator().next();
        return nodeType.getArity() == 1 ? nodeType.getFieldTypes().getFieldType(0) : null;
    }

    /** Returns true if {@code name} is a unary constructor whose field is a list. */
    private boolean isListWrapper(String name, Type expected) {
        Type field = wrappedFieldType(name, expected);
        return field != null && field.isSubtypeOf(TF.listType(TF.valueType()));
    }

    /** Returns true if {@code name} is a unary constructor whose field is a set. */
    private boolean isSetWrapper(String name, Type expected) {
        Type field = wrappedFieldType(name, expected);
        return field != null && field.isSubtypeOf(TF.setType(TF.valueType()));
    }

    /**
     * Returns true if {@code name} is a unary constructor whose field is a set with
     * a fixed-width (tuple) element type.
     */
    private boolean isRelationWrapper(String name, Type expected) {
        Type field = wrappedFieldType(name, expected);
        return field != null
                && field.isSubtypeOf(TF.setType(TF.valueType()))
                && field.getElementType().isFixedWidth();
    }

    /** Returns true if {@code name} is a unary constructor whose field is a map. */
    private boolean isMapWrapper(String name, Type expected) {
        Type field = wrappedFieldType(name, expected);
        return field != null && field.isMap();
    }

    /**
     * Parses a rational written as {@code <numerator>r<denominator>}.
     *
     * @throws FactParseError if the node value does not consist of exactly two
     *         parts separated by {@code r}
     */
    private IValue parseRational(Node node) {
        String contents = node.getNodeValue().trim();
        String[] parts = contents.split("r");

        if (parts.length == 2) {
            // numerator is the part before the 'r', denominator the part after it
            return vf.rational(vf.integer(Integer.parseInt(parts[0])), vf.integer(Integer.parseInt(parts[1])));
        }

        throw new FactParseError(contents, 0);
    }

    /** Parses the trimmed node value as a real number. */
    private IValue parseDouble(Node node) {
        return vf.real(Double.parseDouble(node.getNodeValue().trim()));
    }

    /** Parses the trimmed node value as an integer. */
    private IValue parseInt(Node node) {
        return vf.integer(Integer.parseInt(node.getNodeValue().trim()));
    }

    /** Returns the node value, untrimmed, as a string value. */
    private IValue parseString(Node node) {
        return vf.string(node.getNodeValue());
    }

    /**
     * Builds a tuple from consecutive children, starting at index {@code start}.
     * One child is consumed per tuple field. If the children run out before the
     * tuple is complete, the remaining positions are left unset.
     */
    private IValue parseTuple(NodeList children, int start, Type tuple) {
        int arity = tuple.getArity();
        IValue[] elements = new IValue[arity];

        for (int j = 0; j < arity && start + j < children.getLength(); j++) {
            elements[j] = parse(children.item(start + j), tuple.getFieldType(j));
        }

        return vf.tuple(elements);
    }

    /**
     * Parses a constructor that wraps a single map. The children of the node are
     * read as alternating keys and values. A fixed-width (tuple) key or value spans
     * as many consecutive children as it has fields. A trailing incomplete entry is
     * ignored.
     */
    private IValue parseMap(Node node, Type expected) {
        Set<Type> nodeTypes = ts.lookupConstructor(expected, node.getNodeName());
        // TODO: implement overloading
        Type nodeType = nodeTypes.iterator().next();
        Type mapType = nodeType.getFieldType(0);
        Type keyType = mapType.getKeyType();
        Type valueType = mapType.getValueType();
        NodeList children = node.getChildNodes();
        IMapWriter writer = vf.mapWriter();

        // number of child nodes one key, one value and one complete entry occupy
        int keyWidth = keyType.isFixedWidth() ? keyType.getArity() : 1;
        int valueWidth = valueType.isFixedWidth() ? valueType.getArity() : 1;
        int entryWidth = keyWidth + valueWidth;

        // entryWidth > 0 guards against never advancing on degenerate empty tuples
        for (int i = 0; entryWidth > 0 && i + entryWidth <= children.getLength(); i += entryWidth) {
            int valueStart = i + keyWidth;

            IValue key = keyType.isFixedWidth()
                    ? parseTuple(children, i, keyType)
                    : parse(children.item(i), keyType);
            IValue value = valueType.isFixedWidth()
                    ? parseTuple(children, valueStart, valueType)
                    : parse(children.item(valueStart), valueType);

            writer.put(key, value);
        }

        return vf.constructor(nodeType, writer.done());
    }

    /**
     * Parses a constructor that wraps a single relation. The children of the node
     * are grouped into tuples, one child per relation field.
     */
    private IValue parseRelation(Node node, Type expected) {
        Set<Type> nodeTypes = ts.lookupConstructor(expected, node.getNodeName());
        // TODO implement overloading
        Type nodeType = nodeTypes.iterator().next();
        Type relType = nodeType.getFieldType(0);
        Type fields = relType.getFieldTypes();
        NodeList children = node.getChildNodes();
        ISetWriter writer = vf.setWriter();

        for (int i = 0; i < children.getLength(); i += fields.getArity()) {
            writer.insert(parseTuple(children, i, fields));
        }

        return vf.constructor(nodeType, writer.done());
    }

    /**
     * Parses a constructor that wraps a single set. Each child is one element,
     * unless the element type is fixed-width (a tuple), in which case the children
     * are grouped into tuples.
     */
    private IValue parseSet(Node node, Type expected) {
        Set<Type> nodeTypes = ts.lookupConstructor(expected, node.getNodeName());
        // TODO implement overloading
        Type nodeType = nodeTypes.iterator().next();
        Type setType = nodeType.getFieldType(0);
        Type elementType = setType.getElementType();
        NodeList children = node.getChildNodes();
        ISetWriter writer = vf.setWriter();

        if (!elementType.isFixedWidth()) {
            for (int i = 0; i < children.getLength(); i++) {
                writer.insert(parse(children.item(i), elementType));
            }
        } else {
            for (int i = 0; i < children.getLength(); i += elementType.getArity()) {
                writer.insert(parseTuple(children, i, elementType));
            }
        }

        return vf.constructor(nodeType, writer.done());
    }

    /**
     * Parses a constructor that wraps a single list. Each child is one element,
     * unless the element type is fixed-width (a tuple), in which case the children
     * are grouped into tuples.
     */
    private IValue parseList(Node node, Type expected) {
        Set<Type> nodeTypes = ts.lookupConstructor(expected, node.getNodeName());
        // TODO implement overloading
        Type nodeType = nodeTypes.iterator().next();
        Type listType = nodeType.getFieldType(0);
        Type elementType = listType.getElementType();
        NodeList children = node.getChildNodes();
        IListWriter writer = vf.listWriter();

        if (!elementType.isFixedWidth()) {
            for (int i = 0; i < children.getLength(); i++) {
                writer.append(parse(children.item(i), elementType));
            }
        } else {
            for (int i = 0; i < children.getLength(); i += elementType.getArity()) {
                writer.append(parseTuple(children, i, elementType));
            }
        }

        return vf.constructor(nodeType, writer.done());
    }

    /**
     * Parses an ordinary constructor. The children of the node are matched, in
     * order, against the constructor's fields. A fixed-width (tuple) field consumes
     * as many consecutive children as it has fields; every other field consumes
     * exactly one child.
     */
    private IValue parseTreeSort(Node node, Type expected) {
        // TODO deal with overloading
        Type nodeType = ts.lookupConstructor(expected, node.getNodeName()).iterator().next();
        Type childrenTypes = nodeType.getFieldTypes();
        NodeList children = node.getChildNodes();
        IValue[] values = new IValue[nodeType.getArity()];

        int sourceIndex = 0;
        int targetIndex = 0;

        while (sourceIndex < children.getLength() && targetIndex < nodeType.getArity()) {
            Type childType = childrenTypes.getFieldType(targetIndex);

            if (childType.isFixedWidth()) {
                values[targetIndex++] = parseTuple(children, sourceIndex, childType);
                sourceIndex += childType.getArity();
            }
            else {
                values[targetIndex++] = parse(children.item(sourceIndex++), childType);
            }
        }

        return vf.constructor(nodeType, values);
    }
}