/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.jena.riot.lang; import static org.apache.jena.riot.lang.ReaderTriX.State.GRAPH ; import static org.apache.jena.riot.lang.ReaderTriX.State.OUTER ; import static org.apache.jena.riot.lang.ReaderTriX.State.TRIPLE ; import static org.apache.jena.riot.lang.ReaderTriX.State.TRIX ; import java.io.InputStream ; import java.io.Reader ; import java.util.ArrayList ; import java.util.Collection ; import java.util.List ; import java.util.Objects ; import javax.xml.namespace.QName ; import javax.xml.stream.* ; import org.apache.jena.atlas.web.ContentType ; import org.apache.jena.datatypes.RDFDatatype ; import org.apache.jena.datatypes.xsd.XSDDatatype ; import org.apache.jena.graph.Node ; import org.apache.jena.graph.NodeFactory ; import org.apache.jena.graph.Triple ; import org.apache.jena.riot.ReaderRIOT ; import org.apache.jena.riot.RiotException ; import org.apache.jena.riot.system.* ; import org.apache.jena.riot.writer.StreamWriterTriX ; import org.apache.jena.riot.writer.WriterTriX ; import org.apache.jena.sparql.core.Quad ; import org.apache.jena.sparql.resultset.ResultSetException ; import org.apache.jena.sparql.util.Context ; import org.apache.jena.vocabulary.RDF ; /** Read TriX. * See {@link TriX} for details. * @see TriX * @see WriterTriX * @see StreamWriterTriX */ public class ReaderTriX implements ReaderRIOT { // DTD for TrIX : The schema is a much longer. /* <!-- TriX: RDF Triples in XML --> <!ELEMENT TriX (graph*)> <!ATTLIST TriX xmlns CDATA #FIXED "http://www.w3.org/2004/03/trix/trix-1/"> <!ELEMENT graph (uri*, triple*)> <!ELEMENT triple ((id|uri|plainLiteral|typedLiteral), uri, (id|uri|plainLiteral|typedLiteral))> <!ELEMENT id (#PCDATA)> <!ELEMENT uri (#PCDATA)> <!ELEMENT plainLiteral (#PCDATA)> <!ATTLIST plainLiteral xml:lang CDATA #IMPLIED> <!ELEMENT typedLiteral (#PCDATA)> <!ATTLIST typedLiteral datatype CDATA #REQUIRED> */ private ErrorHandler errorHandler = ErrorHandlerFactory.getDefaultErrorHandler() ; private ParserProfile parserProfile = null ; @Override public void read(InputStream in, String baseURI, ContentType ct, StreamRDF output, Context context) { XMLInputFactory xf = XMLInputFactory.newInstance() ; XMLStreamReader xReader ; try { xReader = xf.createXMLStreamReader(in) ; } catch (XMLStreamException e) { throw new RiotException("Can't initialize StAX parsing engine", e) ; } read(xReader, baseURI, output) ; } @Override public void read(Reader reader, String baseURI, ContentType ct, StreamRDF output, Context context) { XMLInputFactory xf = XMLInputFactory.newInstance() ; XMLStreamReader xReader ; try { xReader = xf.createXMLStreamReader(reader) ; } catch (XMLStreamException e) { throw new ResultSetException("Can't initialize StAX parsing engine", e) ; } read(xReader, baseURI, output) ; } private static String nsRDF = RDF.getURI() ; private static String nsXSD = XSDDatatype.XSD ; // No "#" private static String nsXML0 = "http://www.w3.org/XML/1998/namespace" ; private static String rdfXMLLiteral = RDF.xmlLiteral.getURI() ; enum State { OUTER, TRIX, GRAPH, TRIPLE } private void read(XMLStreamReader parser, String baseURI, StreamRDF output) { ParserProfile profile = parserProfile ; if ( profile == null ) profile = RiotLib.profile(baseURI, false, false, errorHandler) ; if ( errorHandler == null ) setErrorHandler(profile.getHandler()) ; State state = OUTER ; Node g = null ; List<Node> terms = new ArrayList<>() ; try { while(parser.hasNext()) { int event = parser.next() ; switch (event) { case XMLStreamConstants.NAMESPACE: break ; case XMLStreamConstants.START_DOCUMENT : break ; case XMLStreamConstants.END_DOCUMENT : if ( state != OUTER ) staxError(parser.getLocation(), "End of document while processing XML element") ; return ; case XMLStreamConstants.END_ELEMENT : { String tag = parser.getLocalName() ; switch(tag) { case TriX.tagTriple: { int line = parser.getLocation().getLineNumber() ; int col = parser.getLocation().getColumnNumber() ; if ( terms.size() != 3 ) staxError(parser.getLocation(), "Wrong number of terms for a triple. Want 3, got "+terms.size()) ; Node s = terms.get(0) ; Node p = terms.get(1) ; Node o = terms.get(2) ; if ( p.isLiteral() ) staxError(parser.getLocation(), "Predicate is a literal") ; if ( s.isLiteral() ) staxError(parser.getLocation(), "Subject is a literal") ; if ( g == null ) { Triple t = profile.createTriple(s, p, o, line, col) ; output.triple(t) ; } else { if ( g.isLiteral() ) staxError(parser.getLocation(), "graph name is a literal") ; Quad q = profile.createQuad(g, s, p, o, line, col) ; output.quad(q) ; } terms.clear(); // Next is either end of <graph> or another <triple> state = GRAPH ; break ; } case TriX.tagGraph: state = TRIX ; g = null ; break ; case TriX.tagTriX: case TriX.tagTriXAlt: // We don't worry about mismatched tags. state = OUTER ; break ; } break ; } case XMLStreamConstants.START_ELEMENT : { String tag = parser.getLocalName() ; switch (tag) { case TriX.tagTriX: case TriX.tagTriXAlt: if ( state != OUTER ) staxErrorOutOfPlaceElement(parser) ; state = TRIX ; break ; // structure case TriX.tagGraph: if ( state != TRIX ) staxErrorOutOfPlaceElement(parser) ; // URI? state = GRAPH ; break ; case TriX.tagTriple: { if ( state != GRAPH ) staxErrorOutOfPlaceElement(parser) ; state = TRIPLE ; break ; } // Can occur in <graph> and <triple> case TriX.tagId: case TriX.tagQName: case TriX.tagURI: { if ( state != GRAPH && state != TRIPLE ) staxErrorOutOfPlaceElement(parser) ; Node n = term(parser, profile) ; if ( state == GRAPH ) { if ( g != null ) staxError(parser.getLocation(), "Duplicate graph name") ; g = n ; if ( g.isLiteral() ) staxError(parser.getLocation(), "graph name is a literal") ; } else add(terms, n, 3, parser) ; break ; } case TriX.tagPlainLiteral: case TriX.tagTypedLiteral: { if ( state != TRIPLE ) staxErrorOutOfPlaceElement(parser) ; Node n = term(parser, profile) ; add(terms, n, 3, parser) ; break ; } default: staxError(parser.getLocation(), "Unrecognized XML element: "+qnameAsString(parser.getName())) ; break ; } } } } staxError("Premature end of file") ; return ; } catch (XMLStreamException ex) { staxError(parser.getLocation(), "XML error: "+ex.getMessage()) ; } } private void add(Collection<Node> acc, Node node, int max, XMLStreamReader parser) { if ( acc.size() >= max ) staxError(parser.getLocation(), "Too many terms for a triple: "+node) ; acc.add(node) ; } private void staxErrorOutOfPlaceElement(XMLStreamReader parser) { staxError(parser.getLocation(), "Out of place XML element: "+tagName(parser)) ; } private Node term(XMLStreamReader parser, ParserProfile profile) throws XMLStreamException { String tag = parser.getLocalName() ; int line = parser.getLocation().getLineNumber() ; int col = parser.getLocation().getColumnNumber() ; switch(tag) { case TriX.tagURI: { // Two uses! String x = parser.getElementText() ; Node n = profile.createURI(x, line, col) ; return n ; } case TriX.tagQName: { String x = parser.getElementText() ; int idx = x.indexOf(':') ; if ( idx == -1 ) staxError(parser.getLocation(), "Expected ':' in prefixed name. Found "+x) ; String[] y = x.split(":", 2) ; // Allows additional ':' String prefUri = parser.getNamespaceURI(y[0]) ; String local = y[1] ; return profile.createURI(prefUri+local, line, col) ; } case TriX.tagId: { String x = parser.getElementText() ; return profile.createBlankNode(null, x, line, col) ; } case TriX.tagPlainLiteral: { // xml:lang int x = parser.getAttributeCount() ; if ( x > 1 ) // Namespaces? staxError(parser.getLocation(), "Multiple attributes : only one allowed") ; String lang = null ; if ( x == 1 ) lang = attribute(parser, nsXML0, TriX.attrXmlLang) ; String lex = parser.getElementText() ; if ( lang == null ) return profile.createStringLiteral(lex, line, col) ; else return profile.createLangLiteral(lex, lang, line, col) ; } case TriX.tagTypedLiteral: { int nAttr = parser.getAttributeCount() ; if ( nAttr != 1 ) staxError(parser.getLocation(), "Multiple attributes : only one allowed") ; String dt = attribute(parser, TriX.NS, TriX.attrDatatype) ; if ( dt == null ) staxError(parser.getLocation(), "No datatype attribute") ; RDFDatatype rdt = NodeFactory.getType(dt) ; String lex = (rdfXMLLiteral.equals(dt)) ? slurpRDFXMLLiteral(parser) : parser.getElementText() ; return profile.createTypedLiteral(lex, rdt, line, col) ; } default: { QName qname = parser.getName() ; staxError(parser.getLocation(), "Unrecognized tag -- "+qnameAsString(qname)) ; return null ; } } } private String slurpRDFXMLLiteral(XMLStreamReader parser) throws XMLStreamException { StringBuffer content = new StringBuffer(); int depth = 0 ; while(parser.hasNext()) { int event = parser.next(); switch (event) { case XMLStreamConstants.START_ELEMENT: { QName qname = parser.getName() ; content.append("<") ; content.append(qnameAsString(qname)) ; int N = parser.getNamespaceCount() ; for ( int i = 0 ; i < N ; i ++ ) { String p = parser.getNamespacePrefix(i) ; if ( p == null ) p = "xmlns" ; else p = "xmlns:"+p ; String v = parser.getNamespaceURI(i) ; content.append(" ") ; content.append(p) ; content.append("=\"") ; content.append(v) ; content.append("\"") ; } N = parser.getAttributeCount() ; for ( int i = 0 ; i < N ; i ++ ) { QName name = parser.getAttributeName(i) ; String a = qnameAsString(name) ; String v = parser.getAttributeValue(i) ; content.append(" ") ; content.append(a) ; content.append("=\"") ; content.append(v) ; content.append("\"") ; } content.append(">") ; depth++ ; break ; } case XMLStreamConstants.END_ELEMENT: { depth-- ; if ( depth == -1 ) { // Close tag of typed Literal. return content.toString(); } QName qname = parser.getName() ; String x = qnameAsString(qname) ; content.append("</"+x+">") ; // Final whitespace? break ; } case XMLStreamConstants.CHARACTERS: case XMLStreamConstants.CDATA: case XMLStreamConstants.SPACE: case XMLStreamConstants.ENTITY_REFERENCE: case XMLStreamConstants.PROCESSING_INSTRUCTION: case XMLStreamConstants.COMMENT: // String $ = parser.getText() ; // System.out.println("----") ; // System.out.println($) ; // System.out.println("----") ; content.append(parser.getText()) ; break ; case XMLStreamConstants.END_DOCUMENT: staxError(parser.getLocation(), "End of file") ; } } staxError(parser.getLocation(), "End of file") ; return null ; } private String tagName(XMLStreamReader parser) { return qnameAsString(parser.getName()) ; } private String qnameAsString(QName qname) { String x = qname.getPrefix() ; if ( x == null || x.isEmpty() ) return qname.getLocalPart() ; return x+":"+qname.getLocalPart() ; } private String attribute(XMLStreamReader parser, String nsURI, String localname) { int x = parser.getAttributeCount() ; if ( x > 1 ) // Namespaces? staxError(parser.getLocation(), "Multiple attributes : only one allowed : "+tagName(parser)) ; if ( x == 0 ) return null ; String attrPX = parser.getAttributePrefix(0) ; String attrNS = parser.getAttributeNamespace(0) ; if ( attrNS == null ) attrNS = parser.getName().getNamespaceURI() ; String attrLN = parser.getAttributeLocalName(0) ; if ( ! Objects.equals(nsURI, attrNS) || ! Objects.equals(attrLN, localname) ) { staxError(parser.getLocation(), "Unexpected attribute : "+attrPX+":"+attrLN+" at "+tagName(parser)) ; } String attrVal = parser.getAttributeValue(0) ; return attrVal ; } private void staxError(String msg) { staxError(-1, -1, msg) ; } private void staxError(Location loc, String msg) { staxError(loc.getLineNumber(), loc.getColumnNumber(), msg) ; } private void staxError(int line, int col, String msg) { getErrorHandler().error(msg, line, col) ; } @Override public ErrorHandler getErrorHandler() { return errorHandler ; } @Override public void setErrorHandler(ErrorHandler errorHandler) { this.errorHandler = errorHandler ; } @Override public ParserProfile getParserProfile() { return parserProfile ; } @Override public void setParserProfile(ParserProfile profile) { this.parserProfile = profile ; } }