/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.jena.sparql.resultset; import java.io.InputStream; import java.util.*; import org.apache.jena.atlas.AtlasException; import org.apache.jena.atlas.io.IO; import org.apache.jena.atlas.io.IndentedWriter; import org.apache.jena.atlas.io.PeekReader; import org.apache.jena.atlas.iterator.PeekIterator; import org.apache.jena.atlas.json.io.parser.TokenizerJSON; import org.apache.jena.datatypes.TypeMapper; import org.apache.jena.graph.Node; import org.apache.jena.graph.NodeFactory; import org.apache.jena.query.QueryException; import org.apache.jena.riot.RiotParseException; import org.apache.jena.riot.tokens.Token; import org.apache.jena.riot.tokens.TokenType; import org.apache.jena.sparql.core.Var; import org.apache.jena.sparql.engine.binding.Binding; import org.apache.jena.sparql.engine.binding.BindingFactory; import org.apache.jena.sparql.engine.binding.BindingMap; import org.apache.jena.sparql.engine.iterator.QueryIteratorBase; import org.apache.jena.sparql.serializer.SerializationContext; /** * Streaming Iterator over SPARQL JSON results, not yet fully implemented (see * JENA-267) * <p> * Creating the Iterator automatically causes it to parse a small chunk of the * stream to determine the variables in the result set either by reading the * header or reading some portion of the results if the results appear before * the header since JSON does not guarantee the order of keys within an object * </p> */ public class JSONInputIterator extends QueryIteratorBase { private InputStream input; private boolean isBooleanResults = false, boolResult = false, headerSeen = false; private Binding binding = null; private TokenizerJSON tokens; private PeekIterator<Token> peekIter; private Queue<Binding> cache = new LinkedList<>(); private Set<String> vars = new HashSet<>(); /** * Creates a SPARQL JSON Iterator * <p> * Automatically parses some portion of the input to determine the variables * in use * </p> */ public JSONInputIterator(InputStream input) { this.input = input; this.tokens = new TokenizerJSON(PeekReader.makeUTF8(input)); this.peekIter = new PeekIterator<>(this.tokens); // We should always parse the first little bit to see the head stuff or // to cache a chunk of results and infer the headers // Primarily we are trying to find out what the variables are preParse(); } /** * Returns the variables present in the result sets */ public Iterator<String> getVars() { return vars.iterator(); } /** * Gets whether the SPARQL JSON represents a boolean result set */ public boolean isBooleanResult() { return isBooleanResults; } /** * Does the pre-parsing which attempts to read the header of the results * file and determine variables present * <p> * If the header is encountered first then we read this, if the results are * encountered first we parse the first 100 results and determine the * variables present from those instead * </p> */ private void preParse() { // First off the { to start the object expect("Expected the start of the JSON Results Object", TokenType.LBRACE); // Then expect to see a Property Name // Loop here because we might see some things we can discard first do { if ( !isPropertyName() ) { Token t = nextToken(); String name = t.getImage(); checkColon(); if ( name.equals("head") ) { if ( headerSeen ) exception(t, "Invalid duplicate header property"); parseHeader(); // Continue afterwards because we want to be in place to // start streaming results } else if ( name.equals("boolean") ) { parseBoolean(); // Afterwards we continue because we want to see an empty // head } else if ( name.equals("results") ) { if ( isBooleanResults ) exception(t, "Encountered results property when boolean property has already been countered"); // Scroll to first result parseToFirstResult(); // If we already saw the header then exit at this point if ( headerSeen ) return; // If not we're going to pre-cache some chunk of results so // we can infer the variable names boolean complete = cacheResults(100); // If this exhausted the result set then we can continue // looking for the header // Otherwise we should exit as we may eventually see the // header later... if ( !complete ) { // TODO Now determine variables present from this return; } } else { ignoreValue(); } checkComma(TokenType.RBRACE); } else if ( lookingAt(TokenType.RBRACE) ) { // We hit the end of the result object already if ( !headerSeen ) exception(peekToken(), "End of JSON Results Object encountered before a valid header was seen"); nextToken(); // Shouldn't be any further content if ( !lookingAt(TokenType.EOF) ) exception(peekToken(), "Unexpected content after end of JSON Results Object"); // Can stop our initial buffering at this stage return; } else { exception(peekToken(), "Expected a JSON property name but got %s", peekToken()); } } while (true); } private void parseHeader() { do { if ( isPropertyName() ) { Token t = nextToken(); String name = t.getImage(); checkColon(); if ( name.equals("vars") ) { parseVars(); } else if ( name.equals("link") ) { // Throw away the links skipLinks(); } else { exception(t, "Unexpected property %s encountered in head object", name); } checkComma(TokenType.RBRACE); } else if ( lookingAt(TokenType.RBRACE) ) { nextToken(); return; } else { exception(peekToken(), "Unexpected Token encountered while parsing head object"); } } while (true); } private void parseVars() { if ( lookingAt(TokenType.LBRACKET) ) { nextToken(); vars.clear(); do { if ( lookingAt(TokenType.STRING1) || lookingAt(TokenType.STRING2) ) { Token t = nextToken(); String var = t.getImage(); vars.add(var); checkComma(TokenType.RBRACKET); } else if ( lookingAt(TokenType.RBRACKET) ) { nextToken(); return; } else { exception(peekToken(), "Unexpected Token encountered while parsing the variables list in the head object"); } } while (true); } else { exception(peekToken(), "Unexpected Token ecountered, expected a [ to start the array of variables in the head object"); } } private void skipLinks() { if ( lookingAt(TokenType.LBRACKET) ) { nextToken(); do { if ( lookingAt(TokenType.RBRACKET) ) { // End of links nextToken(); return; } else if ( lookingAt(TokenType.STRING1) || lookingAt(TokenType.STRING2) ) { // Ignore link and continue nextToken(); } else { exception(peekToken(), "Unexpected Token when a Link URI was expected"); } checkComma(TokenType.RBRACKET); } while (true); } else { exception(peekToken(), "Unexpected token when a [ was expected to start the list of URIs for a link property"); } } private void parseToFirstResult() { if ( lookingAt(TokenType.LBRACE) ) { nextToken(); if ( isPropertyName() ) { Token t = nextToken(); String name = t.getImage(); if ( name.equals("bindings") ) { checkColon(); if ( lookingAt(TokenType.LBRACKET) ) { nextToken(); } else { exception(peekToken(), "Unexpected Token encountered, expected a [ for the start of the bindings array"); } } else { exception(t, "Unexpected Token encountered, expected the bindings property"); } } else { exception(peekToken(), "Unexpected Token ecnountered, expected the bindings property"); } } else { exception(peekToken(), "Unexpected Token encountered, expected a { to start the results list object"); } } private void parseToEnd() { // TODO Parse through to end of the JSON document consuming the header // if we haven't seen it already checkComma(TokenType.RBRACE); } private void ignoreValue() { if ( isPropertyName() ) { // Just a string value so can discard and then check for the // subsequent comma nextToken(); checkComma(TokenType.RBRACE); } else if ( lookingAt(TokenType.DECIMAL) || lookingAt(TokenType.INTEGER) || lookingAt(TokenType.DOUBLE) || lookingAt(TokenType.KEYWORD) ) { // Just a numeric/keyword (boolean) value do discard and check for // subsequent comma nextToken(); checkComma(TokenType.RBRACE); } else if ( lookingAt(TokenType.LBRACE) ) { // Start of an Object nextToken(); // TODO We should really care about the syntactic validity of // objects we are ignoring but that seems like a bit too much effort int openBraces = 1; while (openBraces >= 1) { Token next = nextToken(); if ( next.getType().equals(TokenType.LBRACE) ) { openBraces++; } else if ( next.getType().equals(TokenType.RBRACE) ) { openBraces--; } } checkComma(TokenType.RBRACE); } else if ( lookingAt(TokenType.LBRACKET) ) { // Start of an Array nextToken(); // TODO We should really care about the syntactic validity of // objects we are ignoring but that seems like a bit too much effort int openBraces = 1; while (openBraces >= 1) { Token next = nextToken(); if ( next.getType().equals(TokenType.LBRACKET) ) { openBraces++; } else if ( next.getType().equals(TokenType.RBRACKET) ) { openBraces--; } } checkComma(TokenType.RBRACE); } else { exception(peekToken(), "Unexpected Token"); } } /** * Caches the first N results so we can infer variables, indicates whether * the caching exhausted the result set * * @param n * Number of results to cache */ private boolean cacheResults(int n) { for ( int i = 0 ; i < n ; i++ ) { if ( parseNextBinding() ) { this.cache.add(this.binding); this.binding = null; } else { return true; } } return false; } private void parseBoolean() { isBooleanResults = true; if ( lookingAt(TokenType.KEYWORD) ) { Token t = nextToken(); String keyword = t.getImage(); if ( keyword.equals("true") ) { boolResult = true; } else if ( keyword.equals("false") ) { boolResult = false; } else { exception(t, "Unexpected keyword %s encountered, expected true or false", keyword); } } else { exception(peekToken(), "Unexpected token when a true/false keyword was expected for the value of the boolean property"); } } @Override public void output(IndentedWriter out, SerializationContext sCxt) { // Not needed - only called as part of printing/debugging query plans. out.println("JSONInputIterator"); } @Override protected boolean hasNextBinding() { if ( isBooleanResults ) return false; if ( this.input != null ) { if ( this.cache.size() > 0 ) { this.binding = this.cache.remove(); return true; } else if ( this.binding == null ) { return this.parseNextBinding(); } else { return true; } } else { return false; } } private boolean parseNextBinding() { if ( lookingAt(TokenType.LBRACE) ) { nextToken(); BindingMap b = BindingFactory.create(); do { if ( isPropertyName() ) { Token t = nextToken(); String var = t.getImage(); checkColon(); Node n = parseNode(); b.add(Var.alloc(var), n); checkComma(TokenType.RBRACE); } else if ( lookingAt(TokenType.RBRACE) ) { nextToken(); checkComma(TokenType.RBRACKET); break; } else { exception(peekToken(), "Unexpected Token encountered, expected a property name to indicate the value for a variable"); } } while (true); this.binding = b; return true; } else if ( lookingAt(TokenType.RBRACKET) ) { // End of Bindings Array nextToken(); if ( lookingAt(TokenType.RBRACE) ) { nextToken(); parseToEnd(); } else { exception(peekToken(), "Unexpected Token encountered, expected a } to end the results object"); } } else { exception(peekToken(), "Unexpected Token encountered, expected a { for the start of a binding of ] to end the array of bindings"); } return false; } private Node parseNode() { String type, value, lang, datatype; type = value = lang = datatype = null; if ( lookingAt(TokenType.LBRACE) ) { Token pos = nextToken(); // Collect the Properties do { if ( isPropertyName() ) { Token t = nextToken(); String name = t.getImage(); checkColon(); if ( name.equals("type") ) { if ( type != null ) exception(t, "Illegal duplicate type property"); type = parseNodeInfo("type"); } else if ( name.equals("value") ) { if ( value != null ) exception(t, "Illegal duplicate value property"); value = parseNodeInfo("value"); } else if ( name.equals("datatype") ) { if ( datatype != null ) exception(t, "Illegal duplicate datatype property"); datatype = parseNodeInfo("datatype"); } else if ( name.equals("xml:lang") ) { if ( lang != null ) exception(t, "Illegal duplicate xml:lang property"); lang = parseNodeInfo("xml:lang"); } else { exception(t, "Unexpected Property Name '%s', expected one of type, value, datatype or xml:lang", name); } } else if ( lookingAt(TokenType.RBRACE) ) { nextToken(); break; } else { exception(peekToken(), "Unexpected Token, expected a property name as part of a Node object"); } } while (true); // Error if missing type or value if ( type == null ) exception(pos, "Encountered a Node object with no type property"); if ( value == null ) exception(pos, "Encountered a Node object with no value property"); // Generate a Node based on the properties we saw if ( type.equals("uri") ) { return NodeFactory.createURI(value); } else if ( type.equals("literal") ) { if ( datatype != null ) { return NodeFactory.createLiteral(value, TypeMapper.getInstance().getSafeTypeByName(datatype)); } else if ( lang != null ) { return NodeFactory.createLiteral(value, lang); } else { return NodeFactory.createLiteral(value); } } else if ( type.equals("bnode") ) { return NodeFactory.createBlankNode(value); } else { exception(pos, "Encountered a Node object with an invalid type value '%s', expected one of uri, literal or bnode", type); } } else { exception(peekToken(), "Unexpected Token, expected a { for the start of a Node object"); } return null; } private String parseNodeInfo(String name) { if ( lookingAt(TokenType.STRING1) || lookingAt(TokenType.STRING2) ) { Token t = nextToken(); String value = t.getImage(); checkComma(TokenType.RBRACE); return value; } else { exception(peekToken(), "Unexpected Token, expected a string as the value for the %s property", name); return null; } } @Override protected Binding moveToNextBinding() { if ( !hasNext() ) throw new NoSuchElementException(); Binding b = this.binding; this.binding = null; return b; } @Override protected void closeIterator() { IO.close(input); input = null; } @Override protected void requestCancel() { // Don't need to do anything special to cancel // Superclass should take care of that and call closeIterator() where we // do our actual clean up } // JSON Parsing Helpers taken from LangRDFJSON private boolean isPropertyName() { return lookingAt(TokenType.STRING1) || lookingAt(TokenType.STRING2); } private Token checkValidForStringProperty(String property) { Token t = null; if ( lookingAt(TokenType.STRING1) || lookingAt(TokenType.STRING2) ) { t = nextToken(); } else { exception(peekToken(), "JSON Values given for property " + property + " must be Strings"); } return t; } private void checkColon() { if ( !lookingAt(TokenType.COLON) ) { exception(peekToken(), "Expected a : character after a JSON Property Name but got %s", peekToken()); } nextToken(); } private void checkComma(TokenType terminator) { if ( lookingAt(TokenType.COMMA) ) { nextToken(); } else if ( lookingAt(terminator) ) { return; } else { exception(peekToken(), "Unexpected Token encountered, expected a , or a %s", terminator); } } // Streaming Parsing Helper Functions nicked from LangEngine // ---- Managing tokens. protected final Token peekToken() { // Avoid repeating. if ( eof() ) return tokenEOF; return peekIter.peek(); } // Set when we get to EOF to record line/col of the EOF. private Token tokenEOF = null; protected final boolean eof() { if ( tokenEOF != null ) return true; if ( !moreTokens() ) { tokenEOF = new Token(tokens.getLine(), tokens.getColumn()); tokenEOF.setType(TokenType.EOF); return true; } return false; } protected final boolean moreTokens() { return peekIter.hasNext(); } protected final boolean lookingAt(TokenType tokenType) { if ( eof() ) return tokenType == TokenType.EOF; if ( tokenType == TokenType.NODE ) return peekToken().isNode(); return peekToken().hasType(tokenType); } // Remember line/col of last token for messages protected long currLine = -1; protected long currCol = -1; protected final Token nextToken() { if ( eof() ) return tokenEOF; // Tokenizer errors appear here! try { Token t = peekIter.next(); currLine = t.getLine(); currCol = t.getColumn(); return t; } catch (RiotParseException ex) { // Intercept to log it. raiseException(ex); throw ex; } catch (AtlasException ex) { // Bad I/O RiotParseException ex2 = new RiotParseException(ex.getMessage(), -1, -1); raiseException(ex2); throw ex2; } } protected final void expectOrEOF(String msg, TokenType tokenType) { // DOT or EOF if ( eof() ) return; expect(msg, tokenType); } protected final void expect(String msg, TokenType ttype) { if ( !lookingAt(ttype) ) { Token location = peekToken(); exception(location, msg); } nextToken(); } protected final void exception(Token token, String msg, Object... args) { if ( token != null ) exceptionDirect(String.format(msg, args), token.getLine(), token.getColumn()); else exceptionDirect(String.format(msg, args), -1, -1); } protected final void exceptionDirect(String msg, long line, long col) { raiseException(new RiotParseException(msg, line, col)); } protected final void raiseException(RiotParseException ex) { throw new QueryException("Error passing SPARQL JSON results", ex); } }