/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jena.sparql.resultset;

import java.io.InputStream ;
import java.util.ArrayList ;
import java.util.Iterator ;
import java.util.List ;
import java.util.function.Function;

import org.apache.jena.atlas.csv.CSVParser ;
import org.apache.jena.atlas.iterator.Iter ;
import org.apache.jena.atlas.logging.FmtLog ;
import org.apache.jena.graph.Node ;
import org.apache.jena.graph.NodeFactory ;
import org.apache.jena.query.ResultSet ;
import org.apache.jena.sparql.ARQException ;
import org.apache.jena.sparql.core.Var ;
import org.apache.jena.sparql.engine.ResultSetStream ;
import org.apache.jena.sparql.engine.binding.Binding ;
import org.apache.jena.sparql.engine.binding.BindingFactory ;
import org.apache.jena.sparql.engine.binding.BindingMap ;
import org.slf4j.Logger ;
import org.slf4j.LoggerFactory ;

/** Convenient comma separated values - see also TSV (tab separated values)
 *  which outputs full RDF terms (in Turtle-style).
 *
 *  The CSV format supported is:
 *  <ul>
 *  <li>First row is variable names without '?'</li>
 *  <li>Strings (quoted if necessary) and numbers are output only.
 *  No language tags, or datatypes.
 *  URIs are sent without &lt;&gt;
 *  </li>
 *  <li>CSV is RFC 4180, but there are many variations.</li>
 *  </ul>
 *  This code reads the file and treats everything as strings.
 *  <p>
 *  The code also allows for parsing boolean results where we expect the header to be a single string
 *  and the first data row to be a single value from the set: true yes false no
 *  </p>
 *  <p>
 *  Any other value is considered an error for parsing a boolean result; rows after the
 *  first data row are ignored (a warning is logged if one is present).
 *  </p>
 */
public class CSVInput
{
    // This code exists to support the SPARQL WG tests.
    private static final Logger log = LoggerFactory.getLogger(CSVInput.class) ;

    /**
     * Read a CSV result set from a stream.
     * <p>
     * The first row supplies the variable names. Every later row is turned into a
     * binding, lazily, as the returned result set is iterated. Each field becomes a
     * plain literal node built from the field text.
     * </p>
     * <p>
     * A row whose length differs from the header is not rejected: a warning is logged,
     * missing trailing fields are bound to the empty string and surplus fields are ignored.
     * </p>
     *
     * @param in the CSV input
     * @return a result set streaming over the remaining rows of {@code in}
     * @throws ARQException if the input has no header row
     */
    public static ResultSet fromCSV(InputStream in)
    {
        CSVParser parser = CSVParser.create(in) ;
        final List<Var> vars = vars(parser) ;
        List<String> varNames = Var.varNames(vars) ;

        Function<List<String>, Binding> transform = new Function<List<String>, Binding>() {
            // Number of the data row being converted; used only in the warning message.
            private int count = 1 ;

            @Override
            public Binding apply(List<String> row)
            {
                if ( row.size() != vars.size() ) {
                    FmtLog.warn(log, "Row %d: Length=%d: expected=%d", count, row.size(), vars.size()) ;
                }
                BindingMap binding = BindingFactory.create() ;
                // Drive the loop from the header so every variable gets a value,
                // padding short rows with "".
                for ( int i = 0 ; i < vars.size() ; i++ ) {
                    Var v = vars.get(i) ;
                    String field = (i < row.size()) ? row.get(i) : "" ;
                    Node n = NodeFactory.createLiteral(field) ;
                    binding.add(v, n) ;
                }
                count++ ;
                return binding ;
            }
        } ;

        // Map the parser's row iterator to bindings lazily, so rows are only
        // parsed as the result set is consumed.
        Iterator<Binding> bindings = Iter.map(parser.iterator(), transform) ;
        // NOTE(review): the second constructor argument is deliberately left null,
        // as in the original code - confirm against ResultSetStream's contract.
        return new ResultSetStream(varNames, null, bindings) ;
    }

    /**
     * Read the header row from the parser and allocate one variable per column name.
     *
     * @param parser parser positioned at the start of the input
     * @return the variables, in column order
     * @throws ARQException if the parser yields no header row
     */
    private static List<Var> vars(CSVParser parser)
    {
        final List<Var> vars = new ArrayList<>() ;
        List<String> varNames = parser.parse1() ;
        if ( varNames == null ) {
            throw new ARQException("SPARQL CSV Results malformed, input is empty") ;
        }
        for ( String vn : varNames ) {
            vars.add(Var.alloc(vn)) ;
        }
        return vars ;
    }

    /**
     * Read a boolean (ASK) result from a stream.
     * <p>
     * The input must have a header row with exactly one column, followed by a data row
     * with exactly one field whose value is, ignoring case, one of
     * {@code true}, {@code yes}, {@code false} or {@code no}.
     * A header name other than {@code _askResult}, or any row after the data row,
     * only causes a warning to be logged.
     * </p>
     *
     * @param in the CSV input
     * @return {@code true} for "true"/"yes", {@code false} for "false"/"no"
     * @throws ARQException if the input is empty, the header does not have exactly one
     *         column, the data row is missing or does not have exactly one field,
     *         or the value is not one of the accepted strings
     */
    public static boolean booleanFromCSV(InputStream in)
    {
        CSVParser parser = CSVParser.create(in) ;
        final List<Var> vars = vars(parser) ;
        if ( vars.size() != 1 ) {
            throw new ARQException("CSV Boolean Results malformed: variables line='"+vars+"'") ;
        }
        if ( ! vars.get(0).getName().equals("_askResult") ) {
            FmtLog.warn(log, "Boolean result variable is '%s', not '_askResult'", vars.get(0).getName()) ;
        }

        List<String> line = parser.parse1() ;
        // parse1() yields null when there are no more rows (see the checks on the
        // header row and on the trailing row); report a missing data row as malformed
        // input rather than failing with a NullPointerException.
        if ( line == null ) {
            throw new ARQException("CSV Boolean Results malformed: no data line") ;
        }
        if ( line.size() != 1 ) {
            throw new ARQException("CSV Boolean Results malformed: data line='"+line+"'") ;
        }

        String str = line.get(0) ;
        boolean b ;
        if ( str.equalsIgnoreCase("true") || str.equalsIgnoreCase("yes") ) {
            b = true ;
        } else if ( str.equalsIgnoreCase("false") || str.equalsIgnoreCase("no") ) {
            b = false ;
        } else {
            throw new ARQException("CSV Boolean Results malformed, expected one of - true yes false no - but got " + str) ;
        }

        List<String> line2 = parser.parse1() ;
        if ( line2 != null ) {
            // Pass the row as an argument: it is untrusted data and must not be
            // interpreted as part of the format string.
            FmtLog.warn(log, "Extra rows: first is %s", line2) ;
        }
        return b ;
    }
}