/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.jena.sparql.resultset; import java.io.BufferedReader ; import java.io.IOException ; import java.util.ArrayList ; import java.util.List ; import java.util.NoSuchElementException ; import org.apache.jena.atlas.io.IO ; import org.apache.jena.atlas.io.IndentedWriter ; import org.apache.jena.graph.NodeFactory ; import org.apache.jena.query.QueryException ; import org.apache.jena.sparql.core.Var ; import org.apache.jena.sparql.engine.binding.Binding ; import org.apache.jena.sparql.engine.binding.BindingFactory ; import org.apache.jena.sparql.engine.binding.BindingMap ; import org.apache.jena.sparql.engine.iterator.QueryIteratorBase ; import org.apache.jena.sparql.graph.NodeConst ; import org.apache.jena.sparql.serializer.SerializationContext ; /** * Class used to do streaming parsing of actual result rows from the CSV */ public class CSVInputIterator extends QueryIteratorBase { private BufferedReader reader; private BindingMap binding; private int expectedItems; private List<Var> vars; private long lineNum = 1; /** * Creates a new CSV Input Iterator * <p> * Assumes the Header Row has already been read and that the next row to be read from the reader will be a Result Row * </p> */ public CSVInputIterator(BufferedReader reader, List<Var> vars) { this.reader = reader; this.expectedItems = vars.size(); this.vars = vars; } @Override public void output(IndentedWriter out, SerializationContext sCxt) { // Not needed - only called as part of printing/debugging query plans. out.println("CSVInputIterator") ; } @Override protected boolean hasNextBinding() { if (this.reader != null) { if (this.binding == null) return this.parseNextBinding(); else return true; } else { return false; } } private boolean parseNextBinding() { String line; try { line = this.reader.readLine(); //Once EOF has been reached we'll see null for this call so we can return false because there are no further bindings if (line == null) return false; this.lineNum++; } catch (IOException e) { throw new QueryException("Error parsing CSV results - " + e.getMessage()); } if ( line.isEmpty() ) { // Empty input line - no bindings. // Only valid when we expect zero/one values as otherwise we should get a sequence of tab characters // which means a non-empty string which we handle normally if (expectedItems > 1) throw new QueryException(String.format("Error Parsing CSV results at Line %d - The result row had 0/1 values when %d were expected", this.lineNum, expectedItems)); binding = BindingFactory.create() ; if ( expectedItems == 1 ) binding.add(vars.get(0), NodeConst.emptyString) ; return true ; } binding = parseLine(vars, line) ; return true ; } private BindingMap parseLine(List<Var> vars, String line) { BindingMap binding = BindingFactory.create() ; List<String> terms = new ArrayList<>() ; int idx = 0 ; while(idx < line.length()) { char ch = line.charAt(idx) ; StringBuilder s = new StringBuilder() ; if ( ch == '\"' || ch == '\'' ) { char qCh = ch ; idx++ ; while(idx < line.length() ) { ch = line.charAt(idx) ; idx++ ; if ( ch == qCh ) break ; // escapes?? s.append(ch) ; } if ( ch != qCh ) throw new QueryException(String.format("Error Parsing CSV results at Line %d - Unterminated quoted string", this.lineNum)); if ( idx < line.length() ) { ch = line.charAt(idx) ; if ( ch != ',' ) throw new QueryException(String.format("Error Parsing CSV results at Line %d - Expected comma after quote", this.lineNum)) ; } } else { while(idx < line.length() ) { ch = line.charAt(idx) ; if ( ch == ',' ) break ; idx++ ; // escapes s.append(ch) ; } } terms.add(s.toString()) ; // At end of per-term processing, we are looking at "," or EOL. // Looking at , or EOL. if ( ch == ',' && idx==line.length()-1 ) { //EOL terms.add("") ; break ; } // Skip "," idx++ ; } if ( terms.size() != vars.size() ) throw new QueryException(String.format("Error Parsing CSV results at Line %d - The result row '%s' has %d items when %d was expected", this.lineNum, line, terms.size(), vars.size())) ; for ( int i = 0 ; i < vars.size() ; i++ ) binding.add(vars.get(i), NodeFactory.createLiteral(terms.get(i))) ; return binding ; } @Override protected Binding moveToNextBinding() { if (!hasNext()) throw new NoSuchElementException() ; Binding b = this.binding; this.binding = null ; return b; } @Override protected void closeIterator() { IO.close(reader) ; reader = null; } @Override protected void requestCancel() { //Don't need to do anything special to cancel //Superclass should take care of that and call closeIterator() where we do our actual clean up } }