/** * Copyright (c) 2014 by the original author or authors. * * This code is free software; you can redistribute it and/or modify it under the terms of the * GNU Lesser General Public License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * The above copyright notice and this permission notice shall be included in all copies or * substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING * BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ package ch.sdi.core.impl.parser; import java.io.BufferedReader; import java.io.InputStream; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Scanner; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.springframework.stereotype.Component; import org.springframework.util.StringUtils; import ch.sdi.core.exc.SdiException; import ch.sdi.core.impl.data.filter.RawDataFilterString; import ch.sdi.report.ReportMsg; /** * A Parser for CSV files. * * @version 1.0 (01.11.2014) * @author Heri */ @Component public class CsvParser { /** logger for this class */ private Logger myLog = LogManager.getLogger( CsvParser.class ); /** * Parses the given input stream. * <p> * * @param aInputStream * must not be null * @param aDelimiter * must not be null * @param aEncoding * The encoding to be used. If null or empty, the systems default encoding is used. * @return a list which contains a list for each found person. The inner list contains the found * values for this person. The number and the order must correspond to the configured field * name list (see * in each line. * @throws SdiException */ public List<List<String>> parse( InputStream aInputStream, String aDelimiter, String aEncoding ) throws SdiException { return parse( aInputStream, aDelimiter, aEncoding, null ); } /** * Parses the given input stream. * <p> * * @param aInputStream * must not be null * @param aDelimiter * must not be null * @param aEncoding * The encoding to be used. If null or empty, the systems default encoding is used. * @return a list which contains a list for each found person. The inner list contains the found * values for this person. The number and the order must correspond to the configured field * name list (see * in each line. * @throws SdiException */ public List<List<String>> parse( InputStream aInputStream, String aDelimiter, String aEncoding, List<RawDataFilterString> aFilters ) throws SdiException { if ( !StringUtils.hasLength( aDelimiter ) ) { throw new SdiException( "Delimiter not set", SdiException.EXIT_CODE_CONFIG_ERROR ); } // if myDelimiter == null try { myLog.debug( "Using encoding " + aEncoding ); BufferedReader br = new BufferedReader( !StringUtils.hasText( aEncoding ) ? new InputStreamReader( aInputStream ) : new InputStreamReader( aInputStream, aEncoding ) ); List<List<String>> result = new ArrayList<>(); Collection<String> myLinesFiltered = new ArrayList<>(); int lineNo = 0; String line; LineLoop: while ( ( line = br.readLine() ) != null ) { lineNo++; if ( aFilters != null ) { for ( RawDataFilterString filter : aFilters ) { if ( filter.isFiltered( line ) ) { myLog.debug( "Skipping commented line: " + line ); myLinesFiltered.add( line ); continue LineLoop; } } } myLog.debug( "Parsing line " + lineNo + ": " + line ); List<String> list = new ArrayList<String>(); Scanner sc = new Scanner( line ); try { sc.useDelimiter( aDelimiter ); while ( sc.hasNext() ) { list.add( sc.next() ); } // Note: if the line is terminated by the delimiter (last entry not present, the last entry // will not appear in the scanned enumeration. Check for this special case: if ( line.endsWith( aDelimiter ) ) { list.add( "" ); } // if line.endsWith( aDelimiter ) } finally { sc.close(); } result.add( list ); } myLog.info( new ReportMsg( ReportMsg.ReportType.PREPARSE_FILTER, "Filtered lines", myLinesFiltered ) ); return result; } catch ( Throwable t ) { throw new SdiException( "Problems while parsing CSV file", t, SdiException.EXIT_CODE_PARSE_ERROR ); } } }