/** * Copyright (C) 2016 - present by OpenGamma Inc. and the OpenGamma group of companies * * Please see distribution for license. */ package com.opengamma.strata.collect.io; import java.io.BufferedReader; import java.io.IOException; import java.io.Reader; import java.io.UncheckedIOException; import java.util.ArrayList; import java.util.List; import java.util.NoSuchElementException; import java.util.Spliterator; import java.util.Spliterators; import java.util.stream.Stream; import java.util.stream.StreamSupport; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import com.google.common.collect.PeekingIterator; import com.google.common.io.CharSource; import com.opengamma.strata.collect.ArgChecker; import com.opengamma.strata.collect.Unchecked; /** * Iterator over the rows of a CSV file. * <p> * Provides the ability to iterate over a CSV file together with the ability to parse it from a {@link CharSource}. * The separator may be specified, allowing TSV files (tab-separated) and other similar formats to be parsed. * See {@link CsvFile} for more details of the CSV format. * <p> * This class processes the CSV file row-by-row. * To load the entire CSV file into memory, use {@link CsvFile}. * <p> * This class must be used in a try-with-resources block to ensure that the underlying CSV file is closed: * <pre> * try (CsvIterator csvIterator = CsvIterator.of(source, true)) { * // use the CsvIterator * } * </pre> * One way to use the iterable is with the for-each loop, using a lambda to adapt {@code Iterator} to {@code Iterable}: * <pre> * try (CsvIterator csvIterator = CsvIterator.of(source, true)) { * for (CsvRow row : () -> csvIterator) { * // process the row * } * } * </pre> * This class also allows the headers to be obtained without reading the whole CSV file: * <pre> * try (CsvIterator csvIterator = CsvIterator.of(source, true)) { * ImmutableList{@literal <String>} headers = csvIterator.headers(); * } * </pre> */ public final class CsvIterator implements AutoCloseable, PeekingIterator<CsvRow> { /** * The buffered reader. */ private final BufferedReader reader; /** * The separator */ private final char separator; /** * The header row, ordered as the headers appear in the file. */ private final ImmutableList<String> headers; /** * The header map, transformed for case-insensitive searching. */ private final ImmutableMap<String, Integer> searchHeaders; /** * The next row. */ private CsvRow nextRow; //------------------------------------------------------------------------ /** * Parses the specified source as a CSV file, using a comma as the separator. * <p> * This method opens the CSV file for reading. * The caller is responsible for closing it by calling {@link #close()}. * * @param source the CSV file resource * @param headerRow whether the source has a header row, an empty source must still contain the header * @return the CSV file * @throws UncheckedIOException if an IO exception occurs * @throws IllegalArgumentException if the file cannot be parsed */ public static CsvIterator of(CharSource source, boolean headerRow) { return of(source, headerRow, ','); } /** * Parses the specified source as a CSV file where the separator is specified and might not be a comma. * <p> * This overload allows the separator to be controlled. * For example, a tab-separated file is very similar to a CSV file, the only difference is the separator. * <p> * This method opens the CSV file for reading. * The caller is responsible for closing it by calling {@link #close()}. * * @param source the file resource * @param headerRow whether the source has a header row, an empty source must still contain the header * @param separator the separator used to separate each field, typically a comma, but a tab is sometimes used * @return the CSV file * @throws UncheckedIOException if an IO exception occurs * @throws IllegalArgumentException if the file cannot be parsed */ public static CsvIterator of(CharSource source, boolean headerRow, char separator) { ArgChecker.notNull(source, "source"); @SuppressWarnings("resource") BufferedReader reader = Unchecked.wrap(() -> source.openBufferedStream()); return create(reader, headerRow, separator); } /** * Parses the specified reader as a CSV file, using a comma as the separator. * <p> * This factory method allows the separator to be controlled. * For example, a tab-separated file is very similar to a CSV file, the only difference is the separator. * <p> * The caller is responsible for closing the reader, such as by calling {@link #close()}. * * @param reader the file reader * @param headerRow whether the source has a header row, an empty source must still contain the header * @return the CSV file * @throws UncheckedIOException if an IO exception occurs * @throws IllegalArgumentException if the file cannot be parsed */ public static CsvIterator of(Reader reader, boolean headerRow) { return of(reader, headerRow, ','); } /** * Parses the specified reader as a CSV file where the separator is specified and might not be a comma. * <p> * This factory method allows the separator to be controlled. * For example, a tab-separated file is very similar to a CSV file, the only difference is the separator. * <p> * The caller is responsible for closing the reader, such as by calling {@link #close()}. * * @param reader the file reader * @param headerRow whether the source has a header row, an empty source must still contain the header * @param separator the separator used to separate each field, typically a comma, but a tab is sometimes used * @return the CSV file * @throws UncheckedIOException if an IO exception occurs * @throws IllegalArgumentException if the file cannot be parsed */ public static CsvIterator of(Reader reader, boolean headerRow, char separator) { ArgChecker.notNull(reader, "reader"); @SuppressWarnings("resource") BufferedReader breader = reader instanceof BufferedReader ? (BufferedReader) reader : new BufferedReader(reader); return create(breader, headerRow, separator); } // create the iterator private static CsvIterator create(BufferedReader breader, boolean headerRow, char separator) { try { if (!headerRow) { return new CsvIterator(breader, separator, ImmutableList.of(), ImmutableMap.of()); } String line = breader.readLine(); if (line == null) { throw new IllegalArgumentException("Could not read header row from empty CSV file"); } ImmutableList<String> headers = CsvFile.parseLine(line, separator); return new CsvIterator(breader, separator, headers, CsvFile.buildSearchHeaders(headers)); } catch (RuntimeException ex) { try { breader.close(); } catch (IOException ex2) { ex.addSuppressed(ex2); } throw ex; } catch (IOException ex) { try { breader.close(); } catch (IOException ex2) { ex.addSuppressed(ex2); } throw new UncheckedIOException(ex); } } //------------------------------------------------------------------------ /** * Restricted constructor. * * @param reader the buffered reader * @param headers the header row * @param searchHeaders the search headers */ private CsvIterator( BufferedReader reader, char separator, ImmutableList<String> headers, ImmutableMap<String, Integer> searchHeaders) { this.reader = reader; this.separator = separator; this.headers = headers; this.searchHeaders = searchHeaders; } //------------------------------------------------------------------------ /** * Gets the header row. * <p> * If there is no header row, an empty list is returned. * * @return the header row */ public ImmutableList<String> headers() { return headers; } /** * Returns a stream that wraps this iterator. * <p> * The stream will process any remaining rows in the CSV file. * As such, it is recommended that callers should use this method or the iterator methods and not both. * * @return the stream wrapping this iterator */ public Stream<CsvRow> asStream() { Spliterator<CsvRow> spliterator = Spliterators.spliteratorUnknownSize(this, Spliterator.ORDERED | Spliterator.NONNULL); return StreamSupport.stream(spliterator, false); } //------------------------------------------------------------------------- /** * Checks whether there is another row in the CSV file. * * @return true if there is another row, false if not * @throws UncheckedIOException if an IO exception occurs * @throws IllegalArgumentException if the file cannot be parsed */ @Override public boolean hasNext() { if (nextRow != null) { return true; } else { String line = null; while ((line = Unchecked.wrap(() -> reader.readLine())) != null) { ImmutableList<String> fields = CsvFile.parseLine(line, separator); if (!fields.isEmpty()) { nextRow = new CsvRow(headers, searchHeaders, fields); return true; } } return false; } } /** * Peeks the next row from the CSV file without changing the iteration position. * * @return the peeked row * @throws UncheckedIOException if an IO exception occurs * @throws IllegalArgumentException if the file cannot be parsed * @throws NoSuchElementException if the end of file has been reached */ @Override public CsvRow peek() { if (nextRow != null || hasNext()) { return nextRow; } else { throw new NoSuchElementException("CsvIterator has reached the end of the file"); } } /** * Returns the next row from the CSV file. * * @return the next row * @throws UncheckedIOException if an IO exception occurs * @throws IllegalArgumentException if the file cannot be parsed * @throws NoSuchElementException if the end of file has been reached */ @Override public CsvRow next() { if (nextRow != null || hasNext()) { CsvRow row = nextRow; nextRow = null; return row; } else { throw new NoSuchElementException("CsvIterator has reached the end of the file"); } } /** * Returns the next batch of rows from the CSV file. * <p> * This will return up to the specified number of rows from the file at the current iteration point. * An empty list is returned if there are no more rows. * * @param count the number of rows to try and get, negative returns an empty list * @return the next batch of rows, up to the number requested * @throws UncheckedIOException if an IO exception occurs * @throws IllegalArgumentException if the file cannot be parsed */ public List<CsvRow> nextBatch(int count) { List<CsvRow> rows = new ArrayList<>(); for (int i = 0; i < count; i++) { if (hasNext()) { rows.add(next()); } } return rows; } /** * Throws an exception as remove is not supported. * * @throws UnsupportedOperationException always */ @Override public void remove() { throw new UnsupportedOperationException("CsvIterator does not support remove()"); } /** * Closes the underlying reader. * * @throws UncheckedIOException if an IO exception occurs */ @Override public void close() { Unchecked.wrap(() -> reader.close()); } //------------------------------------------------------------------------- /** * Returns a string describing the CSV iterator. * * @return the descriptive string */ @Override public String toString() { return "CsvIterator" + headers.toString(); } }