/** * Copyright (C) 2012 - present by OpenGamma Inc. and the OpenGamma group of companies * * Please see distribution for license. */ package com.opengamma.util.csv; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.net.URL; import java.util.Iterator; import java.util.zip.GZIPInputStream; import org.apache.commons.io.IOUtils; import org.apache.commons.lang.StringUtils; import org.fudgemsg.FudgeContext; import org.fudgemsg.FudgeMsg; import org.fudgemsg.MutableFudgeMsg; import au.com.bytecode.opencsv.CSVParser; import au.com.bytecode.opencsv.CSVReader; import com.opengamma.OpenGammaRuntimeException; import com.opengamma.util.ArgumentChecker; import com.opengamma.util.fudgemsg.OpenGammaFudgeContext; /** * Provides an iterator for reading a CSV file that return each row as a fudge message * <p> * The first row of the CSV document is assumed to be the column headers. * The FudgeMessage returned by the iterator has the column headers as field names */ public final class CSVDocumentReader implements Iterable<FudgeMsg> { private URL _docUrl; private char _separator; private char _quotechar; private char _escape; private FudgeContext _fudgeContext; /** * Constructs CSVDocumentReader using a comma for the separator. * * @param docUrl the URL to the CSV source. */ public CSVDocumentReader(URL docUrl) { this(docUrl, CSVParser.DEFAULT_SEPARATOR, CSVParser.DEFAULT_QUOTE_CHARACTER, CSVParser.DEFAULT_ESCAPE_CHARACTER, OpenGammaFudgeContext.getInstance()); } /** * Constructs CSVDocumentReader with supplied separator. * * @param docUrl the URL to the CSV source. * @param separator the delimiter to use for separating entries. */ public CSVDocumentReader(URL docUrl, char separator) { this(docUrl, separator, CSVParser.DEFAULT_QUOTE_CHARACTER, CSVParser.DEFAULT_ESCAPE_CHARACTER, OpenGammaFudgeContext.getInstance()); } /** * Constructs CSVDocumentReader with supplied separator and quote char. * * @param docUrl the URL to the CSV source. * @param separator the delimiter to use for separating entries * @param quotechar the character to use for quoted elements */ public CSVDocumentReader(URL docUrl, char separator, char quotechar) { this(docUrl, separator, quotechar, CSVParser.DEFAULT_ESCAPE_CHARACTER, OpenGammaFudgeContext.getInstance()); } /** * Constructs CSVDocumentReader with supplied separator, quote char and escape char. * * @param docUrl the URL to the CSV source. * @param separator the delimiter to use for separating entries * @param quotechar the character to use for quoted elements * @param escape the character to use for escaping a separator or quote * @param fudgeContext the fudgeContext, not null */ public CSVDocumentReader(URL docUrl, char separator, char quotechar, char escape, FudgeContext fudgeContext) { ArgumentChecker.notNull(docUrl, "file"); ArgumentChecker.notNull(separator, "separator"); ArgumentChecker.notNull(quotechar, "quotechar"); ArgumentChecker.notNull(escape, "escape"); ArgumentChecker.notNull(fudgeContext, "fudgeContext"); _docUrl = docUrl; _separator = separator; _quotechar = quotechar; _escape = escape; _fudgeContext = fudgeContext; } @Override public Iterator<FudgeMsg> iterator() { return new FudgeMsgCSVIterator(); } private class FudgeMsgCSVIterator implements Iterator<FudgeMsg> { private CSVReader _csvReader; private String[] _header; private String[] _currentRow; public FudgeMsgCSVIterator() { try { InputStream is = _docUrl.openStream(); if (_docUrl.getFile().endsWith(".gz")) { is = new GZIPInputStream(is); } _csvReader = new CSVReader(new BufferedReader(new InputStreamReader(is)), _separator, _quotechar, _escape); _header = _csvReader.readNext(); if (_header == null) { throw new OpenGammaRuntimeException("Column headers is missing, can not create iterator"); } else { trimColumnHeaders(); } } catch (IOException ex) { throw new OpenGammaRuntimeException("IO Exception trying to create an Iterator", ex); } } private void trimColumnHeaders() { for (int i = 0; i < _header.length; i++) { _header[i] = StringUtils.trim(_header[i]); } } @Override public boolean hasNext() { try { _currentRow = _csvReader.readNext(); } catch (IOException ex) { throw new OpenGammaRuntimeException("IO Exception trying to read next row", ex); } if (_currentRow == null) { IOUtils.closeQuietly(_csvReader); _csvReader = null; } return _currentRow != null; } @Override public FudgeMsg next() { MutableFudgeMsg currentMsg = _fudgeContext.newMessage(); int size = getMessageSize(); for (int i = 0; i < size; i++) { String currentRow = StringUtils.trimToNull(_currentRow[i]); if (currentRow != null) { currentMsg.add(_header[i], currentRow); } } return currentMsg; } private int getMessageSize() { int size = _header.length; if (_currentRow.length < size) { size = _currentRow.length; } return size; } @Override public void remove() { throw new UnsupportedOperationException("Cannot remove CSV row"); } @Override protected void finalize() throws Throwable { //close file inputstream if it is still hanging around if (_csvReader != null) { IOUtils.closeQuietly(_csvReader); } } } }