/*
* Copyright 2013 Eric F. Savage, code@efsavage.com
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.ajah.flatfile;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.logging.Level;
import lombok.Getter;
import lombok.Setter;
import lombok.extern.java.Log;
import com.ajah.util.StringUtils;
/**
 * Reads a delimited flat data file (CSV or tab-separated) into a structured,
 * single-pass iterator of rows.
 * <p>
 * The first line of the input is the header and defines the columns; every
 * following line is returned as one {@link FlatFileRow}. The reader is its own
 * {@link Iterator}, so {@link #iterator()} can only be traversed once.
 * </p>
 * <p>
 * CSV limitations: input is consumed line by line, so a field cannot span
 * lines, and any data line containing two consecutive double quotes is rejected
 * with an {@link UnsupportedOperationException}.
 * </p>
 *
 * @author <a href="http://efsavage.com">Eric F. Savage</a>,
 *         <a href="mailto:code@efsavage.com">code@efsavage.com</a>.
 */
@Log
public class FlatFileReader implements Closeable, Iterable<FlatFileRow>, Iterator<FlatFileRow> {

    private final BufferedReader reader;

    /** Columns in header order. */
    @Getter
    private final List<FlatFileColumn> columns = new ArrayList<>();

    /** Columns keyed by their trimmed header name. */
    private final Map<String, FlatFileColumn> map = new HashMap<>();

    @Getter
    private final FlatFileFormat format;

    /**
     * Not consulted anywhere in this class; kept so the generated accessors
     * remain available to existing callers.
     */
    @Getter
    @Setter
    private boolean flushEveryLine;

    /**
     * Excel will wrap any field that contains a space or other character with
     * double quotes even when saving in tab-delimited format. The value is
     * handed to every {@link FlatFileRow} this reader creates.
     */
    @Getter
    @Setter
    private boolean stripWrappedQuotes;

    /**
     * Not consulted in this class. NOTE(review): presumably read by
     * {@link FlatFileRow} through the reader reference it is given - confirm.
     */
    @Getter
    @Setter
    private boolean trimContents = true;

    /**
     * Number of lines consumed so far, the header included. The value before
     * the increment is passed to each row as its line number.
     */
    private int lineNumber;

    /** Line read ahead by {@link #hasNext()} and not yet returned by {@link #next()}. */
    private String pendingLine;

    /** Set once the end of input was reached, a read failed, or the reader was closed. */
    private boolean exhausted;

    /**
     * Constructs a reader on top of an existing buffered reader and consumes
     * the header line from it.
     *
     * @param format
     *            The format of the file.
     * @param bufferedReader
     *            The buffered reader for the file. It is closed by
     *            {@link #close()}.
     * @throws IOException
     *             If the header could not be read.
     * @throws IllegalArgumentException
     *             If the header contains a duplicate column name.
     */
    public FlatFileReader(final FlatFileFormat format, final BufferedReader bufferedReader) throws IOException {
        this.format = format;
        this.reader = bufferedReader;
        readHeader();
    }

    /**
     * Constructs a reader from a file, decoding it as UTF-8.
     *
     * @param format
     *            The format of the file.
     * @param file
     *            The file.
     * @throws IOException
     *             If the file could not be opened or its header could not be
     *             read.
     * @throws IllegalArgumentException
     *             If the header contains a duplicate column name.
     */
    @SuppressWarnings("resource")
    public FlatFileReader(final FlatFileFormat format, final File file) throws IOException {
        this.format = format;
        // The stream is owned by this reader: released by close(), or right
        // here if the header cannot be processed.
        this.reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8));
        try {
            readHeader();
        } catch (final IOException | RuntimeException e) {
            try {
                this.reader.close();
            } catch (final IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Constructs a reader from an input stream, decoding it as UTF-8 (the same
     * charset the {@link File} constructor uses) rather than the platform
     * default.
     *
     * @param format
     *            The format of the file.
     * @param inputStream
     *            The input stream for the file. It is closed by
     *            {@link #close()}.
     * @throws IOException
     *             If the header could not be read.
     * @throws IllegalArgumentException
     *             If the header contains a duplicate column name.
     */
    public FlatFileReader(final FlatFileFormat format, final InputStream inputStream) throws IOException {
        this(format, new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8)));
    }

    /**
     * Reads the first line and, when there is one, turns it into columns. The
     * line counter is advanced even for empty input.
     */
    private void readHeader() throws IOException {
        final String header = this.reader.readLine();
        this.lineNumber++;
        if (header != null) {
            createColumns(header);
        }
    }

    /**
     * Registers one column under its trimmed name.
     *
     * @throws IllegalArgumentException
     *             If a column with the same trimmed name already exists.
     */
    private void addColumn(final String name) {
        final FlatFileColumn column = new FlatFileColumn(name, null, false);
        // Check the same key that is stored, so "a" and " a" are detected as
        // duplicates instead of one silently replacing the other.
        final String key = column.getName().trim();
        if (this.map.containsKey(key)) {
            throw new IllegalArgumentException("Duplicate column name " + name);
        }
        this.columns.add(column);
        this.map.put(key, column);
        log.fine("Created column \"" + name + "\"");
    }

    /**
     * Closes the underlying reader. Any line that was read ahead is discarded,
     * so {@link #hasNext()} returns false afterwards.
     */
    @Override
    public void close() throws IOException {
        this.pendingLine = null;
        this.exhausted = true;
        this.reader.close();
    }

    /** Splits the header line according to the format and registers each column. */
    private void createColumns(final String header) {
        log.fine("Adding columns");
        switch (this.format) {
        case CSV:
            for (final String name : splitCsv(header)) {
                addColumn(unquote(name));
            }
            break;
        case TAB:
            for (final String name : header.split("\t")) {
                addColumn(name);
            }
            break;
        default:
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Splits a CSV line on commas that are not inside double quotes. Like
     * {@link String#split(String)}, trailing empty fields are dropped and a
     * line without any delimiter yields exactly one field.
     */
    private static List<String> splitCsv(final String line) {
        final List<String> fields = new ArrayList<>();
        boolean quoted = false;
        int start = 0;
        for (int i = 0; i < line.length(); i++) {
            final char c = line.charAt(i);
            if (c == '"') {
                quoted = !quoted;
            } else if (c == ',' && !quoted) {
                fields.add(line.substring(start, i));
                start = i + 1;
            }
        }
        if (fields.isEmpty()) {
            fields.add(line);
            return fields;
        }
        fields.add(line.substring(start));
        int size = fields.size();
        while (size > 0 && fields.get(size - 1).isEmpty()) {
            size--;
        }
        return fields.subList(0, size);
    }

    /**
     * Removes one pair of enclosing double quotes, if present. Values shorter
     * than two characters are returned unchanged.
     */
    private static String unquote(final String value) {
        final int length = value.length();
        if (length >= 2 && value.charAt(0) == '"' && value.charAt(length - 1) == '"') {
            return value.substring(1, length - 1);
        }
        return value;
    }

    /**
     * Determines if this reader has a column available.
     *
     * @param column
     *            The column name to check; compared against the trimmed header
     *            names.
     * @return true if the column appears in the file, otherwise false.
     */
    public boolean hasColumn(final String column) {
        return this.map.containsKey(column);
    }

    /**
     * Determines whether another line is available by reading one line ahead.
     * This may block until a full line or the end of input arrives. A read
     * failure is logged and treated as the end of input.
     *
     * @return true if {@link #next()} will return a row, otherwise false.
     */
    @Override
    public boolean hasNext() {
        if (this.pendingLine != null) {
            return true;
        }
        if (this.exhausted) {
            return false;
        }
        try {
            this.pendingLine = this.reader.readLine();
        } catch (final IOException e) {
            log.log(Level.WARNING, "Could not read after line " + this.lineNumber, e);
            this.pendingLine = null;
        }
        if (this.pendingLine == null) {
            this.exhausted = true;
            return false;
        }
        return true;
    }

    /**
     * Returns this reader itself; the rows can therefore be iterated only once.
     */
    @Override
    public Iterator<FlatFileRow> iterator() {
        return this;
    }

    /**
     * Parses the next line into a row. Values beyond the last header column are
     * ignored; columns for which the line has no value are not set.
     *
     * @return The row for the next line.
     * @throws NoSuchElementException
     *             If there are no more lines.
     * @throws UnsupportedOperationException
     *             If a CSV line contains two consecutive double quotes, or the
     *             format is not supported.
     */
    @Override
    public FlatFileRow next() {
        if (!hasNext()) {
            throw new NoSuchElementException("No more rows after line " + this.lineNumber);
        }
        final String line = this.pendingLine;
        this.pendingLine = null;
        final FlatFileRow row = new FlatFileRow(this.map, this, this.lineNumber++);
        row.setStripWrappedQuotes(this.stripWrappedQuotes);
        switch (this.format) {
        case CSV:
            fillFromCsv(row, line);
            break;
        case TAB:
            fillFromTab(row, line);
            break;
        default:
            throw new UnsupportedOperationException();
        }
        return row;
    }

    /**
     * Copies the fields of a CSV line into the row. Blank fields become the
     * empty string and one pair of enclosing quotes is removed.
     */
    private void fillFromCsv(final FlatFileRow target, final String line) {
        // Escaped quotes ("") are not supported; note this also rejects an
        // empty quoted field.
        if (line.contains("\"\"")) {
            throw new UnsupportedOperationException("Line contains double quote: " + line);
        }
        final List<String> values = splitCsv(line);
        final int limit = Math.min(values.size(), this.columns.size());
        for (int i = 0; i < limit; i++) {
            final String raw = values.get(i);
            final String value = StringUtils.isBlank(raw) ? "" : unquote(raw);
            target.set(this.columns.get(i).getName(), value);
        }
    }

    /** Copies the fields of a tab-separated line into the row, unmodified. */
    private void fillFromTab(final FlatFileRow target, final String line) {
        final String[] values = line.split("\t");
        final int limit = Math.min(values.length, this.columns.size());
        for (int i = 0; i < limit; i++) {
            target.set(this.columns.get(i).getName(), values[i]);
        }
    }

    /**
     * Not supported.
     *
     * @throws UnsupportedOperationException
     *             Always.
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException();
    }

    /**
     * Validates that required columns are present in the file.
     *
     * @param requiredColumns
     *            The array of required columns, may be empty or null.
     * @throws MissingColumnException
     *             Thrown if columns are missing; carries every missing name.
     */
    public void require(final String... requiredColumns) throws MissingColumnException {
        if (requiredColumns == null || requiredColumns.length < 1) {
            return;
        }
        final ArrayList<String> missingColumns = new ArrayList<>();
        for (final String requiredColumn : requiredColumns) {
            if (!hasColumn(requiredColumn)) {
                missingColumns.add(requiredColumn);
            }
        }
        if (!missingColumns.isEmpty()) {
            throw new MissingColumnException(StringUtils.join(",", missingColumns), missingColumns);
        }
    }

    /**
     * Validates that the file only contains supported columns.
     *
     * @param supportedColumns
     *            The columns supported by whatever is processing the file. If
     *            null or empty, every column of the file is reported as
     *            unsupported.
     * @throws UnsupportedColumnException
     *             If the file contains unsupported columns.
     */
    public void validate(final String[] supportedColumns) throws UnsupportedColumnException {
        if (supportedColumns == null || supportedColumns.length < 1) {
            throw new UnsupportedColumnException(StringUtils.join(",", this.map.keySet()), this.map.keySet());
        }
        final Set<String> unsupportedColumns = new HashSet<>();
        for (final String column : this.map.keySet()) {
            boolean supported = false;
            for (final String supportedColumn : supportedColumns) {
                if (column.equals(supportedColumn)) {
                    supported = true;
                    break;
                }
            }
            if (!supported) {
                unsupportedColumns.add(column);
            }
        }
        if (!unsupportedColumns.isEmpty()) {
            throw new UnsupportedColumnException(StringUtils.join(",", unsupportedColumns), unsupportedColumns);
        }
    }
}