package org.gbif.ipt.model;
import org.gbif.utils.file.csv.CSVReader;
import org.gbif.utils.file.csv.CSVReaderFactory;
import org.gbif.ipt.utils.FileUtils;
import org.gbif.utils.file.ClosableReportingIterator;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Set;
import org.apache.log4j.Logger;
/**
 * A delimited text file based source such as CSV or tab files.
 * <p>
 * Holds the parsing settings (field delimiter, optional quote character, number of header lines to skip) together
 * with basic statistics about the underlying file (size, number of rows, last modification date). The delimiter
 * defaults to a tab character and no header lines are skipped by default.
 */
public class TextFileSource extends SourceBase implements FileSource {

  private static final Logger LOG = Logger.getLogger(TextFileSource.class);
  private static final String SUFFIX = ".txt";

  private String fieldsTerminatedBy = "\t";
  private String fieldsEnclosedBy;
  private int ignoreHeaderLines = 0;
  private File file;
  private long fileSize;
  private int rows;
  protected Date lastModified;

  /**
   * Replaces tab, newline, carriage return and form feed characters by their two character backslash notation
   * (e.g. a tab becomes a backslash followed by 't'), so the value can be shown and edited as plain text.
   *
   * @param x string to escape, may be null
   * @return the escaped string, or null if the input was null
   */
  private String escape(String x) {
    if (x == null) {
      return null;
    }
    // literal replacements - no regular expression needed
    return x.replace("\t", "\\t").replace("\n", "\\n").replace("\r", "\\r").replace("\f", "\\f");
  }

  /**
   * Reverses {@link #escape(String)}: turns the two character sequences backslash-t, backslash-n, backslash-r and
   * backslash-f back into the real tab, newline, carriage return and form feed characters.
   *
   * @param x string to unescape, may be null
   * @return the unescaped string, or null if the input was null
   */
  private String unescape(String x) {
    if (x == null) {
      return null;
    }
    // literal replacements - no regular expression needed
    return x.replace("\\t", "\t").replace("\\n", "\n").replace("\\r", "\r").replace("\\f", "\f");
  }

  /**
   * Builds a new reader on the source file using the configured encoding, delimiter, quote character and number
   * of header lines. The caller is responsible for closing the returned reader.
   *
   * @return a newly built reader
   * @throws IOException if the reader cannot be built
   */
  private CSVReader getReader() throws IOException {
    return CSVReaderFactory.build(file, encoding, fieldsTerminatedBy, getFieldQuoteChar(), ignoreHeaderLines);
  }

  /**
   * @return the first character of the fieldsEnclosedBy setting, or null if that setting is null or empty
   */
  public Character getFieldQuoteChar() {
    if (fieldsEnclosedBy == null || fieldsEnclosedBy.length() == 0) {
      return null;
    }
    return fieldsEnclosedBy.charAt(0);
  }

  /**
   * @return the raw string fields are enclosed by, may be null
   */
  public String getFieldsEnclosedBy() {
    return fieldsEnclosedBy;
  }

  /**
   * @return the string fields are enclosed by with control characters in backslash notation, may be null
   */
  public String getFieldsEnclosedByEscaped() {
    return escape(fieldsEnclosedBy);
  }

  /**
   * @return the raw field delimiter
   */
  public String getFieldsTerminatedBy() {
    return fieldsTerminatedBy;
  }

  /**
   * @return the field delimiter with control characters in backslash notation, e.g. "\t" as two characters
   */
  public String getFieldsTerminatedByEscaped() {
    return escape(fieldsTerminatedBy);
  }

  /**
   * @return the underlying file, may be null if it was never set
   */
  public File getFile() {
    return file;
  }

  /**
   * @return the file size as last set via {@link #setFileSize(long)} or {@link #analyze()}
   */
  public long getFileSize() {
    return fileSize;
  }

  /**
   * @return the file size rendered by FileUtils.formatSize(fileSize, 1)
   */
  public String getFileSizeFormatted() {
    return FileUtils.formatSize(fileSize, 1);
  }

  /**
   * @return the number of header lines handed to the reader when the file is parsed
   */
  public int getIgnoreHeaderLines() {
    return ignoreHeaderLines;
  }

  /**
   * @return the last modified date as previously set, may be null
   */
  public Date getLastModified() {
    return lastModified;
  }

  /**
   * @return the number of rows as last set via {@link #setRows(int)} or {@link #analyze()}
   */
  public int getRows() {
    return rows;
  }

  /**
   * @return the preferred file suffix for this kind of source, ".txt"
   */
  public String getPreferredFileSuffix() {
    return SUFFIX;
  }

  /**
   * Opens the file and returns an iterator over its rows. The caller has to close the returned iterator.
   *
   * @return iterator over the rows, or null if the reader could not be built (the exception is logged)
   */
  public ClosableReportingIterator<String[]> rowIterator() {
    try {
      return getReader().iterator();
    } catch (IOException e) {
      LOG.warn("Exception caught", e);
    }
    // callers have to cope with null when the file cannot be opened
    return null;
  }

  /**
   * Lists the column names of the file. If header lines are configured the header row exposed by the reader is
   * used, otherwise generic names "Column #1", "Column #2", ... are generated, one per header entry the reader
   * exposes.
   *
   * @return a new mutable list of column names; empty if the reader exposes no header or the file cannot be read
   */
  public List<String> columns() {
    List<String> columns = new ArrayList<String>();
    try {
      CSVReader reader = getReader();
      try {
        // careful - the reader.header can be null, e.g. for an empty file. In this case there are no columns
        String[] header = reader.header;
        if (header != null) {
          if (ignoreHeaderLines > 0) {
            columns.addAll(Arrays.asList(header));
          } else {
            for (int x = 1; x <= header.length; x++) {
              columns.add("Column #" + x);
            }
          }
        }
      } finally {
        // always release the file handle, even if reading the header failed
        reader.close();
      }
    } catch (IOException e) {
      LOG.warn("Cant read source " + getName(), e);
    }
    return columns;
  }

  /**
   * Reads the entire file once and updates file size, number of columns, number of rows and the readable flag
   * of this source. The reader is closed in all cases, also when reading fails half way through.
   *
   * @return the empty lines reported by the reader
   * @throws IOException if the reader cannot be built
   */
  public Set<Integer> analyze() throws IOException {
    setFileSize(getFile().length());
    CSVReader reader = getReader();
    try {
      // consume all rows so the reader can report row count and empty lines
      while (reader.hasNext()) {
        reader.next();
      }
      setColumns(reader.header == null ? 0 : reader.header.length);
      setRows(reader.getReadRows());
      setReadable(true);
      return reader.getEmptyLines();
    } finally {
      reader.close();
    }
  }

  /**
   * @param fieldsEnclosedBy raw string fields are enclosed by, may be null
   */
  public void setFieldsEnclosedBy(String fieldsEnclosedBy) {
    this.fieldsEnclosedBy = fieldsEnclosedBy;
  }

  /**
   * @param fieldsEnclosedBy string fields are enclosed by, given in backslash notation; it is unescaped before
   *        being stored
   */
  public void setFieldsEnclosedByEscaped(String fieldsEnclosedBy) {
    this.fieldsEnclosedBy = unescape(fieldsEnclosedBy);
  }

  /**
   * @param fieldsTerminatedBy raw field delimiter
   */
  public void setFieldsTerminatedBy(String fieldsTerminatedBy) {
    this.fieldsTerminatedBy = fieldsTerminatedBy;
  }

  /**
   * @param fieldsTerminatedBy field delimiter given in backslash notation; it is unescaped before being stored
   */
  public void setFieldsTerminatedByEscaped(String fieldsTerminatedBy) {
    this.fieldsTerminatedBy = unescape(fieldsTerminatedBy);
  }

  /**
   * @param file the underlying file
   */
  public void setFile(File file) {
    this.file = file;
  }

  /**
   * @param fileSize the file size to store
   */
  public void setFileSize(long fileSize) {
    this.fileSize = fileSize;
  }

  /**
   * @param ignoreHeaderLines number of header lines, null is treated as 0
   */
  public void setIgnoreHeaderLines(Integer ignoreHeaderLines) {
    this.ignoreHeaderLines = ignoreHeaderLines == null ? 0 : ignoreHeaderLines;
  }

  /**
   * @param lastModified the last modified date to store
   */
  public void setLastModified(Date lastModified) {
    this.lastModified = lastModified;
  }

  /**
   * @param rows the number of rows to store
   */
  public void setRows(int rows) {
    this.rows = rows;
  }
}