package hu.sztaki.ilab.longneck.process.access; import com.csvreader.CsvReader; import hu.sztaki.ilab.longneck.Field; import hu.sztaki.ilab.longneck.Record; import hu.sztaki.ilab.longneck.RecordImpl; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.List; import java.util.Properties; import org.apache.log4j.Logger; /** * A data source reading CSV files. * * Built on OpenCSV. * * @author Molnar Peter <molnarp@sztaki.mta.hu> */ public class CsvSource implements Source { /** Logger. */ private final static Logger log = Logger.getLogger(CsvSource.class); /** The name of this source from property. */ private String name; /** The name of the source file. */ private String path; /** The delimiter used to separate the records. */ private Character delimiter = ';'; /** The character to use as a text qualifier in the data. */ private Character textQualifier; /** The character set to use when reading the file. */ private String characterSet = "UTF-8"; /** Column names. */ private String[] columns = null; /** The input files have headers. */ private boolean hasHeaders = true; /** The input files use textQualifier or not. */ private boolean useTextQualifier = true; /** The CSV reader. */ private CsvReader csvReader; /** The runtime properties. */ protected Properties runtimeProperties; /** The source path to read csv files from. */ private String sourcePath; /** The list of source files that are read. */ private List<String> sourceFiles; /** The current file in the list. */ private int currentFileIndex = -1; @Override public Record getRecord() throws NoMoreRecordsException { Record result = new RecordImpl(); try { String[] values = getNextRecordValues() ; while(values.length != columns.length ) { log.warn("Skipping input line, number of columns differs: " + columns.length + " (process definition) vs. " + values.length + " (CSV file line): \n" + csvReader.getRawRecord() ); values = getNextRecordValues() ; } for (int i = 0; i < values.length; ++i) { result.add(new Field(columns[i], values[i])); } } catch (IOException ex) { throw new RuntimeException(ex); } return result; } private String[] getNextRecordValues() throws IOException, NoMoreRecordsException { while (csvReader == null || ! csvReader.readRecord()) { try { nextFile(); } catch (CsvHeaderException ex) { log.warn(ex); continue; } } return csvReader.getValues(); } private void nextFile() throws NoMoreRecordsException, FileNotFoundException, CsvHeaderException, IOException { // Close current reader if available if (csvReader != null) { csvReader.close(); } // Set current source file ++currentFileIndex; if (sourceFiles.size() <= currentFileIndex) { throw new NoMoreRecordsException("No more records."); } log.info(String.format("Processing file: %1$s", sourceFiles.get(currentFileIndex))); // Create new reader csvReader = new CsvReader(sourceFiles.get(currentFileIndex), delimiter, Charset.forName(characterSet)); csvReader.setUseTextQualifier(useTextQualifier); if(textQualifier != null) csvReader.setTextQualifier(textQualifier); // Try to set column names from the first file if (hasHeaders) { if (columns == null) { if (csvReader.readHeaders()) { columns = csvReader.getHeaders(); } else { throw new CsvHeaderException( String.format("Cannot read headers from file: %1$s", sourceFiles.get(currentFileIndex))); } } else { csvReader.skipRecord(); } } } @Override public void init() { // Check correct configuration if (hasHeaders == false && columns == null) { throw new RuntimeException( "Input files must have headers or column names must be defined."); } if ((sourcePath = path) == null) { // Read source path from runtime properties sourcePath = runtimeProperties.getProperty(String.format("csvSource.%1$s.path", name)); } // Check source path is set if (sourcePath == null || "".equals(sourcePath)) { throw new RuntimeException(name!= null ? String.format("csvSource.%1$s.path is undefined.", name):"path is undefined"); } // Create a list of files from the source path sourceFiles = new ArrayList<String>(); for (String path : sourcePath.split(File.pathSeparator)) { if (path == null || "".equals(path)) { continue; } File source = new File(path); if (source.isFile() && source.length() != 0) { sourceFiles.add(source.getAbsolutePath()); } else if (source.isDirectory()) { for (File f : source.listFiles()) { if (f.isFile() && f.length() != 0) { sourceFiles.add(f.getAbsolutePath()); } } } } // Check that at least 1 file is defined if (sourceFiles.isEmpty()) { throw new RuntimeException( String.format("No files found at the specified location: %1$s", sourcePath)); } } @Override public void close() { // Close current reader if available if (csvReader != null) { csvReader.close(); } } public String getName() { return name; } public void setName(String name) { this.name = name; } public String getPath() { return path; } public void setPath(String path) { this.path = path; } public Character getDelimiter() { return delimiter; } public void setDelimiter(Character delimiter) { this.delimiter = delimiter; } public String getCharacterSet() { return characterSet; } public void setCharacterSet(String characterSet) { this.characterSet = characterSet; } public void setColumns(String columns) { this.columns = columns.split("\\s+"); if (this.columns != null && this.columns.length <= 0) { this.columns = null; } } public boolean isHasHeaders() { return hasHeaders; } public void setHasHeaders(boolean hasHeaders) { this.hasHeaders = hasHeaders; } public Character getTextQualifier() { return textQualifier; } public void setTextQualifier(Character textQualifier) { this.textQualifier = textQualifier; } public boolean isUseTextQualifier() { return useTextQualifier; } public void setUseTextQualifier(boolean useTextQualifier) { this.useTextQualifier = useTextQualifier; } public Properties getRuntimeProperties() { return runtimeProperties; } public void setRuntimeProperties(Properties runtimeProperties) { this.runtimeProperties = runtimeProperties; } }