package edu.isi.karma.imp.csv;
import com.opencsv.CSVReader;
import edu.isi.karma.imp.Import;
import edu.isi.karma.rep.*;
import edu.isi.karma.rep.HNode.HNodeType;
import edu.isi.karma.rep.metadata.WorksheetProperties.Property;
import edu.isi.karma.rep.metadata.WorksheetProperties.SourceTypes;
import edu.isi.karma.util.EncodingDetector;
import edu.isi.karma.webserver.KarmaException;
import org.json.JSONArray;
import org.json.JSONObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.*;
import java.util.HashMap;
import java.util.Map;
/**
 * Builds a Karma {@link Worksheet} from delimited text read from an {@link InputStream},
 * parsed with opencsv's {@link CSVReader}.
 *
 * <p>Row indices passed to the constructor are 1-based: the row whose position equals
 * {@code headerRowIndex} supplies the column names, and rows at or after
 * {@code dataStartRowIndex} are imported as data. A {@code headerRowIndex} of {@code 0}
 * (or less) means the input has no header row; column names are then generated as
 * {@code Column_1}, {@code Column_2}, ...
 *
 * <p>When {@code columnsJson} is non-null it is treated as an array of JSON objects mapping a
 * column name to a boolean; only columns whose entry is {@code true} are imported.
 */
public class CSVImport extends Import {
    private static final Logger logger = LoggerFactory.getLogger(CSVImport.class);

    /** Quote character callers pass to request "no quote character at all". */
    private static final char NO_QUOTE_SENTINEL = '$';
    /** Value handed to the CSV reader to disable the quote / escape character. */
    private static final char DISABLED_CHARACTER = '\0';
    /** Prefix of generated column names; the 1-based column position is appended. */
    private static final String GENERATED_COLUMN_PREFIX = "Column_";

    protected final int headerRowIndex;
    protected final int dataStartRowIndex;
    protected final char delimiter;
    protected final char quoteCharacter;
    protected final char escapeCharacter;
    protected final InputStream is;
    protected final String encoding;
    protected final int maxNumLines;
    protected final JSONArray columnsJson;
    protected final String sourceName;

    /**
     * Creates an importer for one CSV source.
     *
     * @param headerRowIndex    1-based index of the header row; {@code 0} or less for no header
     * @param dataStartRowIndex 1-based index of the first data row
     * @param delimiter         field separator
     * @param quoteCharacter    quote character; pass {@code '$'} to disable quoting and escaping
     * @param encoding          character encoding handed to the encoding detector
     * @param maxNumLines       maximum number of data rows to import; values {@code <= 0} mean no limit
     * @param sourceName        name of the source, forwarded to the superclass
     * @param is                stream to read the CSV text from
     * @param workspace         workspace forwarded to the superclass
     * @param columnsJson       optional column visibility selection; may be {@code null}
     */
    public CSVImport(int headerRowIndex, int dataStartRowIndex,
            char delimiter, char quoteCharacter, String encoding,
            int maxNumLines,
            String sourceName,
            InputStream is,
            Workspace workspace,
            JSONArray columnsJson) {
        super(sourceName, workspace, encoding);
        this.headerRowIndex = headerRowIndex;
        this.dataStartRowIndex = dataStartRowIndex;
        this.sourceName = sourceName;
        this.delimiter = delimiter;
        // Trick:
        // Passing quoteCharacter as $ signals that we don't want any quote character.
        // Required because the CSVReader constructor doesn't take ignoreQuotation
        // (as CSVParser does).
        if (quoteCharacter == NO_QUOTE_SENTINEL) {
            this.quoteCharacter = DISABLED_CHARACTER;
            this.escapeCharacter = DISABLED_CHARACTER;
        } else {
            this.escapeCharacter = '\\';
            this.quoteCharacter = quoteCharacter;
        }
        this.encoding = encoding;
        this.maxNumLines = maxNumLines;
        this.is = is;
        this.columnsJson = columnsJson;
    }

    /**
     * Creates a new importer with the same settings as this one.
     *
     * <p>The copy is given the very same {@link InputStream} instance, not a copy of its content.
     *
     * @return a new {@code CSVImport} configured like this instance
     * @throws IOException declared for callers; not thrown by the code in this method itself
     */
    public CSVImport duplicate() throws IOException {
        // The constructor only disables the escape character when it sees the '$' sentinel.
        // The stored quoteCharacter is already translated to '\0', so hand the sentinel back
        // to keep the copy's quote AND escape settings identical to this instance.
        char quoteArgument = (escapeCharacter == DISABLED_CHARACTER) ? NO_QUOTE_SENTINEL : quoteCharacter;
        return new CSVImport(headerRowIndex, dataStartRowIndex, delimiter, quoteArgument, encoding,
                maxNumLines, sourceName, is, workspace, columnsJson);
    }

    /**
     * Reads the CSV input and fills the worksheet with headers and data rows.
     *
     * <p>The reader is always closed, including when reading or populating fails.
     *
     * @return the populated worksheet
     * @throws IOException    if reading the input fails
     * @throws KarmaException declared by the overridden method
     */
    @Override
    public Worksheet generateWorksheet() throws IOException, KarmaException {
        Table dataTable = getWorksheet().getDataTable();
        // Index for row currently being read
        int rowCount = 0;
        Map<Integer, String> hNodeIdList = new HashMap<>();
        // Without a header row the column names are generated from the first data row.
        boolean generatedHeadersPending = !hasHeaderRow();

        try (CSVReader reader = getCSVReader()) {
            // Populate the worksheet model
            String[] rowValues;
            while ((rowValues = reader.readNext()) != null) {
                // Check for the header row
                if (rowCount + 1 == headerRowIndex) {
                    hNodeIdList = addHeaders(getWorksheet(), getFactory(), rowValues);
                    rowCount++;
                    continue;
                }

                // Populate the model with data rows
                if (rowCount + 1 >= dataStartRowIndex) {
                    if (generatedHeadersPending && rowValues.length > 0) {
                        hNodeIdList = addHeaders(getWorksheet(), getFactory(), rowValues);
                        generatedHeadersPending = false;
                    }
                    boolean added = addRow(getWorksheet(), getFactory(), rowValues, hNodeIdList, dataTable);
                    if (added) {
                        rowCount++;
                        // (rowCount - dataStartRowIndex + 1) data rows have been read so far.
                        if (maxNumLines > 0 && (rowCount - dataStartRowIndex) >= maxNumLines - 1) {
                            break;
                        }
                    }
                    // Rows that were not added do not advance rowCount.
                    continue;
                }

                // Row precedes both the header and the data section: skip it.
                rowCount++;
            }
        }

        getWorksheet().getMetadataContainer().getWorksheetProperties()
                .setPropertyValue(Property.sourceType, SourceTypes.CSV.toString());
        return getWorksheet();
    }

    /**
     * Wraps the input stream in a buffered character reader.
     *
     * @return a buffered reader over the reader obtained from {@link EncodingDetector}
     * @throws IOException if the underlying reader cannot be created
     */
    protected BufferedReader getLineReader() throws IOException {
        // Prepare the reader for reading file line by line
        InputStreamReader isr = EncodingDetector.getInputStreamReader(is, encoding);
        return new BufferedReader(isr);
    }

    /**
     * Creates the CSV reader configured with this importer's delimiter, quote and escape characters.
     *
     * @return a new {@link CSVReader} over {@link #getLineReader()}
     * @throws IOException if the underlying reader cannot be created
     */
    protected CSVReader getCSVReader() throws IOException {
        BufferedReader br = getLineReader();
        return new CSVReader(br, delimiter, quoteCharacter, escapeCharacter);
    }

    /** Returns whether the input is configured to contain a header row. */
    private boolean hasHeaderRow() {
        return headerRowIndex > 0;
    }

    /** Returns the generated name for the column at the given 0-based position. */
    private static String generatedColumnName(int columnIndex) {
        return GENERATED_COLUMN_PREFIX + (columnIndex + 1);
    }

    /**
     * Adds one header node per visible column of the given row.
     *
     * <p>With a header row the cell values are the column names; without one, names are
     * generated from the column position.
     *
     * @return map from 0-based column position to the id of the header node created for it;
     *         hidden columns have no entry
     */
    private Map<Integer, String> addHeaders(Worksheet worksheet, RepFactory fac, String[] rowValues) {
        HTable headers = worksheet.getHeaders();
        Map<Integer, String> headersMap = new HashMap<>();
        boolean useRowValuesAsNames = hasHeaderRow();

        for (int i = 0; i < rowValues.length; i++) {
            String columnName = useRowValuesAsNames ? rowValues[i] : generatedColumnName(i);
            if (!isVisible(columnName)) {
                continue;
            }
            HNode hNode = headers.addHNode(columnName, HNodeType.Regular, worksheet, fac);
            if (hNode != null) {
                headersMap.put(i, hNode.getId());
            }
        }
        return headersMap;
    }

    /**
     * Appends one data row to the table.
     *
     * <p>Values at positions beyond the known columns get a generated column, created once per
     * position and recorded in {@code hNodeIdMap}. Values of hidden columns (positions within
     * the known columns but without a mapping) are skipped.
     *
     * @return {@code false} if the row was {@code null} or empty and nothing was added,
     *         {@code true} otherwise
     */
    private boolean addRow(Worksheet worksheet, RepFactory fac, String[] rowValues,
            Map<Integer, String> hNodeIdMap, Table dataTable) {
        if (rowValues == null || rowValues.length == 0) {
            return false;
        }

        Row row = dataTable.addRow(fac);
        // Positions below this bound are "known" columns: present in the map when visible,
        // absent when hidden by the column selection.
        int knownColumnCount = (columnsJson != null) ? columnsJson.length() : hNodeIdMap.size();

        for (int i = 0; i < rowValues.length; i++) {
            // Only create a column for an extra position the first time it is seen; checking
            // the map (rather than a per-row size) prevents re-creating it for every row.
            if (i >= knownColumnCount && !hNodeIdMap.containsKey(i)) {
                String columnName = generatedColumnName(i);
                HNode hNode = worksheet.getHeaders().addHNode(columnName, HNodeType.Regular, worksheet, fac);
                hNodeIdMap.put(i, hNode.getId());
                logger.debug("Row has more values than known columns; added column {}", columnName);
            }

            String hNodeId = hNodeIdMap.get(i);
            if (hNodeId != null) {
                row.setValue(hNodeId, rowValues[i], fac);
            }
        }
        return true;
    }

    /**
     * Tells whether the named column should be imported.
     *
     * @return {@code true} when no column selection was supplied; otherwise the boolean stored
     *         under {@code key} in the first selection object that has it, or {@code false}
     *         when no object has that key
     */
    private boolean isVisible(String key) {
        if (columnsJson == null) {
            return true;
        }
        for (int i = 0; i < columnsJson.length(); i++) {
            JSONObject obj = columnsJson.getJSONObject(i);
            if (obj.has(key)) {
                return obj.getBoolean(key);
            }
        }
        return false;
    }
}