/*
* Copyright (c) 2017 OBiBa. All rights reserved.
*
* This program and the accompanying materials
* are made available under the terms of the GNU Public License v3.0.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.obiba.magma.datasource.csv;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.annotation.Nullable;
import javax.validation.constraints.NotNull;
import com.google.common.collect.Lists;
import org.obiba.magma.Datasource;
import org.obiba.magma.Disposable;
import org.obiba.magma.Initialisable;
import org.obiba.magma.MagmaRuntimeException;
import org.obiba.magma.NoSuchValueSetException;
import org.obiba.magma.NoSuchVariableException;
import org.obiba.magma.Timestamps;
import org.obiba.magma.ValueSet;
import org.obiba.magma.ValueTable;
import org.obiba.magma.Variable;
import org.obiba.magma.VariableEntity;
import org.obiba.magma.datasource.csv.converter.VariableConverter;
import org.obiba.magma.support.AbstractValueTable;
import org.obiba.magma.support.DatasourceParsingException;
import org.obiba.magma.support.VariableEntityBean;
import org.obiba.magma.support.VariableEntityProvider;
import org.obiba.magma.type.TextType;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableSet;
import au.com.bytecode.opencsv.CSVReader;
import au.com.bytecode.opencsv.CSVWriter;
/**
 * Value table whose entities and variables are read from a CSV data file. The first column of the data file holds the
 * entity identifier and the header line holds the variable names. Variable definitions discovered in the data file can
 * be refined either from a CSV variables file or from a reference {@link ValueTable}.
 */
@SuppressWarnings({ "OverlyCoupledClass", "OverlyComplexClass" })
public class CsvValueTable extends AbstractValueTable implements Initialisable, Disposable {

  public static final String DEFAULT_ENTITY_TYPE = "Participant";

  public static final byte BLANKING_CHARACTER = ' ';

  public static final byte NEWLINE_CHARACTER = '\n';

  private static final Logger log = LoggerFactory.getLogger(CsvValueTable.class);

  /** Optional table providing the variable definitions; {@code null} when a variables file is used instead. */
  private ValueTable refTable;

  @Nullable
  private File variableFile;

  @Nullable
  private final File dataFile;

  private CsvVariableEntityProvider variableEntityProvider;

  private String entityType;

  private VariableConverter variableConverter;

  /** Entities found in the data file, in file order. */
  final Set<VariableEntity> entities = new LinkedHashSet<>();

  /** Data lines of the CSV file grouped by entity identifier (first field), filled on first value set read. */
  private final Map<String, List<String[]>> entityLinesBuffer = new LinkedHashMap<>();

  /** Shared reader on the data file; created by {@link #getCsvDataReader()}, closed by {@link #resetCsvDataReader()}. */
  private CSVReader csvDataReader;

  private boolean isVariablesFileEmpty;

  private boolean isDataFileEmpty;

  /** Variable name to column index in the data file (column 0 is the entity identifier). */
  private Map<String, Integer> dataHeaderMap = new HashMap<>();

  private List<String> missingVariableNames = new ArrayList<>();

  private final CsvTimestamps timestamps;

  /** Set when the same entity identifier is found on more than one data line. */
  private boolean multilines = false;

  /**
   * Creates a table backed by a data file only (no variables file).
   *
   * @param datasource the owning datasource
   * @param name the table name
   * @param dataFile the CSV data file
   * @param entityType the entity type, {@link #DEFAULT_ENTITY_TYPE} is used when {@code null}
   */
  public CsvValueTable(Datasource datasource, String name, File dataFile, String entityType) {
    this(datasource, name, null, dataFile, entityType);
  }

  /**
   * Creates a table backed by an optional variables file and an optional data file.
   *
   * @param datasource the owning datasource
   * @param name the table name
   * @param variableFile the CSV variables file, may be {@code null}
   * @param dataFile the CSV data file, may be {@code null}
   * @param entityType the entity type, {@link #DEFAULT_ENTITY_TYPE} is used when {@code null}
   */
  public CsvValueTable(Datasource datasource, String name, @Nullable File variableFile, @Nullable File dataFile,
      String entityType) {
    super(datasource, name);
    this.variableFile = variableFile;
    this.dataFile = dataFile;
    this.entityType = entityType == null ? DEFAULT_ENTITY_TYPE : entityType;
    timestamps = new CsvTimestamps(variableFile, dataFile);
  }

  /**
   * Creates a table that takes its name, entity type and variable definitions from a reference table.
   *
   * @param datasource the owning datasource
   * @param refTable the table providing name, entity type and variables
   * @param dataFile the CSV data file, may be {@code null}
   */
  public CsvValueTable(Datasource datasource, ValueTable refTable, @Nullable File dataFile) {
    super(datasource, refTable.getName());
    this.refTable = refTable;
    this.dataFile = dataFile;
    entityType = refTable.getEntityType();
    // no variables file in this mode: variableFile is still null here
    timestamps = new CsvTimestamps(variableFile, dataFile);
  }

  @NotNull
  @Override
  protected VariableEntityProvider getVariableEntityProvider() {
    return variableEntityProvider;
  }

  @Override
  public Set<VariableEntity> getVariableEntities() {
    return ImmutableSet.copyOf(variableEntityProvider.getVariableEntities());
  }

  /**
   * Returns the value set of an entity found in the data file.
   *
   * @param entity the entity to look up
   * @return the value set built from the data lines of that entity
   * @throws NoSuchValueSetException when the entity was not found in the data file at initialisation
   */
  @Override
  public synchronized ValueSet getValueSet(VariableEntity entity) throws NoSuchValueSetException {
    if(!entities.contains(entity)) {
      throw new NoSuchValueSetException(this, entity);
    }
    // read line(s) from data file
    return readValueSet(entity);
  }

  /**
   * Whether an entity can span several data lines.
   *
   * @return {@code true} when duplicate identifiers were detected in the data file or when the datasource is
   * configured as multilines
   */
  public boolean isMultilines() {
    // either detected or configured
    return multilines || getCsvDatasource().isMultilines();
  }

  /**
   * Reads the entities, then the variables, and creates the entity provider.
   *
   * @throws DatasourceParsingException when one of the CSV files cannot be read
   */
  @Override
  public void initialise() {
    try {
      initialiseEntities();
      initialiseVariables();
      variableEntityProvider = new CsvVariableEntityProvider(this, entityType);
    } catch(IOException e) {
      throw new DatasourceParsingException("Error occurred initialising csv datasource.", e, "CsvInitialisationError");
    }
  }

  @Override
  public void dispose() {
    resetCsvDataReader();
  }

  //
  // Private methods
  //

  /**
   * Builds the value set of an entity from the buffered data lines, loading the buffer from the data CSV file when
   * it is empty.
   *
   * @param entity the requested entity
   * @return the value set wrapping the data lines whose first field is the entity identifier
   */
  private ValueSet readValueSet(VariableEntity entity) {
    if(entityLinesBuffer.isEmpty()) {
      loadEntityLinesBuffer();
    }
    return new CsvValueSet(this, entity, dataHeaderMap, entityLinesBuffer.get(entity.getIdentifier()));
  }

  /**
   * Reads every data line of the CSV data file and groups them by entity identifier in the buffer. The reader is
   * always closed afterwards so that no file handle is kept between calls.
   */
  private void loadEntityLinesBuffer() {
    // start from a fresh reader so that the first line returned is the header
    resetCsvDataReader();
    try {
      CSVReader reader = getCsvDataReader();
      // skip header
      reader.readNext();
      String[] current;
      while((current = reader.readNext()) != null) {
        String id = current.length > 0 ? current[0] : "";
        entityLinesBuffer.computeIfAbsent(id, key -> Lists.<String[]>newArrayList()).add(current);
      }
    } catch(IOException e) {
      // do not keep a truncated buffer: a later call must retry instead of serving partial data
      entityLinesBuffer.clear();
      throw new MagmaRuntimeException("Failed reading CSV data file", e);
    } finally {
      resetCsvDataReader();
    }
  }

  /**
   * Registers the variables named in the data file header, then refines them from the reference table when there is
   * one, otherwise from the variables file when it exists.
   */
  private void initialiseVariables() throws IOException {
    initialiseVariablesFromDataFile();
    if(refTable != null) {
      updateDataVariablesFromRefTable();
    } else if(variableFile != null && variableFile.exists()) {
      updateDataVariablesFromVariablesFile();
    }
  }

  /**
   * Registers one text variable per column of the data file header (the first column, the entity identifier, is
   * skipped) and records the column index of each variable.
   */
  @SuppressWarnings("OverlyNestedMethod")
  private void initialiseVariablesFromDataFile() throws IOException {
    if(dataFile == null) return;
    // Obtain the variable names from the first line of the data file. Header line is = entity_id + variable names
    try {
      CSVReader dataHeaderReader = getCsvDataReader();
      // the datasource may not provide a reader (sibling methods guard against null as well)
      String[] line = dataHeaderReader == null ? null : dataHeaderReader.readNext();
      if(line != null) {
        String variableEntityType = entityType == null ? DEFAULT_ENTITY_TYPE : entityType;
        boolean repeatable = isMultilines();
        String occurrenceGroup = repeatable ? getName() : null;
        // skip first header as it's the participant ID
        for(int i = 1; i < line.length; i++) {
          String variableName = line[i].trim();
          addVariableValueSource(new CsvVariableValueSource(Variable.Builder //
              .newVariable(variableName, TextType.get(), variableEntityType) //
              .repeatable(repeatable) //
              .occurrenceGroup(occurrenceGroup) //
              .index(i) //
              .build()));
          dataHeaderMap.put(variableName, i);
        }
        isDataFileEmpty = false;
      }
    } finally {
      resetCsvDataReader();
    }
    isVariablesFileEmpty = true;
  }

  /**
   * Opens the variables file and replaces the data file variables with the definitions it contains.
   */
  private void updateDataVariablesFromVariablesFile() throws IOException {
    try(CSVReader variableReader = getCsvDatasource().getCsvReader(variableFile)) {
      if(variableReader == null) return;
      initialiseVariablesFromVariablesFile(variableReader);
    }
  }

  /**
   * Reads the variables file: the first line is the header used to build the converter, each following line with more
   * than one field is unmarshalled to a variable. Only variables already found in the data file are replaced.
   *
   * @param variableReader reader positioned at the beginning of the variables file
   */
  private void initialiseVariablesFromVariablesFile(CSVReader variableReader) throws IOException {
    // first line is variable headers
    String[] header = variableReader.readNext();
    if(header == null) {
      initialiseVariablesFromEmptyVariablesFile();
      return;
    }
    variableConverter = new VariableConverter(header);
    for(String[] nextLine = variableReader.readNext(); nextLine != null; nextLine = variableReader.readNext()) {
      // lines with at most one field carry no variable definition
      if(nextLine.length <= 1) continue;
      Variable var = variableConverter.unmarshal(nextLine);
      entityType = var.getEntityType();
      String variableName = var.getName();
      // update only variable that was in data file
      if(hasVariable(variableName)) {
        removeVariableValueSource(variableName);
        addVariableValueSource(new CsvVariableValueSource(var));
      }
    }
  }

  /**
   * Handles a variables file without any line: falls back to the datasource default variables header when no
   * converter was set, and flags the variables file as empty.
   */
  private void initialiseVariablesFromEmptyVariablesFile() {
    if(variableConverter == null) {
      String[] defaultVariablesHeader = getCsvDatasource().getDefaultVariablesHeader();
      log.debug(
          "A variables.csv file or header was not explicitly provided for the table {}. Use the default header {}.",
          getName(), defaultVariablesHeader);
      variableConverter = new VariableConverter(defaultVariablesHeader);
    }
    isVariablesFileEmpty = true;
  }

  /**
   * Takes the entity type from the reference table and replaces the data file variables with the reference table
   * definitions of the same name, then records the names left without definition.
   */
  private void updateDataVariablesFromRefTable() throws IOException {
    entityType = refTable.getEntityType();
    for(Variable var : refTable.getVariables()) {
      // update only variable that was in data file
      if(hasVariable(var.getName())) {
        removeVariableValueSource(var.getName());
        addVariableValueSource(new CsvVariableValueSource(var));
      }
    }
    missingVariableNames = getMissingVariableNames();
  }

  @Nullable
  CSVWriter getVariableWriter() {
    return getCsvDatasource().getCsvWriter(variableFile);
  }

  @Nullable
  CSVWriter getValueWriter() {
    return getCsvDatasource().getCsvWriter(dataFile);
  }

  @Nullable
  File getParentFile() {
    return dataFile == null ? null : dataFile.getParentFile();
  }

  /**
   * Get the CSV reader of the data file (create it if necessary). A newly created reader is advanced to the first
   * row configured in the datasource, so that the next line read is the header.
   *
   * @return the shared reader, or {@code null} when the datasource does not provide one
   */
  @SuppressWarnings("OverlyNestedMethod")
  private CSVReader getCsvDataReader() {
    if(csvDataReader == null) {
      csvDataReader = getCsvDatasource().getCsvReader(dataFile);
      if(csvDataReader != null) {
        try {
          // move to the first row
          for(int i = 1; i < getCsvDatasource().getFirstRow(); i++) {
            csvDataReader.readNext();
          }
        } catch(IOException e) {
          // best effort: the reader is still returned, but make the failure visible
          log.warn("Unable to move to the first row of the CSV data file of table {}", getName(), e);
        }
      }
    }
    return csvDataReader;
  }

  /**
   * Close the CSV data file reader and prepare for next creation.
   */
  private void resetCsvDataReader() {
    if(csvDataReader == null) return;
    try {
      csvDataReader.close();
    } catch(IOException e) {
      // nothing more can be done with this reader, it is dropped anyway
      log.debug("Unable to close the CSV data file reader of table {}", getName(), e);
    } finally {
      csvDataReader = null;
    }
  }

  public Map<String, Integer> getDataHeaderMap() {
    return dataHeaderMap;
  }

  /**
   * Builds the data file header line: the entity identifier column name at position 0, then each variable name at
   * its recorded column index.
   *
   * @return the header; positions without a recorded variable name are {@code null}
   */
  public String[] getDataHeaderAsArray() {
    // size from the largest column index: the map can be sparse (e.g. duplicate names in the data file header)
    int length = dataHeaderMap.size() + 1;
    for(Integer index : dataHeaderMap.values()) {
      length = Math.max(length, index + 1);
    }
    String[] header = new String[length];
    header[0] = CsvLine.ENTITY_ID_NAME;
    for(Map.Entry<String, Integer> entry : dataHeaderMap.entrySet()) {
      header[entry.getValue()] = entry.getKey();
    }
    return header;
  }

  public void setDataHeaderMap(Map<String, Integer> dataHeaderMap) {
    this.dataHeaderMap = dataHeaderMap;
  }

  /**
   * Sets the header of the variables file by creating the matching variable converter.
   *
   * @param header the column names of the variables file
   */
  public void setVariablesHeader(String... header) {
    variableConverter = new VariableConverter(header);
  }

  public VariableConverter getVariableConverter() {
    return variableConverter;
  }

  public boolean isVariablesFileEmpty() {
    return isVariablesFileEmpty;
  }

  public void setVariablesFileEmpty(boolean isVariablesFileEmpty) {
    this.isVariablesFileEmpty = isVariablesFileEmpty;
  }

  public boolean isDataFileEmpty() {
    return isDataFileEmpty;
  }

  public void setDataFileEmpty(boolean isDataFileEmpty) {
    this.isDataFileEmpty = isDataFileEmpty;
  }

  @NotNull
  @Override
  public Timestamps getTimestamps() {
    return timestamps;
  }

  /**
   * Returns the timestamps of a value set, which are the timestamps of the table itself.
   *
   * @param entity the entity to look up
   * @return the table timestamps
   * @throws NoSuchValueSetException when the entity was not found in the data file at initialisation
   */
  @Override
  public Timestamps getValueSetTimestamps(VariableEntity entity) throws NoSuchValueSetException {
    if(!entities.contains(entity)) {
      throw new NoSuchValueSetException(this, entity);
    }
    return timestamps;
  }

  /**
   * Convenience method equivalent to {@code (CsvDatasource) getDatasource()}.
   */
  private CsvDatasource getCsvDatasource() {
    return (CsvDatasource) getDatasource();
  }

  private String getCharacterSet() {
    return getCsvDatasource().getCharacterSet();
  }

  /**
   * Read the entity identifiers from the CSV data file.
   *
   * @throws IOException when the data file cannot be read
   */
  private void initialiseEntities() throws IOException {
    isDataFileEmpty = true;
    if(dataFile == null || !dataFile.exists()) {
      return;
    }
    try {
      CSVReader dataReader = getCsvDataReader();
      // the datasource may not provide a reader (sibling methods guard against null as well)
      if(dataReader == null) return;
      // first line is headers = entity_id + variable names
      String[] line = dataReader.readNext();
      isDataFileEmpty = line == null || line.length == 0;
      buildEntitySet(dataReader);
    } finally {
      resetCsvDataReader();
    }
  }

  /**
   * Read the entity identifiers from the non-empty CSV data lines (first field). An identifier found more than once
   * flags the table as multilines.
   *
   * @param dataHeaderReader reader positioned after the header line
   * @throws IOException when the data file cannot be read
   */
  private void buildEntitySet(CSVReader dataHeaderReader) throws IOException {
    String[] line;
    while((line = dataHeaderReader.readNext()) != null) {
      if(line.length == 0) continue;
      String identifier = line[0];
      if(Strings.isNullOrEmpty(identifier)) continue;
      isDataFileEmpty = false;
      // add() reports whether the entity was already known
      if(!entities.add(new VariableEntityBean(entityType, identifier))) {
        multilines = true;
      }
    }
  }

  /**
   * Returns missing {@link Variable}s. All variables will be of the default value type "text". Missing variables are
   * created for variable names specified in a csv data file that are not provided with associated {@link Variable}s
   * when the CsvValueTable is created. This happens when {@link Variable}s are provided from a reference table, and
   * that reference table does not have a {@link Variable} for every variable named in the csv data file.
   *
   * @return A collection of missing Variables.
   */
  public Iterable<Variable> getMissingVariables() {
    Collection<Variable> variables = new ArrayList<>(missingVariableNames.size());
    for(String variableName : missingVariableNames) {
      variables.add(Variable.Builder.newVariable(variableName, TextType.get(), entityType).build());
    }
    return variables;
  }

  /**
   * Returns a list of variable names specified in the csv data file for this table that do not have an associated
   * {@link Variable}. This can occur when {@code Variable}s are obtained from a reference to another table. That table
   * may not have a {@code Variable} for every variable specified in the csv data file.
   *
   * @return A list of variable names that are missing {@link Variable}s.
   * @throws IOException thrown when there is a problem reading the csv data file.
   */
  private List<String> getMissingVariableNames() throws IOException {
    List<String> missingVariables = new ArrayList<>();
    // Obtain the variable names from the header line of the data file, read through the shared reader so that the
    // datasource first row setting is honoured. Header line is = entity_id + variable names
    // NOTE(review): initialiseVariablesFromDataFile() registers a variable for every header column, so this lookup
    // looks like it can never miss - confirm whether names absent from the reference table were intended instead.
    try {
      CSVReader dataHeaderReader = getCsvDataReader();
      String[] line = dataHeaderReader == null ? null : dataHeaderReader.readNext();
      if(line != null) {
        for(int i = 1; i < line.length; i++) {
          String variableName = line[i].trim();
          if(!hasVariable(variableName)) {
            missingVariables.add(variableName);
          }
        }
      }
    } finally {
      resetCsvDataReader();
    }
    return missingVariables;
  }
}