package org.molgenis.omx.dataset;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import jxl.Cell;
import jxl.Sheet;
import jxl.Workbook;
import jxl.read.biff.BiffException;
import org.apache.log4j.Logger;
import org.molgenis.framework.db.Database;
import org.molgenis.framework.db.DatabaseException;
import org.molgenis.framework.db.QueryRule;
import org.molgenis.framework.db.QueryRule.Operator;
import org.molgenis.framework.tupletable.TableException;
import org.molgenis.framework.tupletable.TupleTable;
import org.molgenis.framework.tupletable.impl.CsvTable;
import org.molgenis.model.elements.Field;
import org.molgenis.observ.DataSet;
import org.molgenis.observ.ObservableFeature;
import org.molgenis.observ.ObservationSet;
import org.molgenis.observ.ObservationTarget;
import org.molgenis.observ.ObservedValue;
import org.molgenis.util.CsvWriter;
import org.molgenis.util.SimpleTuple;
import org.molgenis.util.Tuple;
public class DataSetImporter
{
private static final Logger LOG = Logger.getLogger(DataSetImporter.class);
private static final String ENTITY_DATASET_PREFIX = "DataSet";
private Database db;
public DataSetImporter(Database db)
{
if (db == null) throw new IllegalArgumentException();
this.db = db;
}
public void importXLS(File file, List<String> dataSetSheetNames) throws IOException, DatabaseException
{
// fixes the problem where, even though decimals have a "." they are
// still read as "," because of the locale!
// TODO: dangerous: entire application locale changes! but workbook
// locale settings don't seem to have an effect...
Locale defaultLocale = Locale.getDefault();
Locale.setDefault(Locale.US);
Workbook workbook = null;
try
{
String sheetPrefix = ENTITY_DATASET_PREFIX.toLowerCase() + '_';
workbook = Workbook.getWorkbook(file);
for (Sheet sheet : workbook.getSheets())
{
if (dataSetSheetNames.contains(sheet.getName()))
{
String identifier = sheet.getName().substring(sheetPrefix.length());
File csvFile = convertToCSVFile(sheet, identifier);
importCSV(csvFile, identifier);
}
}
}
catch (BiffException e)
{
throw new IOException(e);
}
finally
{
Locale.setDefault(defaultLocale); // restore locale
if (workbook != null) workbook.close();
}
}
private void importCSV(File file, String identifier) throws IOException, DatabaseException
{
LOG.info("importing dataset " + identifier + " from file " + file + "...");
List<DataSet> dataSets = db.find(DataSet.class, new QueryRule(DataSet.IDENTIFIER, Operator.EQUALS, identifier));
if (dataSets == null || dataSets.isEmpty())
{
LOG.warn("dataset " + identifier + " does not exist in db");
return;
}
else if (dataSets.size() > 1)
{
LOG.warn("multiple datasets exist for identifier " + identifier);
return;
}
DataSet dataSet = dataSets.get(0);
TupleTable csvTable = null;
try
{
db.beginTx();
csvTable = new CsvTable(file);
List<Field> headerFields = csvTable.getAllColumns();
for (Tuple row : csvTable.getRows())
{
// find current observation target
String observationTargetIdentifier = row.getString(0);
List<ObservationTarget> observationTargets = db.find(ObservationTarget.class, new QueryRule(
ObservationTarget.IDENTIFIER, Operator.EQUALS, observationTargetIdentifier));
if (observationTargets == null || observationTargets.isEmpty()) throw new DatabaseException(
"ObservationTarget " + observationTargetIdentifier + " does not exist in db");
ObservationTarget observationTarget = observationTargets.get(0);
// create observation set
ObservationSet observationSet = new ObservationSet();
observationSet.setTarget(observationTarget);
observationSet.setPartOfDataSet(dataSet);
db.add(observationSet);
for (int col = 1; col < headerFields.size(); ++col)
{
// find current observation feature
String observableFeatureIdentifier = headerFields.get(col).getLabel();
List<ObservableFeature> observableFeatures = db.find(ObservableFeature.class, new QueryRule(
ObservableFeature.IDENTIFIER, Operator.EQUALS, observableFeatureIdentifier));
if (observableFeatures == null || observableFeatures.isEmpty()) throw new IOException(
"ObservableFeature " + observableFeatureIdentifier + " does not exist in db");
ObservableFeature observableFeature = observableFeatures.get(0);
// create observed value
String value = row.getString(col);
ObservedValue observedValue = new ObservedValue();
observedValue.setFeature(observableFeature);
observedValue.setValue(value);
observedValue.setObservationSet(observationSet);
// add to db
db.add(observedValue);
}
}
db.commitTx();
}
catch (DatabaseException e)
{
db.rollbackTx();
throw e;
}
catch (Exception e)
{
db.rollbackTx();
throw new IOException(e);
}
finally
{
if (csvTable != null) try
{
csvTable.close();
}
catch (TableException e)
{
throw new IOException(e);
}
}
}
private File convertToCSVFile(Sheet sheet, String identifier) throws IOException
{
String tmpFileName = "tmp" + ENTITY_DATASET_PREFIX + '_' + identifier + ".txt";
// copied from *ExcelReader.java
File tmpDataSet = new File(System.getProperty("java.io.tmpdir") + File.separator + tmpFileName);
if (tmpDataSet.exists())
{
boolean deleteSuccess = tmpDataSet.delete();
if (!deleteSuccess)
{
throw new IOException("Deletion of tmp file '" + tmpFileName + "' failed, cannot proceed.");
}
}
boolean createSuccess = tmpDataSet.createNewFile();
if (!createSuccess)
{
throw new IOException("Creation of tmp file '" + tmpFileName + "' failed, cannot proceed.");
}
boolean fileHasHeaders = writeSheetToFile(sheet, tmpDataSet);
if (fileHasHeaders)
{
return tmpDataSet;
}
else
{
tmpDataSet.delete();
throw new IOException("error occured writing sheet to file: " + tmpFileName);
}
}
private boolean writeSheetToFile(Sheet sheet, File file) throws IOException
{
// get headers
Cell[] headerCells = sheet.getRow(0);
final int nrHeaders = headerCells.length;
if (nrHeaders == 0) return false;
List<String> headers = new ArrayList<String>(nrHeaders);
for (Cell headerCell : headerCells)
headers.add(headerCell.getContents());
// create writer
CsvWriter cw;
try
{
cw = new CsvWriter(new PrintWriter(file, "UTF-8"), headers);
}
catch (UnsupportedEncodingException e)
{
throw new RuntimeException(e);
}
cw.setMissingValue("");
// write csv to file
try
{
cw.writeHeader();
final int nrRows = sheet.getRows();
for (int rowIndex = 1; rowIndex < nrRows; rowIndex++)
{
Tuple t = new SimpleTuple();
int colIndex = 0;
for (Cell c : sheet.getRow(rowIndex))
{
if (colIndex < headers.size() && c.getContents() != null)
{
t.set(headers.get(colIndex), c.getContents());
}
colIndex++;
}
cw.writeRow(t);
}
}
finally
{
cw.close();
}
return true;
}
}