/*******************************************************************************
* Copyright 2016 Observational Health Data Sciences and Informatics
*
* This file is part of WhiteRabbit
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
******************************************************************************/
package org.ohdsi.rabbitInAHat.dataModel;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVRecord;
import org.ohdsi.utilities.files.QuickAndDirtyXlsxReader;
import org.ohdsi.utilities.files.QuickAndDirtyXlsxReader.Sheet;
/**
 * In-memory model of a database: a name plus a list of {@link Table}s. Instances are built either from a CDM
 * definition CSV (loaded as a classpath resource or supplied as a stream) or from an xlsx scan report.
 */
public class Database implements Serializable {

    /** Supported CDM versions; each one names the CSV resource that holds its table/column definitions. */
    public enum CDMVersion {
        CDMV4("CDMV4.csv"), CDMV5("CDMV5.csv"), CDMV501("CDMV5.0.1.csv");

        private final String fileName;

        CDMVersion(String fileName) {
            this.fileName = fileName;
        }
    }

    private static final long serialVersionUID = -3912166654601191039L;

    private List<Table> tables = new ArrayList<Table>();
    private String dbName = "";

    /**
     * @return the live list of tables in this database (not a copy)
     */
    public List<Table> getTables() {
        return tables;
    }

    /**
     * Looks up a table by name, ignoring case.
     *
     * @param name the table name to search for
     * @return the first table whose name matches, or {@code null} when there is none
     */
    public Table getTableByName(String name) {
        for (Table table : tables) {
            // equalsIgnoreCase is locale-independent, unlike comparing two toLowerCase() results.
            if (table.getName().equalsIgnoreCase(name)) {
                return table;
            }
        }
        return null;
    }

    /**
     * Replaces the list of tables.
     *
     * @param tables the new table list; stored by reference
     */
    public void setTables(List<Table> tables) {
        this.tables = tables;
    }

    /**
     * @return the database name; the empty string when none was set
     */
    public String getDbName() {
        return dbName;
    }

    /**
     * Builds the model for one of the built-in CDM versions from its CSV classpath resource.
     *
     * @param cdmVersion the CDM version to load
     * @return the populated database model
     * @throws IllegalStateException if the CSV resource for the version cannot be found
     * @throws RuntimeException if reading the resource fails
     */
    public static Database generateCDMModel(CDMVersion cdmVersion) {
        InputStream stream = Database.class.getResourceAsStream(cdmVersion.fileName);
        if (stream == null) {
            throw new IllegalStateException("CDM definition resource not found: " + cdmVersion.fileName);
        }
        try {
            return generateModelFromCSV(stream, cdmVersion.fileName);
        } finally {
            // This method opened the stream, so it is responsible for releasing it.
            try {
                stream.close();
            } catch (IOException ignored) {
                // The model has already been built (or a more relevant exception is propagating).
            }
        }
    }

    /**
     * Builds a model from a CSV with a header row containing the columns {@code TABLE_NAME}, {@code COLUMN_NAME},
     * {@code IS_NULLABLE}, {@code DATA_TYPE} and {@code DESCRIPTION}. Table and column names are lower-cased.
     * The stream is read with the platform default charset and is not closed by this method.
     *
     * @param stream the CSV content
     * @param dbName the file name the database is named after; anything from the last {@code '.'} on is dropped
     * @return the populated database model
     * @throws RuntimeException if reading the stream fails (the {@link IOException} is attached as cause)
     */
    public static Database generateModelFromCSV(InputStream stream, String dbName) {
        Database database = new Database();
        // A name without an extension is used as-is instead of failing in substring(0, -1).
        int extensionStart = dbName.lastIndexOf('.');
        database.dbName = extensionStart < 0 ? dbName : dbName.substring(0, extensionStart);

        Map<String, Table> nameToTable = new HashMap<String, Table>();
        try {
            for (CSVRecord row : CSVFormat.RFC4180.withHeader().parse(new InputStreamReader(stream))) {
                // Locale.ROOT keeps identifiers stable regardless of the JVM default locale.
                String tableName = row.get("TABLE_NAME").toLowerCase(Locale.ROOT);
                Table table = nameToTable.get(tableName);
                if (table == null) {
                    table = new Table();
                    table.setDb(database);
                    table.setName(tableName);
                    nameToTable.put(tableName, table);
                    database.tables.add(table);
                }
                Field field = new Field(row.get("COLUMN_NAME").toLowerCase(Locale.ROOT), table);
                field.setNullable("YES".equals(row.get("IS_NULLABLE")));
                field.setType(row.get("DATA_TYPE"));
                field.setDescription(row.get("DESCRIPTION"));
                table.getFields().add(field);
            }
        } catch (IOException e) {
            // Keep the original exception as cause so the stack trace is not lost.
            throw new RuntimeException(e.getMessage(), e);
        }
        return database;
    }

    /**
     * Builds a model from an xlsx scan report. The first sheet must start with a header row and is expected to
     * contain the columns {@code Table} and {@code Field}; the columns {@code N rows}, {@code Fraction empty},
     * {@code Type} and {@code Max length} are used when present. Rows with an empty table name are skipped.
     * Value counts for each field are read from the sheet named after the field's table, if there is one.
     *
     * @param filename path of the scan report
     * @return the populated database model
     */
    public static Database generateModelFromScanReport(String filename) {
        Database database = new Database();
        Map<String, Table> nameToTable = new HashMap<String, Table>();
        QuickAndDirtyXlsxReader workbook = new QuickAndDirtyXlsxReader(filename);
        Sheet sheet = workbook.get(0);
        Iterator<org.ohdsi.utilities.files.QuickAndDirtyXlsxReader.Row> iterator = sheet.iterator();

        Map<String, Integer> fieldName2ColumnIndex = new HashMap<String, Integer>();
        for (String header : iterator.next()) {
            fieldName2ColumnIndex.put(header, fieldName2ColumnIndex.size());
        }

        // Column positions do not change per row, so resolve them once. Someone may have manually deleted
        // columns, so every index except the two mandatory ones is checked before use.
        Integer tableColumn = fieldName2ColumnIndex.get("Table");
        Integer fieldColumn = fieldName2ColumnIndex.get("Field");
        Integer rowCountColumn = fieldName2ColumnIndex.get("N rows");
        Integer fractionEmptyColumn = fieldName2ColumnIndex.get("Fraction empty");
        Integer typeColumn = fieldName2ColumnIndex.get("Type");
        Integer maxLengthColumn = fieldName2ColumnIndex.get("Max length");

        while (iterator.hasNext()) {
            org.ohdsi.utilities.files.QuickAndDirtyXlsxReader.Row row = iterator.next();
            String tableName = row.get(tableColumn);
            if (tableName.length() == 0) {
                continue;
            }

            Table table = nameToTable.get(tableName);
            if (table == null) {
                // NOTE(review): unlike generateModelFromCSV, tables created here are not linked back to the
                // database via setDb - confirm whether that is intentional.
                table = new Table();
                table.setName(tableName.toLowerCase(Locale.ROOT));
                if (rowCountColumn != null && rowCountColumn < row.size()) {
                    table.setRowCount((int) Double.parseDouble(row.get(rowCountColumn)));
                }
                nameToTable.put(tableName, table);
                database.tables.add(table);
            }

            String fieldName = row.get(fieldColumn);
            Field field = new Field(fieldName.toLowerCase(Locale.ROOT), table);
            if (fractionEmptyColumn != null && fractionEmptyColumn < row.size()) {
                field.setNullable(!row.get(fractionEmptyColumn).equals("0"));
            }
            if (typeColumn != null && typeColumn < row.size()) {
                field.setType(row.get(typeColumn));
            }
            if (maxLengthColumn != null && maxLengthColumn < row.size()) {
                field.setMaxLength((int) Double.parseDouble(row.get(maxLengthColumn)));
            }
            field.setValueCounts(getValueCounts(workbook, tableName, fieldName));
            table.getFields().add(field);
        }
        return database;
    }

    /**
     * Reads the value/count pairs for one field from the sheet named after its table. The value is taken from the
     * column whose header equals the field name and the count from the column directly to its right. Reading
     * stops at the first row where both are empty.
     *
     * @param workbook  the scan report
     * @param tableName name of the sheet to read
     * @param fieldName header of the value column
     * @return one {@code {value, count}} pair per row; empty when the sheet, its header row or the column is missing
     */
    private static String[][] getValueCounts(QuickAndDirtyXlsxReader workbook, String tableName, String fieldName) {
        Sheet tableSheet = null;
        for (Sheet sheet : workbook) {
            if (sheet.getName().equals(tableName)) {
                tableSheet = sheet;
                break;
            }
        }
        if (tableSheet == null) { // Sheet not found for table
            return new String[0][0];
        }

        Iterator<org.ohdsi.utilities.files.QuickAndDirtyXlsxReader.Row> iterator = tableSheet.iterator();
        if (!iterator.hasNext()) { // Sheet exists but has no header row
            return new String[0][0];
        }
        org.ohdsi.utilities.files.QuickAndDirtyXlsxReader.Row header = iterator.next();
        int index = header.indexOf(fieldName);

        List<String[]> list = new ArrayList<String[]>();
        if (index != -1) { // Could be -1 when people manually delete columns
            while (iterator.hasNext()) {
                org.ohdsi.utilities.files.QuickAndDirtyXlsxReader.Row row = iterator.next();
                if (row.size() <= index) {
                    continue; // Row is too short to hold a value for this field
                }
                String value = row.get(index);
                String count = row.size() > index + 1 ? row.get(index + 1) : "";
                if (value.equals("") && count.equals("")) {
                    break;
                }
                list.add(new String[] { value, count });
            }
        }
        return list.toArray(new String[list.size()][]);
    }
}