package plugins.biobankimporter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import jxl.Sheet;
import org.molgenis.auth.Institute;
import org.molgenis.auth.Person;
import org.molgenis.compute.ComputeProtocol;
import org.molgenis.core.OntologyTerm;
import org.molgenis.framework.db.Database;
import org.molgenis.framework.db.DatabaseException;
import org.molgenis.framework.db.Query;
import org.molgenis.framework.db.QueryRule;
import org.molgenis.framework.db.QueryRule.Operator;
import org.molgenis.organization.Investigation;
import org.molgenis.organization.InvestigationElement;
import org.molgenis.pheno.Category;
import org.molgenis.pheno.Measurement;
import org.molgenis.pheno.ObservationTarget;
import org.molgenis.pheno.ObservedValue;
import org.molgenis.pheno.Panel;
import org.molgenis.protocol.Protocol;
import org.molgenis.util.SimpleTuple;
import org.molgenis.util.Tuple;
import app.DatabaseFactory;
public class TableController
{
private Database db;
public int columnSize = 0;
public List<TableField> configuration;
public TableField field;
private int observationTarget = -1;
private String MeasurementDataType = "Not Matching";
private HashMap<String, String> InputToMolgenisDataType = new HashMap<String, String>();
private HashMap<Integer, TableField> columnIndexToTableField = new HashMap<Integer, TableField>();
private List<Integer> missingCategoryList = new ArrayList<Integer>();
private HashMap<Integer, List<Integer>> relationIndex = new HashMap<Integer, List<Integer>>();
private String investigationName = null;
private String excelDirection = "UploadFileByColumn";
private HashMap<String, String> checkExistingMeasurementsInDB = new HashMap<String, String>();
private HashMap<String, String> checkExistingEntitesInDB = new HashMap<String, String>();
public TableController(int i, Database db)
{
this.db = db;
this.columnSize = i;
configuration = new ArrayList<TableField>();
}
public void addField(String classType, String fieldName, String multipleValues, int[] columnList, Boolean Vertical)
{
this.addField(classType, fieldName, multipleValues, columnList, Vertical, new SimpleTuple());
}
public void addField(String ClassType, String fieldName, String multipleValues, int[] columnList, Boolean Vertical,
Tuple defaults)
{
for (int i = 0; i < columnList.length; i++)
{
this.addField(ClassType, fieldName, multipleValues, columnList[i], Vertical, defaults, -1);
}
}
public void addField(String ClassType, String fieldName, String multipleValues, boolean Vertical,
int dependedIndex, int... columnIndexes)
{
List<Integer> columnList = new ArrayList<Integer>();
for (int i = 0; i < columnIndexes.length; i++)
{
if (columnIndexToTableField.containsKey(columnIndexes[i]))
{
columnIndexToTableField.get(columnIndexes[i]).setDependentColumnIndex(dependedIndex);
columnIndexToTableField.get(columnIndexes[i]).setRelation(fieldName);
}
else
{
this.addField(ClassType, fieldName, multipleValues, columnIndexes[i], Vertical, new SimpleTuple(),
dependedIndex);
columnIndexToTableField.get(columnIndexes[i]).setRelation(fieldName);
}
columnList.add(columnIndexes[i]);
}
relationIndex.put(dependedIndex, columnList);
}
public void addField(String ClassType, String fieldName, String multipleValues, int columnIndex, Boolean Vertical)
{
this.addField(ClassType, fieldName, multipleValues, columnIndex, Vertical, new SimpleTuple(), -1);
}
public void addField(String ClassType, String fieldName, String multipleValues, int columnIndex, boolean Vertical,
int... dependentColumnIndex)
{
this.addField(ClassType, fieldName, multipleValues, columnIndex, Vertical, new SimpleTuple(),
dependentColumnIndex);
}
public void addField(String ClassType, String fieldName, String multipleValues, int columnIndex, boolean Vertical,
Tuple defaults)
{
this.addField(ClassType, fieldName, multipleValues, columnIndex, Vertical, defaults, -1);
}
public void addField(String ClassType, String fieldName, String multipleValues, int[] coHeaders, int targetIndex,
boolean Vertical)
{
observationTarget = targetIndex;
this.addField(ClassType, fieldName, multipleValues, coHeaders, Vertical, new SimpleTuple());
observationTarget = -1;
}
public void addField(String ClassType, String fieldName, String multipleValues, int columnIndex, Boolean Vertical,
Tuple defaults, int... dependentColumnIndex)
{
try
{
// create a tableField that will take care of loading columnIndex
// into 'name' property
field = new TableField(ClassType, fieldName, multipleValues, columnIndex, Vertical, defaults,
dependentColumnIndex);
// add to the parser configuration
configuration.add(field);
columnIndexToTableField.put(columnIndex, field);
if (observationTarget != -1)
{
field.setObservationTarget(observationTarget);
}
}
catch (Exception e)
{
// TODO Auto-generated catch block
e.printStackTrace();
throw new RuntimeException(e);
}
}
public TableField getField(int columnIndex)
{
return configuration.get(columnIndex);
}
public List<TableField> getConfiguration()
{
return configuration;
}
@SuppressWarnings("unchecked")
public void convertIntoPheno(Sheet[] sheets, int startingRowIndex, boolean multipleSheets,
boolean sheetImportProtocol) throws DatabaseException
{
List<Measurement> headerMeasurements = new ArrayList<Measurement>();
List<InvestigationElement> measurementList = new ArrayList<InvestigationElement>();
List<InvestigationElement> categoryList = new ArrayList<InvestigationElement>();
List<InvestigationElement> protocolList = new ArrayList<InvestigationElement>();
List<InvestigationElement> observationTargetList = new ArrayList<InvestigationElement>();
List<InvestigationElement> panelList = new ArrayList<InvestigationElement>();
List<ObservedValue> observedValueList = new ArrayList<ObservedValue>();
HashMap<String, OntologyTerm> ontologyTermOfList = new HashMap<String, OntologyTerm>();
HashMap<String, Institute> listOfInstitute = new HashMap<String, Institute>();
HashMap<String, Person> listOfPerson = new HashMap<String, Person>();
List<String> nonInvestigationElement = new ArrayList<String>();
nonInvestigationElement.add(Person.class.getSimpleName());
nonInvestigationElement.add(Institute.class.getSimpleName());
nonInvestigationElement.add(OntologyTerm.class.getSimpleName());
List<String> referenceFields = new ArrayList<String>();
referenceFields.add(Protocol.NAME);
referenceFields.add(Protocol.FEATURES_NAME);
referenceFields.add(Protocol.SUBPROTOCOLS_NAME);
referenceFields.add(Measurement.CATEGORIES_NAME);
referenceFields.add(Measurement.UNIT_NAME);
// listOfInstitue.get(0).getLabelValue();
int sheetSize = sheets.length;
if (multipleSheets == false)
{
sheetSize = 1;
}
try
{
for (int sheetIndex = 0; sheetIndex < sheetSize; sheetIndex++)
{
Sheet sheet = sheets[sheetIndex];
int row = sheet.getRows();
int column = sheet.getColumns();
if (excelDirection.equals("UploadFileByRow"))
{
row = sheet.getColumns();
column = sheet.getRows();
}
// three dimensional matrix of<colIndex, rowIndex, valueIndex>
// third dimension of valueIndex is to deal with multiple values
// in one cell
// we made colIndex key because not all colIndexes are used
Map<Integer, List<List<InvestigationElement>>> colValues = new LinkedHashMap<Integer, List<List<InvestigationElement>>>();
Map<String, Map<String, List<InvestigationElement>>> existingValuesForClassType = new LinkedHashMap<String, Map<String, List<InvestigationElement>>>();
for (int rowIndex = 0; (rowIndex + startingRowIndex) < row; rowIndex++)
{
for (int colIndex = 0; colIndex < column; colIndex++)
{
String cellValue;
if (excelDirection.equals("UploadFileByRow")) cellValue = sheet.getCell(rowIndex, colIndex)
.getContents();
else
cellValue = sheet.getCell(colIndex, rowIndex + startingRowIndex).getContents();
TableField field = columnIndexToTableField.get(colIndex);
if (columnIndexToTableField.get(colIndex) != null
&& !columnIndexToTableField.get(colIndex).getClassType().equals("NULL"))
{
if (columnIndexToTableField.get(colIndex).getVertical() && rowIndex != 0)
{
if (!existingValuesForClassType
.containsKey(field.getClassType() + field.getFieldName()))
{
Map<String, List<InvestigationElement>> tempHolder = new LinkedHashMap<String, List<InvestigationElement>>();
existingValuesForClassType.put(field.getClassType() + field.getFieldName(),
tempHolder);
}
if (existingValuesForClassType.get(field.getClassType() + field.getFieldName())
.containsKey(cellValue))
{
// check colIndex: if there is already a
// list for colIndex
if (colValues.get(colIndex) == null)
{
colValues.put(colIndex, new ArrayList<List<InvestigationElement>>());
}
colValues.get(colIndex).add(new ArrayList<InvestigationElement>());
colValues
.get(colIndex)
.get(rowIndex - 1)
.addAll(existingValuesForClassType.get(
field.getClassType() + field.getFieldName()).get(cellValue));
}
else
{
// we split on multivalue
String[] multiValue = cellValue.split(field.getValueSplitter());
List<String> splitValue = new ArrayList<String>();
if (field.getMultipleValues().equals("true"))
{
for (int valueIndex = 0; valueIndex < multiValue.length; valueIndex++)
{
splitValue.addAll(Arrays.asList(multiValue[valueIndex].split(",")));
}
}
else
{
splitValue.addAll(Arrays.asList(multiValue));
}
int index = 0;
for (String eachValue : splitValue)
{
String value = eachValue.replaceAll("[^(a-zA-Z0-9_\\s)]", " ").trim();
// If the fieldName is 'name', added as
// a new entity
if (field.getFieldName().equalsIgnoreCase("NAME"))
{
if (nonInvestigationElement.contains(field.getClassType()))
{
if (field.getClassType().equals(Person.class.getSimpleName()))
{
Person person = new Person();
person.setName(value);
if (!listOfPerson.containsKey(value))
{
listOfPerson.put(value, person);
}
}
else if (field.getClassType().equals(Institute.class.getSimpleName()))
{
Institute ins = new Institute();
ins.setName(value);
if (!listOfInstitute.containsKey(value))
{
listOfInstitute.put(value, ins);
}
}
else if (field.getClassType()
.equals(OntologyTerm.class.getSimpleName()))
{
OntologyTerm ot = new OntologyTerm();
ot.setName(value);
if (!ontologyTermOfList.containsKey(value))
{
ontologyTermOfList.put(value, ot);
}
}
}
else
{
// For the name of eneities,
// only letters, digits,
// underscore, space are allowed
// The others are removed from
// the string
InvestigationElement entity = null;
// check colIndex: if there is
// already a list for colIndex
if (colValues.get(colIndex) == null)
{
colValues
.put(colIndex, new ArrayList<List<InvestigationElement>>());
}
// check rowIndex: if there is
// already a list values
if (colValues.get(colIndex).size() != rowIndex)
{
// create a list for our
// values (to deal with
// multivalue)
colValues.get(colIndex).add(new ArrayList<InvestigationElement>());
}
// check valueIndex: if there is
// already a value
// TODO Chao`s comment: should
// be multiValue.length instead
// of rowIndex
if (colValues.get(colIndex).get(rowIndex - 1).size() != multiValue.length)
{
// create the entity
entity = (InvestigationElement) DatabaseFactory.create()
.getClassForName(field.getClassType()).newInstance();
}
if (!value.equalsIgnoreCase(""))
{
if (field.getClassType().equals(Category.class.getSimpleName()))
{
// For the category,
// description and
// code_string could
// have other characters
// than
// the ones that are
// allowed in the name
String dsecription = eachValue;
entity.set(Category.NAME, value);
if (dsecription.split("=").length > 1)
{
entity.set(Category.CODE_STRING,
dsecription.split("=")[0].trim());
entity.set(Category.DESCRIPTION,
dsecription.split("=")[1].trim());
}
else
{
entity.set(Category.DESCRIPTION,
dsecription.split("=")[0].trim());
entity.set(Category.CODE_STRING, index);
}
// entity.set(Category.LABEL,
// value);
if (field.getDefaults().getString(Category.ISMISSING) != null) entity
.set(Category.ISMISSING,
field.getDefaults().getString(
Category.ISMISSING));
}
else
{
// set name for other
// entities other than
// category.
entity.set(field.getFieldName(), value);
}
if (investigationName != null) entity.set("Investigation_name",
investigationName);
colValues.get(colIndex).get(rowIndex - 1).add(entity);
field.addCellValue(value);
}
}
}
index++;
}
}
if (field.getDependentColumnIndex()[0] != -1 && !cellValue.equals(""))
{
for (int index = 0; index < field.getDependentColumnIndex().length; index++)
{
int dependentColumn = field.getDependentColumnIndex()[index];
// TableField dependendField =
// columnIndexToTableField.get(dependentColumn);
// InvestigationElement
// addingPropertyToEntity =
// dependendField.getEntity();
int existingRow = rowIndex;
InvestigationElement addingPropertyToEntity = null;
while (colValues.get(dependentColumn).get(existingRow - 1).size() == 0)
{
existingRow--;
}
addingPropertyToEntity = colValues.get(dependentColumn).get(existingRow - 1)
.get(0);
String multipleValues[] = cellValue.split(field.getValueSplitter());
List<Object> values = new ArrayList<Object>();
for (int i = 0; i < multipleValues.length; i++)
{
if (referenceFields.contains(field.getFieldName()))
{
values.add(multipleValues[i].replaceAll("[^(a-zA-Z0-9_\\s)]", " ")
.trim());
}
else
{
values.add(multipleValues[i].trim());
}
}
// Due to using generic method get()
// property of the Pheno Entity, so we
// don`t know which Object data
// the field would be. We need to check
// the field type first. It could be
// list, boolean, string
if (addingPropertyToEntity.get(field.getRelationString()) != null)
{
if (addingPropertyToEntity.get(field.getRelationString()).getClass()
.equals(ArrayList.class))
{
@SuppressWarnings("unchecked")
List<String> previousProperties = (List<String>) addingPropertyToEntity
.get(field.getRelationString());
if (previousProperties != null && previousProperties.size() > 0)
{
for (String newValue : previousProperties)
{
if (!values.contains(newValue))
{
values.add(newValue);
}
}
}
}
else if (addingPropertyToEntity.get(field.getRelationString()).getClass()
.equals(Boolean.class))
{
values.clear();
if (field.getRelationString().equalsIgnoreCase(Measurement.TEMPORAL))
{
if (cellValue.equalsIgnoreCase("yes"))
{
values.add(true);
}
else
{
values.add(false);
}
}
else
{
if (cellValue.equalsIgnoreCase("yes")) values.add(true);
}
}
else if (addingPropertyToEntity.get(field.getRelationString()).getClass()
.equals(String.class))
{
values.clear();
values.add(cellValue);
}
if (field.getRelationString().equals(Measurement.DATATYPE))
{
String dataType = adjustDataTypeValue(cellValue);
if (!dataType.equals(MeasurementDataType))
{
values.clear();
values.add(dataType);
}
else
{
values.clear();
values.add("string");
}
}
}
if (field.getRelationString().equals(Measurement.UNIT_NAME))
{
for (int i = 0; i < multipleValues.length; i++)
{
if (!multipleValues[i].equals(""))
{
String ontologyTermName = multipleValues[i].replaceAll(
"[^(a-zA-Z0-9_\\s)]", " ");
List<OntologyTerm> existingOntologyTermList = db.find(
OntologyTerm.class, new QueryRule(OntologyTerm.NAME,
Operator.EQUALS, ontologyTermName));
if (existingOntologyTermList.size() == 0)
{
OntologyTerm unitOntologyTerm = new OntologyTerm();
unitOntologyTerm.setName(ontologyTermName);
unitOntologyTerm.setDefinition(multipleValues[i]);
if (!ontologyTermOfList.keySet().contains(
unitOntologyTerm.getName()))
{
ontologyTermOfList.put(unitOntologyTerm.getName(),
unitOntologyTerm);
}
}
}
}
}
List<Object> removeDuplicate = new ArrayList<Object>();
for (Object o : values)
{
if (!removeDuplicate.contains(o))
{
removeDuplicate.add(o);
}
}
values = removeDuplicate;
if (values.size() == 1)
{
if (!values.get(0).equals("")) addingPropertyToEntity.set(
field.getRelationString(), values.get(0));
}
else
{
addingPropertyToEntity.set(field.getRelationString(), values);
}
}
}
if (!existingValuesForClassType.get(field.getClassType() + field.getFieldName())
.containsKey(cellValue) && colValues.containsKey(colIndex))
{
existingValuesForClassType.get(field.getClassType() + field.getFieldName()).put(
cellValue, colValues.get(colIndex).get(rowIndex - 1));
}
}
else
{
// The header is measurement!
if (rowIndex == 0)
{
if (field.getClassType().equalsIgnoreCase(ObservedValue.class.getSimpleName()))
{
// "".replaceAll("[^(a-zA-Z0-9_\\s)]",
// " ");
String measurementName = cellValue.replaceAll("[^(a-zA-Z0-9_\\s)]", " ").trim();
Measurement measurement = new Measurement();
measurement.setName(measurementName);
measurement.setInvestigation_Name(investigationName);
if (db.find(Measurement.class,
new QueryRule(Measurement.NAME, Operator.EQUALS, measurementName))
.size() != 0)
{
Measurement measure = db.find(Measurement.class,
new QueryRule(Measurement.NAME, Operator.EQUALS, measurementName))
.get(0);
if (!measure.getInvestigation_Name().equals(investigationName))
{
measurement.setName(measurementName + "_" + investigationName);
measurement.setLabel(measurementName);
checkExistingMeasurementsInDB.put(measure.getName(), measurementName
+ "_" + investigationName);
headerMeasurements.add(measurement);
}
}
else
{
headerMeasurements.add(measurement);
}
}
// The rest of the column is observedValue!
}
else
{
if (!cellValue.equals("") && cellValue != null
&& field.getObservationTarget() != -1)
{
List<String> multipleValuesInCells = new ArrayList<String>();
if (field.getMultipleValues().equals("true"))
{
multipleValuesInCells = Arrays.asList(cellValue.split(","));
}
else
{
multipleValuesInCells.add(cellValue);
}
for (String eachValue : multipleValuesInCells)
{
ObservedValue observedValue = new ObservedValue();
String headerName = sheet.getCell(colIndex, startingRowIndex).getContents()
.replaceAll("[^(a-zA-Z0-9_\\s)]", " ").trim();
String targetName = sheet
.getCell(field.getObservationTarget(), rowIndex + startingRowIndex)
.getContents().replaceAll("[^(a-zA-Z0-9_\\s)]", " ").trim();
// TODO: import measurements then
// import individual data. The
// measurement has to be consistent.
if (checkExistingMeasurementsInDB.keySet().contains(headerName))
{
headerName = checkExistingMeasurementsInDB.get(headerName);
}
observedValue.setFeature_Name(headerName);
observedValue.setTarget_Name(targetName);
TableField targetField = columnIndexToTableField.get(field
.getObservationTarget());
if (targetField.getClassType().equalsIgnoreCase(
Measurement.class.getSimpleName()))
{
if (!checkExistingMeasurementsInDB.containsKey(targetName))
{
Query<Measurement> q = db.query(Measurement.class);
q.addRules(new QueryRule(Measurement.NAME, Operator.EQUALS,
targetName));
q.addRules(new QueryRule(Measurement.INVESTIGATION_NAME,
Operator.NOT, investigationName));
if (q.find().size() > 0)
{
checkExistingMeasurementsInDB.put(targetName, targetName + "_"
+ investigationName);
observedValue.setTarget_Name(checkExistingMeasurementsInDB
.get(targetName));
}
}
else
{
observedValue.setTarget_Name(checkExistingMeasurementsInDB
.get(targetName));
}
}
observedValue.setValue(eachValue);
observedValueList.add(observedValue);
if (investigationName != null) observedValue.set("Investigation_name",
investigationName);
}
}
}
}
}
}
}
// List<InvestigationElement> measurementList = new
// ArrayList<InvestigationElement>();
// List<InvestigationElement> categoryList = new
// ArrayList<InvestigationElement>();
// List<InvestigationElement> protocolList = new
// ArrayList<InvestigationElement>();
// List<InvestigationElement> observationTargetList = new
// ArrayList<InvestigationElement>();
// List<InvestigationElement> computeProtocolList = new
// ArrayList<InvestigationElement>();
//
List<InvestigationElement> measurementListForEachSheet = new ArrayList<InvestigationElement>();
for (Integer colIndex : colValues.keySet())
{
for (List<InvestigationElement> list : colValues.get(colIndex))
{
if (columnIndexToTableField.get(colIndex).getClassType().equals("Measurement"))
{
measurementList.addAll(list);
measurementListForEachSheet.addAll(list);
}
if (columnIndexToTableField.get(colIndex).getClassType().equals("Category"))
{
categoryList.addAll(list);
}
if (columnIndexToTableField.get(colIndex).getClassType().equals("Protocol"))
{
protocolList.addAll(list);
}
if (columnIndexToTableField.get(colIndex).getClassType().equals("ObservationTarget"))
{
observationTargetList.addAll(list);
}
if (columnIndexToTableField.get(colIndex).getClassType().equals("Panel"))
{
panelList.addAll(list);
}
}
}
String sheetName = sheet.getName().replaceAll("[^(a-zA-Z0-9_\\s)]", " ").trim();
if (sheetImportProtocol == true)
{
Protocol sheetNameProtocol = new Protocol();
sheetNameProtocol.setName(sheetName);
sheetNameProtocol.setInvestigation_Name(investigationName);
List<String> nameOfMeasurements = new ArrayList<String>();
for (InvestigationElement e : measurementListForEachSheet)
{
nameOfMeasurements.add(e.getName());
}
sheetNameProtocol.setFeatures_Name(nameOfMeasurements);
protocolList.add(sheetNameProtocol);
}
}
db.beginTx();
setInvestigation();
db.update(new ArrayList<OntologyTerm>(ontologyTermOfList.values()),
Database.DatabaseAction.ADD_IGNORE_EXISTING, OntologyTerm.NAME);
db.update(new ArrayList<Institute>(listOfInstitute.values()), Database.DatabaseAction.ADD_IGNORE_EXISTING,
Institute.NAME);
db.update(new ArrayList<Person>(listOfPerson.values()), Database.DatabaseAction.ADD_IGNORE_EXISTING,
Person.NAME);
HashMap<String, InvestigationElement> removedPanelList = removeDuplicates(panelList);
checkExistenceInDB(removedPanelList, Panel.class.getSimpleName());
panelList = new ArrayList<InvestigationElement>(removedPanelList.values());
// for(InvestigationElement e : panelList){
// System.out.println(e.getName());
// }
db.update(panelList, Database.DatabaseAction.ADD_IGNORE_EXISTING, Panel.NAME);
HashMap<String, InvestigationElement> hashMapObservationTarget = removeDuplicates(observationTargetList);
checkExistenceInDB(hashMapObservationTarget, ObservationTarget.class.getSimpleName());
observationTargetList = new ArrayList<InvestigationElement>(hashMapObservationTarget.values());
int iterationForObservationTarget = 1;
while (observationTargetList.size() > iterationForObservationTarget * 5000)
{
List<InvestigationElement> subListForObservationTarget = observationTargetList.subList(
(iterationForObservationTarget - 1) * 5000, iterationForObservationTarget * 5000);
db.update(subListForObservationTarget, Database.DatabaseAction.ADD_IGNORE_EXISTING,
ObservationTarget.NAME, ObservationTarget.INVESTIGATION_NAME);
iterationForObservationTarget++;
}
List<InvestigationElement> subListForObservationTarget = observationTargetList.subList(
(iterationForObservationTarget - 1) * 5000, observationTargetList.size());
db.update(subListForObservationTarget, Database.DatabaseAction.ADD_IGNORE_EXISTING, ObservationTarget.NAME,
ObservationTarget.INVESTIGATION_NAME);
HashMap<String, InvestigationElement> hashMapCategory = removeDuplicates(categoryList);
checkExistenceInDB(hashMapCategory, Category.class.getSimpleName());
categoryList = new ArrayList<InvestigationElement>(hashMapCategory.values());
for (InvestigationElement c : categoryList)
{
System.out.println(c.getName());
if (c.getName().equals(""))
{
System.out.println();
}
}
db.update(categoryList, Database.DatabaseAction.ADD_UPDATE_EXISTING, Category.NAME,
Category.INVESTIGATION_NAME);
for (InvestigationElement m : measurementList)
{
String measurementName = m.getName();
// prevent the measurement with the same name from importing
// twice. Therefore check the database whether the
// measurement already existed, if there is, make a unique
// measurement name by combining it with investigation
// name. such as weight_study_KORA. In order to display the
// measurement with its original name such as "weight"
// a meta-measurement "display name" is created to describe the
// measurements as a label (measurement becomes
// observationElement
// in Molgenis) such as weight_study_KORA (ObservationElement)
// --------->"weight" (value) <-----------diaplay name
// (measurement)
// if(db.find(Measurement.class, new QueryRule(Measurement.NAME,
// Operator.EQUALS, measurementName)).size() != 0){
//
// //if the existing measurement comes from the same
// investigation, that means they are the same measurement,
// don`t import
// Measurement existingMeasurement = db.find(Measurement.class,
// new QueryRule(Measurement.NAME, Operator.EQUALS,
// measurementName)).get(0);
//
// if(!existingMeasurement.getInvestigation_Name().equals(investigationName)){
if (checkExistingMeasurementsInDB.containsKey(m.getName()))
{
m.set(Measurement.LABEL, measurementName);
measurementName = checkExistingMeasurementsInDB.get(m.getName());
m.setName(measurementName);
}
// Resolving importing the measurements with the same name. In
// the different studies, measurements with the same name could
// have different definitions, so we need to distinguish this
// kind of variables. Therefore a display name meta-measurement
// is created
// to describe these measurements! For example, measurement
// weight-study-1 and weight-study-2 have the same value for the
// display name, "weight"
// }
// }
List<String> categories_name = (List<String>) m.get(Measurement.CATEGORIES_NAME);
List<String> categories_new = new ArrayList<String>();
for (String eachCategory : categories_name)
{
if (checkExistingEntitesInDB.containsKey(eachCategory.toLowerCase()))
{
categories_new.add(checkExistingEntitesInDB.get(eachCategory.toLowerCase()));
}
else
{
categories_new.add(eachCategory);
}
}
if (categories_new.size() > 0)
{
List<Category> categories = db.find(Category.class, new QueryRule(Category.NAME, Operator.IN,
categories_new));
List<Integer> categoryId = new ArrayList<Integer>();
for (Category c : categories)
{
if (m.get(Measurement.NAME).equals(c.getName()))
{
c.setName(c.getName() + "_code");
db.update(c);
}
categoryId.add(c.getId());
}
m.set(Measurement.CATEGORIES, categoryId);
}
}
HashMap<String, InvestigationElement> hashMapMeasurement = removeDuplicates(measurementList);
checkExistenceInDB(hashMapMeasurement, Measurement.class.getSimpleName());
measurementList = new ArrayList<InvestigationElement>(hashMapMeasurement.values());
db.update(measurementList, Database.DatabaseAction.ADD_IGNORE_EXISTING, Measurement.NAME,
Measurement.INVESTIGATION_NAME);
// Try to update measurements
HashMap<String, InvestigationElement> hashMapProtocol = removeDuplicates(protocolList);
checkExistenceInDB(hashMapProtocol, Protocol.class.getSimpleName());
protocolList = new ArrayList<InvestigationElement>(hashMapProtocol.values());
HashMap<String, List<String>> subProtocolAndProtocol = new HashMap<String, List<String>>();
// mref is not working for the name. We can`t do
// protocol.setFeatures_name(""). Therefore we need to
// add features in db first, afterwards we could use
// protocol.setFeatures_ID(). Mref for ID is working fine
for (InvestigationElement p : protocolList)
{
@SuppressWarnings("unchecked")
List<String> feature_names = (List<String>) p.get(Protocol.FEATURES_NAME);
List<String> features_new = new ArrayList<String>();
for (String eachFeature : feature_names)
{
if (checkExistingEntitesInDB.containsKey(eachFeature.toLowerCase()))
{
features_new.add(checkExistingEntitesInDB.get(eachFeature.toLowerCase()));
}
else
{
features_new.add(eachFeature);
}
}
if (features_new.size() > 0)
{
List<Measurement> features = db.find(Measurement.class, new QueryRule(Measurement.NAME,
Operator.IN, features_new));
if (features.size() > 0)
{
List<Integer> featuresId = new ArrayList<Integer>();
for (Measurement m : features)
{
if (!featuresId.contains(m.getId())) featuresId.add(m.getId());
}
p.set(Protocol.FEATURES, featuresId);
}
}
if (p.get(Protocol.SUBPROTOCOLS_NAME) != null)
{
if (!subProtocolAndProtocol.containsKey(p.getName()))
{
subProtocolAndProtocol.put(p.getName(), (List<String>) p.get(Protocol.SUBPROTOCOLS_NAME));
}
List<String> newList = new ArrayList<String>();
p.set(Protocol.SUBPROTOCOLS_NAME, newList);
}
}
db.update(protocolList, Database.DatabaseAction.ADD_IGNORE_EXISTING, Protocol.NAME,
Protocol.INVESTIGATION_NAME);
List<InvestigationElement> subProtocols = new ArrayList<InvestigationElement>();
for (InvestigationElement p : protocolList)
{
if (p.getName().equals("Blood pressure"))
{
System.out.println();
}
if (!subProtocols.contains(p))
{
List<String> subProtocol_names = subProtocolAndProtocol.get(p.getName());
List<String> subProtocols_new = new ArrayList<String>();
for (String subProtocol : subProtocol_names)
{
if (checkExistingEntitesInDB.containsKey(subProtocol.toLowerCase()))
{
subProtocols_new.add(checkExistingEntitesInDB.get(subProtocol.toLowerCase()));
}
else
{
subProtocols_new.add(subProtocol);
}
}
if (subProtocols_new.size() > 0)
{
Query<Protocol> q = db.query(Protocol.class);
q.addRules(new QueryRule(Protocol.INVESTIGATION_NAME, Operator.EQUALS, investigationName));
q.addRules(new QueryRule(Protocol.NAME, Operator.IN, subProtocols_new));
List<Protocol> subProtocolList = q.find();
if (subProtocolList.size() > 0)
{
List<Integer> subProtocolId = new ArrayList<Integer>();
for (Protocol subPro : subProtocolList)
{
if (!subProtocolId.contains(subPro.getId()))
{
subProtocolId.add(subPro.getId());
}
}
p.set(Protocol.SUBPROTOCOLS, subProtocolId);
}
}
if (p.getId() != null) db.update(p);
}
}
db.update(headerMeasurements, Database.DatabaseAction.ADD_IGNORE_EXISTING, Measurement.NAME,
Measurement.INVESTIGATION_NAME);
observedValueList = removeDuplicatesObservedValue(observedValueList);
int iteration = 1;
while (observedValueList.size() > iteration * 5000)
{
List<ObservedValue> subList = observedValueList.subList((iteration - 1) * 5000, iteration * 5000);
db.update(subList, Database.DatabaseAction.ADD_IGNORE_EXISTING, ObservedValue.INVESTIGATION_NAME,
ObservedValue.VALUE, ObservedValue.FEATURE_NAME, ObservedValue.TARGET_NAME);
iteration++;
}
List<ObservedValue> subList = observedValueList.subList((iteration - 1) * 5000, observedValueList.size());
// for(ObservedValue ov : subList){
// System.out.println(ov);
// db.add(ov);
// }
db.update(subList, Database.DatabaseAction.ADD_IGNORE_EXISTING, ObservedValue.INVESTIGATION_NAME,
ObservedValue.VALUE, ObservedValue.FEATURE_NAME, ObservedValue.TARGET_NAME);
// for(ObservedValue ov : observedValueList){
// System.out.println(ov);
// db.add(ov);
// }
db.commitTx();
observationTargetList.clear();
measurementList.clear();
protocolList.clear();
categoryList.clear();
observationTargetList.clear();
ontologyTermOfList.clear();
}
catch (Exception e)
{
e.printStackTrace();
db.rollbackTx();
// TODO Auto-generated catch block
}
}
private void checkExistenceInDB(HashMap<String, InvestigationElement> hashMap, String ClassType)
throws DatabaseException
{
List<String> names = new ArrayList<String>(hashMap.keySet());
// checkExistingEntitesInDB.clear();
if (names.size() > 0)
{
if (ClassType.equals(Category.class.getSimpleName()) || ClassType.equals(Measurement.class.getSimpleName())
|| ClassType.equals(ObservationTarget.class.getSimpleName())
|| ClassType.equals(Panel.class.getSimpleName()))
{
for (Category c : db.find(Category.class, new QueryRule("name", Operator.IN, names)))
{
if (!c.getInvestigation_Name().equals(investigationName))
{
InvestigationElement categoryToAdd = hashMap.get(c.getName().toLowerCase());
categoryToAdd.setName(categoryToAdd.getName() + "_" + ClassType + "_" + investigationName);
checkExistingEntitesInDB.put(c.getName().toLowerCase(), categoryToAdd.getName());
}
else
{
hashMap.remove(c.getName().toLowerCase());
}
}
for (Measurement m : db.find(Measurement.class, new QueryRule("name", Operator.IN, names)))
{
if (!m.getInvestigation_Name().equals(investigationName))
{
InvestigationElement categoryToAdd = hashMap.get(m.getName().toLowerCase());
categoryToAdd.setName(categoryToAdd.getName() + "_" + ClassType + "_" + investigationName);
checkExistingEntitesInDB.put(m.getName().toLowerCase(), categoryToAdd.getName());
}
else
{
hashMap.remove(m.getName().toLowerCase());
}
}
for (ObservationTarget ot : db.find(ObservationTarget.class, new QueryRule("name", Operator.IN, names)))
{
if (!ot.getInvestigation_Name().equals(investigationName))
{
InvestigationElement categoryToAdd = hashMap.get(ot.getName().toLowerCase());
categoryToAdd.setName(categoryToAdd.getName() + "_" + ClassType + "_" + investigationName);
checkExistingEntitesInDB.put(ot.getName().toLowerCase(), categoryToAdd.getName());
}
else
{
hashMap.remove(ot.getName().toLowerCase());
}
}
}
if (ClassType.equals(ComputeProtocol.class.getSimpleName())
|| ClassType.equals(Protocol.class.getSimpleName()))
{
for (Protocol p : db.find(Protocol.class, new QueryRule("name", Operator.IN, names)))
{
if (!p.getInvestigation_Name().equals(investigationName))
{
InvestigationElement categoryToAdd = hashMap.get(p.getName().toLowerCase());
categoryToAdd.setName(categoryToAdd.getName() + "_" + ClassType + "_" + investigationName);
checkExistingEntitesInDB.put(p.getName().toLowerCase(), categoryToAdd.getName());
}
else
{
hashMap.remove(p.getName().toLowerCase());
}
}
for (ComputeProtocol p : db.find(ComputeProtocol.class, new QueryRule("name", Operator.IN, names)))
{
if (!p.getInvestigation_Name().equals(investigationName))
{
InvestigationElement categoryToAdd = hashMap.get(p.getName().toLowerCase());
categoryToAdd.setName(categoryToAdd.getName() + "_" + ClassType + "_" + investigationName);
checkExistingEntitesInDB.put(p.getName().toLowerCase(), categoryToAdd.getName());
}
else
{
hashMap.remove(p.getName().toLowerCase());
}
}
}
}
}
private List<ObservedValue> removeDuplicatesObservedValue(List<ObservedValue> observedValueList)
throws DatabaseException
{
List<ObservedValue> uniqueValues = new ArrayList<ObservedValue>();
List<String> uniqueCombination = new ArrayList<String>();
for (ObservedValue ov : observedValueList)
{
String combination = ov.getTarget_Name() + ov.getFeature_Name() + ov.getValue();
if (!uniqueCombination.contains(combination))
{
uniqueCombination.add(combination);
Query<ObservedValue> q = db.query(ObservedValue.class);
q.addRules(new QueryRule(ObservedValue.VALUE, Operator.EQUALS, ov.getValue()));
q.addRules(new QueryRule(ObservedValue.TARGET_NAME, Operator.EQUALS, ov.getTarget_Name()));
q.addRules(new QueryRule(ObservedValue.FEATURE_NAME, Operator.EQUALS, ov.getFeature_Name()));
q.addRules(new QueryRule(ObservedValue.INVESTIGATION_NAME, Operator.EQUALS, ov.getInvestigation_Name()));
if (q.find().size() == 0)
{
uniqueValues.add(ov);
}
}
}
return uniqueValues;
}
private HashMap<String, InvestigationElement> removeDuplicates(List<InvestigationElement> listOfObjectsToAdd)
throws DatabaseException
{
HashMap<String, InvestigationElement> addedName = new HashMap<String, InvestigationElement>();
for (InvestigationElement eachElement : listOfObjectsToAdd)
{
if (!addedName.containsKey(eachElement.getName().toLowerCase()))
{
addedName.put(eachElement.getName().toLowerCase(), eachElement);
}
}
return addedName;
}
private String adjustDataTypeValue(String cellValue)
{
for (String keySet : InputToMolgenisDataType.keySet())
{
Pattern p = Pattern.compile(keySet, Pattern.CASE_INSENSITIVE);
Matcher m = p.matcher(cellValue);
if (m.find())
{
return InputToMolgenisDataType.get(keySet);
}
}
return MeasurementDataType;
}
public void setDataType(String dataTypeInput, String molgenisDataType)
{
InputToMolgenisDataType.put(dataTypeInput, molgenisDataType);
}
public void setMissingCategoryIndex(int missingCategoryIndex)
{
missingCategoryList.add(missingCategoryIndex);
}
public void setInvestigationName(String investigationName)
{
if (investigationName != null)
{
this.investigationName = investigationName;
}
}
public void setInvestigation() throws DatabaseException
{
Investigation investigation = new Investigation();
if (investigationName != null
&& db.query(Investigation.class).eq(Investigation.NAME, investigationName).count() == 0)
{
investigation.setName(investigationName);
db.add(investigation);
}
}
public void setDirection(String excelDirection)
{
this.excelDirection = excelDirection;
}
}