/* * Copyright (c) 2017 OBiBa. All rights reserved. * * This program and the accompanying materials * are made available under the terms of the GNU Public License v3.0. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package org.obiba.magma.datasource.excel.support; import com.google.common.base.Strings; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.apache.poi.ss.usermodel.Cell; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; import org.obiba.magma.Attribute; import org.obiba.magma.AttributeAware; import org.obiba.magma.AttributeAwareBuilder; import org.obiba.magma.Attributes; import org.obiba.magma.Category; import org.obiba.magma.ValueType; import org.obiba.magma.Variable; import org.obiba.magma.datasource.excel.ExcelDatasource; import org.obiba.magma.datasource.excel.ExcelValueTable; import org.obiba.magma.datasource.excel.ExcelValueTableWriter; import org.obiba.magma.type.BooleanType; import org.obiba.magma.type.IntegerType; import org.obiba.magma.type.TextType; import javax.annotation.Nullable; import java.util.ArrayList; import java.util.Collection; import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; public class VariableConverter { public static final String TABLE = "table"; public static final String NAME = "name"; public static final String VALUE_TYPE = "valueType"; public static final String ENTITY_TYPE = "entityType"; public static final String REFERENCED_ENTITY_TYPE = "referencedEntityType"; public static final String MIME_TYPE = "mimeType"; public static final String UNIT = "unit"; public static final String INDEX = "index"; public static final String REPEATABLE = "repeatable"; public static final String OCCURRENCE_GROUP = "occurrenceGroup"; public static final String VARIABLE = "variable"; public static final String CODE = "code"; public static final String MISSING = "missing"; public static final List<String> reservedVariableHeaders = Lists.newArrayList(TABLE, // NAME, // VALUE_TYPE, // ENTITY_TYPE, // REFERENCED_ENTITY_TYPE, // MIME_TYPE, // UNIT, // REPEATABLE, // OCCURRENCE_GROUP, // INDEX); public static final List<String> reservedCategoryHeaders = Lists.newArrayList(TABLE, // VARIABLE, // NAME, // CODE, // MISSING); private Map<String, Integer> headerMapVariables; private Map<String, Integer> headerMapCategories; private final Map<String, Integer> cachedHeaderMapVariables = Maps.newHashMap(); private final Map<String, Integer> cachedHeaderMapCategories = Maps.newHashMap(); private Set<String> attributeNamesCategories; private Set<String> attributeNamesVariables; /** * Maps a variable's name to its Row in the variablesSheet */ private final Map<String, Row> variableRows; /** * Maps a category's name concatenated with the variable's name to its Row in the variablesSheet */ private final Map<String, Row> categoryRows; private final ExcelValueTable valueTable; public VariableConverter(ExcelValueTable valueTable) { this.valueTable = valueTable; variableRows = Maps.newHashMap(); categoryRows = Maps.newHashMap(); } /** * Check if is a variable row for the current table. * * @param variableRow * @return */ public boolean isVariableRow(Row variableRow) { Integer idx = getVariableHeaderIndex(TABLE); String table = ExcelDatasource.DEFAULT_TABLE_NAME; if(idx != null) { table = getVariableCellValue(variableRow, TABLE); } return valueTable.getName().equals(table); } public String getVariableName(Row variableRow) { return getVariableCellValue(variableRow, NAME).trim(); } // // unmarshall // @SuppressWarnings({ "OverlyLongMethod", "PMD.NcssMethodCount" }) public Variable unmarshall(Row variableRow, int position) { String tableName = valueTable.getName(); int rowNum = variableRow.getRowNum(); String name = getVariableCellValue(variableRow, NAME).trim(); String table = getVariableCellValue(variableRow, TABLE).trim(); if(name.isEmpty()) { throw new ExcelDatasourceParsingException("Variable name is required in table: " + tableName, // "VariableNameRequired", ExcelDatasource.VARIABLES_SHEET, rowNum + 1, tableName); } if(name.contains(":")) { throw new ExcelDatasourceParsingException( "Variable name cannot contain ':' in variable: " + tableName + " / " + name, // "VariableNameCannotContainColon", ExcelDatasource.VARIABLES_SHEET, rowNum + 1, tableName, name); } String entityType = getVariableCellValue(variableRow, ENTITY_TYPE).trim(); if(entityType.isEmpty()) entityType = "Participant"; ValueType valueType = unmarshallValueType(variableRow, name, tableName, rowNum); Variable.Builder builder = Variable.Builder.newVariable(name, valueType, entityType); unmarshallMimeType(variableRow, builder); unmarshallUnit(variableRow, builder); unmarshallIndex(variableRow, builder, position); unmarshallOccurrenceGroup(variableRow, builder); unmarshallRepeatable(variableRow, builder); unmarshallReferencedEntityType(variableRow, builder); unmarshallCustomAttributes(variableRow, getHeaderMapVariables(), getAttributeNamesVariables(), builder); unmarshallCategories(tableName, name, builder); variableRows.put(keyOfRow(table, name), variableRow); return builder.build(); } private void unmarshallReferencedEntityType(Row variableRow, Variable.Builder builder) { String referencedEntityType = getVariableCellValue(variableRow, REFERENCED_ENTITY_TYPE).trim(); if(referencedEntityType.length() > 0) { builder.referencedEntityType(referencedEntityType); } } private void unmarshallRepeatable(Row variableRow, Variable.Builder builder) { boolean repeatable = parseBoolean(getVariableCellValue(variableRow, REPEATABLE).trim()); if(repeatable) { builder.repeatable(); } } private void unmarshallOccurrenceGroup(Row variableRow, Variable.Builder builder) { String occurrenceGroup = getVariableCellValue(variableRow, OCCURRENCE_GROUP).trim(); if(occurrenceGroup.length() > 0) { builder.occurrenceGroup(occurrenceGroup); } } private void unmarshallUnit(Row variableRow, Variable.Builder builder) { String unit = getVariableCellValue(variableRow, UNIT).trim(); if(unit.length() > 0) { builder.unit(unit); } } private void unmarshallIndex(Row variableRow, Variable.Builder builder, int position) { String index = getVariableCellValue(variableRow, INDEX).trim(); if(!Strings.isNullOrEmpty(index)) { try { builder.index(Integer.valueOf(index)); } catch(NumberFormatException e) { builder.index(position); } } else { builder.index(position); } } private void unmarshallMimeType(Row variableRow, Variable.Builder builder) { String mimeType = getVariableCellValue(variableRow, MIME_TYPE).trim(); if(mimeType.length() > 0) { builder.mimeType(mimeType); } } private ValueType unmarshallValueType(Row variableRow, String name, String tableName, int rowNum) { ValueType valueType; String valueTypeStr = getVariableCellValue(variableRow, VALUE_TYPE).trim(); if(valueTypeStr.isEmpty() || "string".equalsIgnoreCase(valueTypeStr)) { valueType = TextType.get(); } else { try { valueType = ValueType.Factory.forName(valueTypeStr); } catch(Exception e) { throw new ExcelDatasourceParsingException( "Unknown value type '" + valueTypeStr + "' for variable: " + tableName + " / " + name, // "UnknownValueType", ExcelDatasource.VARIABLES_SHEET, rowNum + 1, tableName, name, valueTypeStr); } } return valueType; } /** * Read the Categories of a specific Variable in the "Categories" Excel sheet. Add these Categories to the specified * Variable using the Variable.Builder. * * @param variableName * @param variableBuilder */ private void unmarshallCategories(String tableName, String variableName, Variable.Builder variableBuilder) { if(getHeaderMapCategories() == null) return; Sheet categoriesSheet = valueTable.getDatasource().getCategoriesSheet(); List<Integer> variableCategoryRows = valueTable.getVariableCategoryRows(variableName); Collection<String> categoryNames = new ArrayList<>(); Collection<ExcelDatasourceParsingException> errors = new ArrayList<>(); Row firstRow = null; for(int rowIndex : variableCategoryRows) { Row categoryRow = categoriesSheet.getRow(rowIndex); if(firstRow == null) firstRow = categoryRow; unmarshallCategories(tableName, variableName, variableBuilder, categoryNames, errors, categoryRow); } if(errors.size() > 0) { ExcelDatasourceParsingException parent = new ExcelDatasourceParsingException( "Errors while parsing categories of variable: " + valueTable.getName() + " / " + variableName, "VariableCategoriesDefinitionErrors", ExcelDatasource.CATEGORIES_SHEET, firstRow.getRowNum() + 1, valueTable.getName(), variableName); parent.setChildren(errors); throw parent; } } private void unmarshallCategories(String tableName, String variableName, Variable.Builder variableBuilder, Collection<String> categoryNames, Collection<ExcelDatasourceParsingException> errors, Row categoryRow) { try { Category category = unmarshallCategory(variableName, categoryRow); if(categoryNames.contains(category.getName())) { errors.add(new ExcelDatasourceParsingException( "Duplicate category name in variable: " + valueTable.getName() + " / " + variableName, "DuplicateCategoryName", ExcelDatasource.CATEGORIES_SHEET, categoryRow.getRowNum() + 1, valueTable.getName(), variableName, category.getName())); } else { categoryNames.add(category.getName()); variableBuilder.addCategory(category); categoryRows.put(keyOfRowForCategory(tableName, variableName, category), categoryRow); } } catch(ExcelDatasourceParsingException pe) { errors.add(pe); } catch(Exception e) { errors.add(new ExcelDatasourceParsingException("Unexpected error in category: " + e.getMessage(), e, "UnexpectedErrorInCategory", ExcelDatasource.CATEGORIES_SHEET, categoryRow.getRowNum() + 1, valueTable.getName(), variableName)); } } private Category unmarshallCategory(String variableName, Row categoryRow) { String name = getCategoryCellValue(categoryRow, NAME).trim(); String code = getCategoryCellValue(categoryRow, CODE).trim(); boolean missing = parseBoolean(getCategoryCellValue(categoryRow, MISSING).trim()); // required values if(name.isEmpty()) { throw new ExcelDatasourceParsingException( "Category name is required for variable: " + valueTable.getName() + " / " + variableName, "CategoryNameRequired", ExcelDatasource.CATEGORIES_SHEET, categoryRow.getRowNum() + 1, valueTable.getName(), variableName); } Category.Builder builder = Category.Builder.newCategory(name).missing(missing); // default values if(code.length() > 0) { builder.withCode(code); } unmarshallCustomAttributes(categoryRow, getHeaderMapCategories(), getAttributeNamesCategories(), builder); return builder.build(); } private boolean parseBoolean(String str) { return "1".equals(str) || "true".equalsIgnoreCase(str) || "yes".equalsIgnoreCase(str) || "y".equalsIgnoreCase(str) || "x".equalsIgnoreCase(str); } /** * Read the custom Attributes define by a Row in an Excel sheet to an AttributeAware instance (ex: Variable, * Category...) using an AttributeAwareBuilder. * * @param headerMap * @param attributeNames */ private void unmarshallCustomAttributes(Row attributesRow, Map<String, Integer> headerMap, Iterable<String> attributeNames, AttributeAwareBuilder<?> builder) { for(String attributeName : attributeNames) { String cellValueAsString = getCellValueAsString(attributesRow.getCell(headerMap.get(attributeName))); if(cellValueAsString.isEmpty()) { continue; } Attribute.Builder attr = Attributes.decodeFromHeader(attributeName); builder.addAttribute(attr.withValue(cellValueAsString).build()); } } // // marshall // public Row marshall(ExcelValueTableWriter.VariableWithMetadata variableWithMetadata) { Variable variable = variableWithMetadata.getVariable(); Row variableRow = getRow(variableWithMetadata); ExcelUtil.setCellValue(getVariableCell(variableRow, TABLE), TextType.get(), variableWithMetadata.getTableName()); ExcelUtil.setCellValue(getVariableCell(variableRow, NAME), TextType.get(), variable.getName()); ExcelUtil.setCellValue(getVariableCell(variableRow, MIME_TYPE), TextType.get(), variable.getMimeType()); ExcelUtil .setCellValue(getVariableCell(variableRow, OCCURRENCE_GROUP), TextType.get(), variable.getOccurrenceGroup()); ExcelUtil.setCellValue(getVariableCell(variableRow, ENTITY_TYPE), TextType.get(), variable.getEntityType()); ExcelUtil.setCellValue(getVariableCell(variableRow, UNIT), TextType.get(), variable.getUnit()); ExcelUtil.setCellValue(getVariableCell(variableRow, INDEX), IntegerType.get(), variable.getIndex()); ExcelUtil.setCellValue(getVariableCell(variableRow, REPEATABLE), BooleanType.get(), variable.isRepeatable()); ExcelUtil.setCellValue(getVariableCell(variableRow, VALUE_TYPE), TextType.get(), variable.getValueType().getName()); ExcelUtil.setCellValue(getVariableCell(variableRow, REFERENCED_ENTITY_TYPE), TextType.get(), variable.getReferencedEntityType()); marshallCustomAttributes(variable, variableRow, headerMapVariables); marshallCategories(variableWithMetadata); return variableRow; } private void marshallCategories(ExcelValueTableWriter.VariableWithMetadata variableWithMetadata) { for (Category category : variableWithMetadata.getVariable().getCategories()) { Row categoryRow = getCategoryRow(variableWithMetadata, category); ExcelUtil.setCellValue(getCategoryCell(categoryRow, TABLE), TextType.get(), variableWithMetadata.getTableName()); ExcelUtil.setCellValue(getCategoryCell(categoryRow, VARIABLE), TextType.get(), variableWithMetadata.getVariable().getName()); ExcelUtil.setCellValue(getCategoryCell(categoryRow, NAME), TextType.get(), category.getName()); ExcelUtil.setCellValue(getCategoryCell(categoryRow, CODE), TextType.get(), category.getCode()); ExcelUtil.setCellValue(getCategoryCell(categoryRow, MISSING), BooleanType.get(), category.isMissing()); marshallCustomAttributes(category, categoryRow, headerMapCategories); } } /** * Writes the custom Attributes of an AttributeAware instance (ex: Variable, Category...) to a Row in an Excel sheet. */ private void marshallCustomAttributes(AttributeAware attributeAware, Row attributesRow, Map<String, Integer> headerMap) { for(Attribute customAttribute : attributeAware.getAttributes()) { String headerValue = Attributes.encodeForHeader(customAttribute); Integer attributeCellIndex = headerMap.get(headerValue); ExcelUtil.setCellValue(attributesRow.getCell(attributeCellIndex, Row.CREATE_NULL_AS_BLANK), customAttribute.getValue()); } } public void createVariablesHeaders(List<Attribute> attributes, Row headerRow) { createHeaders(attributes, headerRow, headerMapVariables); } public void createCategoriesHeaders(List<Attribute> attributes, Row headerRow) { createHeaders(attributes, headerRow, headerMapCategories); } private void createHeaders(List<Attribute> attributes, Row headerRow, Map<String, Integer> headerMap) { List<String> collect = attributes.stream() .map(Attributes::encodeForHeader) .distinct() .sorted(new FullQualifiedAttributeComparator()) .collect(Collectors.toList()); for (String headerValue : collect) { Integer attributeCellIndex = headerMap.get(headerValue); if(attributeCellIndex == null) { headerMap.put(headerValue, getLastCellNum(headerRow)); attributeCellIndex = getLastCellNum(headerRow); Cell headerCell = headerRow.createCell(attributeCellIndex); headerCell.setCellValue(headerValue); headerCell.setCellStyle(valueTable.getDatasource().getHeaderCellStyle()); } } } // // utility methods // private class FullQualifiedAttributeComparator implements Comparator<String> { private static final String DELIMITER = "::"; @Override public int compare(String o1, String o2) { if (hasNamespace(o1) == hasNamespace(o2)) return o1.compareTo(o2); else if (hasNamespace(o1)) return 1; else return -1; } private boolean hasNamespace(String o1) { return o1.contains(DELIMITER); } } /** * Returns the {@code Row} from the variable sheet for the specified variable. If no such row currently exists, a new * one is added and returned. */ private Row getRow(ExcelValueTableWriter.VariableWithMetadata variableWithMetadata) { Row row = variableRows.get(keyOfRowForVariables(variableWithMetadata)); if(row == null) { Sheet variables = valueTable.getDatasource().getVariablesSheet(); row = variables.createRow(variables.getPhysicalNumberOfRows()); variableRows.put(keyOfRowForVariables(variableWithMetadata), row); } return row; } private String keyOfRowForVariables(ExcelValueTableWriter.VariableWithMetadata variableWithMetadata) { return keyOfRow(variableWithMetadata.getTableName(), variableWithMetadata.getVariable().getName()); } private String keyOfRowForCategory(ExcelValueTableWriter.VariableWithMetadata variableWithMetadata, Category category) { return keyOfRowForCategory(variableWithMetadata.getTableName(), variableWithMetadata.getVariable().getName(), category); } private String keyOfRowForCategory(String tableName, String variableName, Category category) { return keyOfRow(tableName,variableName + category.getName()); } private String keyOfRow(String tableName, String variableName) { return tableName + FullQualifiedAttributeComparator.DELIMITER + variableName; } /** * Returns the {@code Row} from the variable sheet for the specified variable. If no such row currently exists, a new * one is added and returned. */ private Row getCategoryRow(ExcelValueTableWriter.VariableWithMetadata variableWithMetadata, Category category) { String rowKey = keyOfRowForCategory(variableWithMetadata, category); Row row = categoryRows.get(rowKey); if(row == null) { Sheet categories = valueTable.getDatasource().getCategoriesSheet(); row = categories.createRow(categories.getPhysicalNumberOfRows()); categoryRows.put(rowKey, row); } return row; } /** * Get the index of the last cell in the row. * * @param row * @return 0 if there is no cell */ private int getLastCellNum(Row row) { return row.getLastCellNum() == -1 ? 0 : row.getLastCellNum(); } /** * Get the value in the cell as a string. * * @param cell * @return empty string if cell is null */ private String getCellValueAsString(Cell cell) { return ExcelUtil.getCellValueAsString(cell); } /** * Get the variable cell for given row at given header column. Create one if missing. * * @param row * @param header * @return null if no such header */ @Nullable private Cell getVariableCell(Row row, String header) { Integer idx = getVariableHeaderIndex(header); return idx == null ? null : row.getCell(idx, Row.CREATE_NULL_AS_BLANK); } /** * Get the variable cell value for given row at given header. * * @param row * @param header * @return empty string if no such cell */ private String getVariableCellValue(Row row, String header) { if (row == null) return ""; Integer idx = getVariableHeaderIndex(header); return idx != null ? getCellValueAsString(row.getCell(idx)).trim() : ""; } /** * Get the 0-based index of the variable column at the given header. * * @param header * @return null if no such header */ @Nullable public Integer getVariableHeaderIndex(String header) { return reservedVariableHeaders.contains(header) // ? getHeaderIndex(getHeaderMapVariables(), cachedHeaderMapVariables, header) // : getHeaderMapVariables().get(header); } /** * Set the 0-based index of a variable column at the given header. * * @param header * @param idx */ public void putVariableHeaderIndex(String header, Integer idx) { getHeaderMapVariables().put(header, idx); } /** * Get the map between a variable header and the 0-based index of the column. * * @return */ private Map<String, Integer> getHeaderMapVariables() { if(headerMapVariables == null) { headerMapVariables = valueTable.getDatasource().getVariablesHeaderMap(); } return headerMapVariables; } /** * Get the set of attribute headers for the variables. * * @return */ private Iterable<String> getAttributeNamesVariables() { if(attributeNamesVariables == null) { attributeNamesVariables = valueTable.getDatasource().getVariablesCustomAttributeNames(); } return attributeNamesVariables; } /** * Get the category cell for given row at given header column. Create one if missing. * * @param row * @param header * @return null if no such header */ @Nullable private Cell getCategoryCell(Row row, String header) { Integer idx = getCategoryHeaderIndex(header); return idx == null || row == null ? null : row.getCell(idx, Row.CREATE_NULL_AS_BLANK); } /** * Get the category cell value for given row at given header. * * @param row * @param header * @return empty string if no such cell */ private String getCategoryCellValue(Row row, String header) { Integer idx = getCategoryHeaderIndex(header); return idx != null && row != null ? getCellValueAsString(row.getCell(idx)).trim() : ""; } /** * Get the 0-based index of the category column at the given header. * * @param header * @return null if no such header */ @Nullable public Integer getCategoryHeaderIndex(String header) { return reservedCategoryHeaders.contains(header) // ? getHeaderIndex(getHeaderMapCategories(), cachedHeaderMapCategories, header) // : getHeaderMapCategories().get(header); } /** * Set the 0-based index of a category column at the given header. * * @param header * @param idx */ public void putCategoryHeaderIndex(String header, Integer idx) { getHeaderMapCategories().put(header, idx); } /** * Get the 0-based index of the column at the given header relatively to the header map. * * @param headerMap * @param cachedHeaderMap * @param header * @return null if no such header * @see ExcelUtil#findNormalizedHeader(Iterable, String) */ @Nullable private Integer getHeaderIndex(Map<String, Integer> headerMap, Map<String, Integer> cachedHeaderMap, String header) { Integer idx = cachedHeaderMap.get(header); if(idx == null) { String found = ExcelUtil.findNormalizedHeader(headerMap.keySet(), header); if(found != null) { idx = headerMap.get(found); if(idx != null) { cachedHeaderMap.put(header, idx); } } } return idx; } /** * Get the map between a category header and the 0-based index of the column. * * @return */ private Map<String, Integer> getHeaderMapCategories() { if(headerMapCategories == null) { headerMapCategories = valueTable.getDatasource().getCategoriesHeaderMap(); } return headerMapCategories; } /** * Get the set of attribute headers for the categories. * * @return */ private Iterable<String> getAttributeNamesCategories() { if(attributeNamesCategories == null) { attributeNamesCategories = valueTable.getDatasource().getCategoriesCustomAttributeNames(); } return attributeNamesCategories; } /** * Get the table name for a category row. * * @param categoryRow * @return if no table column is defined, returns the default table name */ public String getCategoryTableName(Row categoryRow) { Integer idx = getVariableHeaderIndex(TABLE); return idx == null ? ExcelDatasource.DEFAULT_TABLE_NAME : getCategoryCellValue(categoryRow, TABLE).trim(); } /** * Get the variable name for a category row. * * @param categoryRow * @return */ public String getCategoryVariableName(Row categoryRow) { return getCategoryCellValue(categoryRow, VARIABLE).trim(); } }