/*************************************************************************** * Copyright 2010 Global Biodiversity Information Facility Secretariat * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * http://www.apache.org/licenses/LICENSE-2.0 * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. ***************************************************************************/ package org.gbif.ipt.action.manage; import org.gbif.dwc.terms.Term; import org.gbif.dwc.terms.TermFactory; import org.gbif.ipt.config.AppConfig; import org.gbif.ipt.config.Constants; import org.gbif.ipt.model.Extension; import org.gbif.ipt.model.ExtensionMapping; import org.gbif.ipt.model.ExtensionProperty; import org.gbif.ipt.model.PropertyMapping; import org.gbif.ipt.model.RecordFilter; import org.gbif.ipt.model.RecordFilter.Comparator; import org.gbif.ipt.model.Source; import org.gbif.ipt.model.TextFileSource; import org.gbif.ipt.service.admin.ExtensionManager; import org.gbif.ipt.service.admin.RegistrationManager; import org.gbif.ipt.service.admin.VocabulariesManager; import org.gbif.ipt.service.manage.ResourceManager; import org.gbif.ipt.service.manage.SourceManager; import org.gbif.ipt.struts2.SimpleTextProvider; import org.gbif.ipt.validation.ExtensionMappingValidator; import org.gbif.ipt.validation.ExtensionMappingValidator.ValidationStatus; import java.io.IOException; import java.util.ArrayList; import java.util.Date; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeSet; import java.util.regex.Pattern; import com.google.common.base.MoreObjects; import com.google.common.base.Strings; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.inject.Inject; import org.apache.commons.lang3.StringUtils; import org.apache.log4j.Logger; /** * A rather complex action that deals with a single mapping configuration. * The prepare method does a lot of work. * For initial GET requests linked from the overview the prepare() method decides on the result name, i.e. which * template to call. * We dont use any regular validation here but only raise warnings to the user. * So the save method is always executed for POST requests, but not for GETs. * Please dont add any action errors as this will trigger the validation interceptor and causes problems, use * addActionWarning() instead. * * @author markus */ public class MappingAction extends ManagerBaseAction { private static final long serialVersionUID = -831969146160030857L; // logging private static final Logger LOG = Logger.getLogger(MappingAction.class); private static final Pattern NORM_TERM = Pattern.compile("[\\W\\s_0-9]+"); private final ExtensionManager extensionManager; private final SourceManager sourceManager; private final VocabulariesManager vocabManager; // config private ExtensionMapping mapping; private List<String> columns; private final Comparator[] comparators = Comparator.values(); private List<String[]> peek; private List<PropertyMapping> fields; private Map<String, Integer> fieldsTermIndices = Maps.newHashMap(); private Map<String, List<PropertyMapping>> fieldsByGroup = Maps.newLinkedHashMap(); private final Map<String, Map<String, String>> vocabTerms = Maps.newHashMap(); private ExtensionProperty coreid; private ExtensionProperty datasetId; private Integer mid; private PropertyMapping mappingCoreid; private boolean doiUsedForDatasetId; @Inject public MappingAction(SimpleTextProvider textProvider, AppConfig cfg, RegistrationManager registrationManager, ResourceManager resourceManager, ExtensionManager extensionManager, SourceManager sourceManager, VocabulariesManager vocabManager) { super(textProvider, cfg, registrationManager, resourceManager); this.extensionManager = extensionManager; this.sourceManager = sourceManager; this.vocabManager = vocabManager; } /** * Validate the mapping and report any warning or errors, shown on the mapping page. */ private void validateAndReport() { if (mapping.getSource() == null) { return; } ExtensionMappingValidator validator = new ExtensionMappingValidator(); ValidationStatus v = validator.validate(mapping, resource, peek, columns); if (v != null && !v.isValid()) { if (v.getIdProblem() != null) { addActionWarning(getText(v.getIdProblem(), v.getIdProblemParams())); } for (Term t : v.getMissingRequiredFields()) { addActionWarning(getText("validation.required", new String[] {t.simpleName()})); } for (Term t : v.getWrongDataTypeFields()) { addActionWarning(getText("validation.wrong.datatype", new String[] {t.simpleName()})); } // report columns that have been translated multiple times for (String columnName : v.getMultipleTranslationsForSameColumn()) { addActionError(getText("validation.column.multipleTranslations", new String[] {columnName})); } } } /** * This method automaps a source's columns. First it tries to automap the mappingCoreId column, and then it tries * to automap the source's remaining fields against the core/extension. * * @return the number of terms that have been automapped */ int automap() { // keep track of how many terms were automapped int automapped = 0; // start by trying to automap the mappingCoreId (occurrenceId/taxonId) to a column in source int idx1 = 0; for (String col : columns) { String normCol = normalizeColumnName(col); if (normCol != null && TermFactory.normaliseTerm(mappingCoreid.getTerm().simpleName()) .equalsIgnoreCase(normCol)) { // mappingCoreId and mapping id column must both be set (and have the same index) to automap successfully. mappingCoreid.setIndex(idx1); mapping.setIdColumn(idx1); // we have automapped the core id column, so increment automapped counter and exit automapped++; break; } idx1++; } // next, try to automap the source's remaining columns against the extensions fields for (PropertyMapping f : fields) { int idx2 = 0; for (String col : columns) { String normCol = normalizeColumnName(col); if (normCol != null && TermFactory.normaliseTerm(f.getTerm().simpleName()).equalsIgnoreCase(normCol)) { f.setIndex(idx2); // we have automapped the term, so increment automapped counter and exit automapped++; break; } idx2++; } } return automapped; } public String cancel() { resource.deleteMapping(mapping); // set mappings modified date resource.setMappingsModified(new Date()); // save resource saveResource(); return SUCCESS; } @Override public String delete() { if (resource.deleteMapping(mapping)) { addActionMessage(getText("manage.mapping.deleted", new String[] {id})); // set mappings modified date resource.setMappingsModified(new Date()); // save resource saveResource(); } else { addActionMessage(getText("manage.mapping.couldnt.delete", new String[] {id})); } return SUCCESS; } public List<String> getColumns() { return columns; } public Comparator[] getComparators() { return comparators; } public ExtensionProperty getCoreid() { return coreid; } public List<PropertyMapping> getFields() { return fields; } public ExtensionMapping getMapping() { return mapping; } public PropertyMapping getMappingCoreid() { return mappingCoreid; } public ExtensionProperty getDatasetId() { return datasetId; } public Integer getMid() { return mid; } /** * @return list of columns in the source data that have not been mapped to field(s) yet, or an empty list if the * source data has no columns */ public List<String> getNonMappedColumns() { List<String> mapped = Lists.newArrayList(); // return empty list if source data has no columns if (columns.isEmpty()) { return mapped; } // get list of all columns mapped to fields for (PropertyMapping field : fields) { if (field.getIndex() != null && field.getIndex() >=0 && field.getIndex() < columns.size()) { String sourceColumn = columns.get(field.getIndex()); if (sourceColumn != null) { mapped.add(sourceColumn); } } } // get column mapped to coreId field if (mappingCoreid.getIndex() != null && mappingCoreid.getIndex() >= 0 && mappingCoreid.getIndex() < columns.size() && columns.get(mappingCoreid.getIndex()) != null) { mapped.add(columns.get(mappingCoreid.getIndex())); } // return list all source columns excluding those mapped List<String> nonMapped = Lists.newArrayList(columns); nonMapped.removeAll(mapped); return nonMapped; } /** * @return list of groups in extension that are redundant (are already included in the core extension) */ public List<String> getRedundantGroups() { List<String> redundantGroups = new ArrayList<String>(); if (resource.getCoreRowType() != null && !resource.getCoreRowType() .equalsIgnoreCase(mapping.getExtension().getRowType())) { Extension core = extensionManager.get(resource.getCoreRowType()); redundantGroups = extensionManager.getRedundantGroups(mapping.getExtension(), core); } return redundantGroups; } public List<String[]> getPeek() { return peek; } public Map<String, Map<String, String>> getVocabTerms() { return vocabTerms; } /** * Normalizes an incoming column name so that it can later be compared against a ConceptTerm's simpleName. * This method converts the incoming string to lower case, and will take the substring up to, but no including the * first ":". * * @param col column name * @return the normalized column name, or null if the incoming name was null or empty */ String normalizeColumnName(String col) { if (!Strings.isNullOrEmpty(col)) { col = NORM_TERM.matcher(col.toLowerCase()).replaceAll(""); if (col.contains(":")) { col = StringUtils.substringAfter(col, ":"); } return col; } return null; } @Override public void prepare() { super.prepare(); // get mapping sequence id from parameters as setters are not called yet String midStr = StringUtils.trimToNull(req.getParameter("mid")); if (midStr != null) { mid = Integer.valueOf(midStr); } // id is rowtype if (id != null) { // mapping id, i.e. list index for the given rowtype, is given if (mid == null) { Extension ext = extensionManager.get(id); if (ext != null) { mapping = new ExtensionMapping(); mapping.setExtension(ext); } // The extension could have been null if: // 1. The user tried to add a core mapping with the select help option, no extension would have been found // 2. No extension could be retrieved for the id (rowtype) // The result should be the user stays on the overview page, and displays a warning informing them that they // need to perform another selection. else { addActionError(getText("manage.overview.DwC.Mappings.select.invalid")); defaultResult = "error"; } } else { List<ExtensionMapping> maps = resource.getMappings(id); mapping = maps.get(mid); } } else { // worst case, just redirect to resource not found page notFound = true; } if (mapping != null && mapping.getExtension() != null) { // is source assigned yet? if (mapping.getSource() == null) { // get source parameter as setters are not called yet String source = StringUtils.trimToNull(req.getParameter("source")); if (source != null) { Source src = resource.getSource(source); mapping.setSource(src); } else { // show set source form defaultResult = "source"; } } // set empty filter if not existing if (mapping.getFilter() == null) { mapping.setFilter(new RecordFilter()); } // determine the core row type String coreRowType = resource.getCoreRowType(); if (coreRowType == null) { // not yet set, the current mapping must be the core type coreRowType = mapping.getExtension().getRowType(); } LOG.info("Core row type: " + coreRowType); String coreIdTerm = AppConfig.coreIdTerm(coreRowType); coreid = extensionManager.get(coreRowType).getProperty(coreIdTerm); LOG.info("Field representing the id for the core: " + coreid); // setup the core record id term mappingCoreid = mapping.getField(coreid.getQualname()); if (mappingCoreid == null) { mappingCoreid = new PropertyMapping(); mappingCoreid.setTerm(coreid); mappingCoreid.setIndex(mapping.getIdColumn()); fields = new ArrayList<PropertyMapping>(mapping.getExtension().getProperties().size()); } else { fields = new ArrayList<PropertyMapping>(mapping.getExtension().getProperties().size() -1); } // inspect source readSource(); datasetId = extensionManager.get(mapping.getExtension().getRowType()).getProperty(Constants.DWC_DATASET_ID); // prepare all other fields for (int i = 0; i < mapping.getExtension().getProperties().size(); i++) { ExtensionProperty ep = mapping.getExtension().getProperties().get(i); // ignore core id term if (!ep.equals(coreid)) { PropertyMapping pm = populatePropertyMapping(ep); fields.add(pm); // also store PropertyMapping by group/class String group = ep.getGroup(); if (group != null) { if (fieldsByGroup.get(group) == null) { fieldsByGroup.put(group, new ArrayList<PropertyMapping>()); } fieldsByGroup.get(group).add(pm); } // for easy retrieval of PropertyMapping index by qualifiedName.. fieldsTermIndices.put(ep.getQualname(), fields.lastIndexOf(pm)); // populate vocabulary terms if (ep.getVocabulary() != null) { String id = ep.getVocabulary().getUriString(); vocabTerms.put(id, vocabManager.getI18nVocab(id, getLocaleLanguage(), true)); } } } // finally do automapping if no fields are found if (mapping.getFields().isEmpty()) { int automapped = automap(); if (automapped > 0) { addActionMessage(getText("manage.mapping.automaped", new String[] {String.valueOf(automapped)})); } } // ensure existing configuration re-loaded setDoiUsedForDatasetId(mapping.isDoiUsedForDatasetId()); if (!isHttpPost()) { validateAndReport(); } } } /** * Populate a PropertyMapping from an ExtensionProperty. If the ExtensionProperty is already mapped, preserves * the existing PropertyMapping. Otherwise, creates a brand new PropertyMapping. * * @param ep ExtensionProperty * * @return PropertyMapping created */ private PropertyMapping populatePropertyMapping(ExtensionProperty ep) { // mapped already? PropertyMapping pm = mapping.getField(ep.getQualname()); if (pm == null) { // no, create brand new PropertyMapping pm = new PropertyMapping(); } pm.setTerm(ep); return pm; } private void readSource() { if (mapping.getSource() == null) { columns = new ArrayList<String>(); } else { peek = sourceManager.peek(mapping.getSource(), 5); // If user wants to import a source without a header lines, the columns are going to be numbered with the first // non-null value as an example. Otherwise, read the file/database normally. if (mapping.getSource().isFileSource() && ((TextFileSource) mapping.getSource()).getIgnoreHeaderLines() == 0) { columns = mapping.getColumns(peek); } else { columns = sourceManager.columns(mapping.getSource()); } if (columns.isEmpty() && mapping.getSource().getName() != null) { // TODO: i18n addActionWarning("Source " + mapping.getSource().getName() + " has no columns available to map. Please check that it has been configured correctly."); } } } @Override public String save() throws IOException { // a new mapping? if (resource.getMapping(id, mid) == null) { mid = resource.addMapping(mapping); } else { // save field mappings Set<PropertyMapping> mappedFields = new TreeSet<PropertyMapping>(); for (PropertyMapping f : fields) { Integer index = MoreObjects.firstNonNull(f.getIndex(), -9999); if (index >= 0 || StringUtils.trimToNull(f.getDefaultValue()) != null) { mappedFields.add(f); } } // save coreid field (e.g. occurrenceID) so that it is included in mapping, despite being a duplicate of coreid // Careful: only save coreid field for core extension mappings, not for extension mapping if (resource.getCoreRowType() != null && resource.getCoreRowType().equalsIgnoreCase(mapping.getExtension().getRowType())) { mappingCoreid.setIndex(mapping.getIdColumn()); mappingCoreid.setDefaultValue(mapping.getIdSuffix()); Integer index = MoreObjects.firstNonNull(mappingCoreid.getIndex(), -9999); if (index >= 0 || StringUtils.trimToNull(mappingCoreid.getDefaultValue()) != null) { mappedFields.add(mappingCoreid); } } // back to mapping object mapping.setFields(mappedFields); // persist other configurations, e.g. using DOI as datasetId mapping.setDoiUsedForDatasetId(doiUsedForDatasetId); } // update last modified dates Date lastModified = new Date(); mapping.setLastModified(lastModified); resource.setMappingsModified(lastModified); // save entire resource config saveResource(); // report validation without skipping this save validateAndReport(); LOG.debug("mapping saved.."); return defaultResult; } public String saveSetSource() { return INPUT; } public void setColumns(List<String> columns) { this.columns = columns; } public void setFields(List<PropertyMapping> fields) { this.fields = fields; } public void setMapping(ExtensionMapping mapping) { this.mapping = mapping; } public void setMappingCoreid(PropertyMapping mappingCoreid) { this.mappingCoreid = mappingCoreid; } public void setDatasetId(ExtensionProperty datasetId) { this.datasetId = datasetId; } public void setMid(Integer mid) { this.mid = mid; } /** * @return true if the DOI should be used for the datasetId, false otherwise */ public boolean isDoiUsedForDatasetId() { return doiUsedForDatasetId; } public void setDoiUsedForDatasetId(boolean doiUsedForDatasetId) { this.doiUsedForDatasetId = doiUsedForDatasetId; } /** * Called from Freemarker template. */ public Map<String, List<PropertyMapping>> getFieldsByGroup() { return fieldsByGroup; } /** * Called from Freemarker template. */ public Map<String, Integer> getFieldsTermIndices() { return fieldsTermIndices; } /** * @return true if mapping is a core mapping, false if mapping is an extension mapping */ public boolean isCoreMapping() { if (mapping.isCore() && resource.getCoreRowType() != null) { return resource.getCoreRowType().equalsIgnoreCase(mapping.getExtension().getRowType()); } return false; } }