/* * Copyright (c) 2012 Data Harmonisation Panel * * All rights reserved. This program and the accompanying materials are made * available under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation, either version 3 of the License, * or (at your option) any later version. * * You should have received a copy of the GNU Lesser General Public License * along with this distribution. If not, see <http://www.gnu.org/licenses/>. * * Contributors: * HUMBOLDT EU Integrated Project #030962 * Data Harmonisation Panel <http://www.dhpanel.eu> */ package eu.esdihumboldt.cst.functions.core.merge; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import javax.xml.namespace.QName; import com.google.common.collect.ListMultimap; import eu.esdihumboldt.hale.common.align.model.ParameterValue; import eu.esdihumboldt.hale.common.align.model.functions.MergeFunction; import eu.esdihumboldt.hale.common.align.model.functions.merge.MergeUtil; import eu.esdihumboldt.hale.common.align.transformation.function.TransformationException; import eu.esdihumboldt.hale.common.align.transformation.report.TransformationLog; import eu.esdihumboldt.hale.common.instance.groovy.InstanceAccessor; import eu.esdihumboldt.hale.common.instance.model.Instance; import eu.esdihumboldt.hale.common.instance.model.InstanceCollection; import eu.esdihumboldt.hale.common.instance.model.InstanceMetadata; import eu.esdihumboldt.hale.common.instance.model.MutableInstance; import eu.esdihumboldt.hale.common.instance.model.ResourceIterator; import eu.esdihumboldt.hale.common.schema.model.TypeDefinition; /** * Merge based on equal properties. * * @author Simon Templer */ public class PropertiesMergeHandler extends AbstractMergeHandler<PropertiesMergeHandler.PropertiesMergeConfig, DeepIterableKey> implements MergeFunction { class PropertiesMergeConfig { private final List<List<QName>> keyProperties; private final List<List<QName>> additionalProperties; private final boolean autoDetect; private PropertiesMergeConfig(List<List<QName>> keyProperties, List<List<QName>> additionalProperties, boolean autoDetect) { super(); this.keyProperties = keyProperties; this.additionalProperties = additionalProperties; this.autoDetect = autoDetect; } } @Override protected PropertiesMergeConfig createMergeConfiguration(String transformationIdentifier, ListMultimap<String, ParameterValue> transformationParameters, Map<String, String> executionParameters, TransformationLog log) throws TransformationException { if (transformationParameters == null) { throw new TransformationException("Transformation parameters invalid"); } List<List<QName>> properties = MergeUtil.getProperties(transformationParameters, PARAMETER_PROPERTY); List<List<QName>> additionalProperties = MergeUtil.getProperties(transformationParameters, PARAMETER_ADDITIONAL_PROPERTY); boolean autoDetect; if (transformationParameters.get(PARAMETER_AUTO_DETECT).isEmpty()) { // default to false (original behavior) autoDetect = false; } else { autoDetect = Boolean.parseBoolean( transformationParameters.get(PARAMETER_AUTO_DETECT).get(0).as(String.class)); } return new PropertiesMergeConfig(properties, additionalProperties, autoDetect); } @Override protected DeepIterableKey getMergeKey(Instance instance, PropertiesMergeConfig mergeConfig) { if (mergeConfig.keyProperties.isEmpty()) { // merge all instances return KEY_ALL; } List<Object> valueList = new ArrayList<Object>(mergeConfig.keyProperties.size()); for (List<QName> propertyPath : mergeConfig.keyProperties) { // retrieve values from instance // XXX should nulls be listed? InstanceAccessor accessor = new InstanceAccessor(instance); for (QName name : propertyPath) { accessor.findChildren(name.getLocalPart(), Collections.singletonList(name.getNamespaceURI())); } valueList.add(accessor.list(true)); } return new DeepIterableKey(valueList); } @Override protected Instance merge(InstanceCollection instances, TypeDefinition type, DeepIterableKey mergeKey, PropertiesMergeConfig mergeConfig) { if (instances.hasSize() && instances.size() == 1) { // early exit if only one instance to merge try (ResourceIterator<Instance> it = instances.iterator()) { return it.next(); } } MutableInstance result = getInstanceFactory().createInstance(type); /* * FIXME This a first VERY basic implementation, where only the first * item in each property path is regarded, and that whole tree is added * only once (from the first instance). XXX This especially will be a * problem, if a path contains a choice. XXX For more advanced stuff we * need more advanced test cases. */ Set<QName> rootNames = new HashSet<QName>(); Set<QName> nonKeyRootNames = new HashSet<QName>(); // collect path roots for (List<QName> path : mergeConfig.keyProperties) { rootNames.add(path.get(0)); } for (List<QName> path : mergeConfig.additionalProperties) { nonKeyRootNames.add(path.get(0)); } // XXX what about metadata?! // XXX for now only retain IDs Set<Object> ids = new HashSet<Object>(); try (ResourceIterator<Instance> it = instances.iterator()) { while (it.hasNext()) { Instance instance = it.next(); for (QName name : instance.getPropertyNames()) { if (rootNames.contains(name)) { /* * Property is merge key -> only use first occurrence * (as all entries need to be the same) * * TODO adapt if multiple keys are possible per instance */ addFirstOccurrence(result, instance, name); } else if (nonKeyRootNames.contains(name)) { /* * Property is additional merge property. * * Traditional behavior: Only keep unique values. * * XXX should this be configurable? */ addUnique(result, instance, name); } else if (mergeConfig.autoDetect) { /* * Auto-detection is enabled. * * Only keep unique values. * * XXX This differs from the traditional behavior in * that there only the first value would be used, but * only if all values were equal. That cannot be easily * checked in an iterative approach. */ addUnique(result, instance, name); } else { /* * Property is not to be merged. * * XXX but we could do some kind of aggregation * * XXX for now just add all values */ addValues(result, instance, name); } } List<Object> instanceIDs = instance.getMetaData(InstanceMetadata.METADATA_ID); for (Object id : instanceIDs) { ids.add(id); } } } // store metadata IDs result.setMetaData(InstanceMetadata.METADATA_ID, ids.toArray()); return result; } /** * Apply instance property values to the merged result instance. Use the * "first occurrence" strategy that only keeps the values from the first * instance. * * @param result the result instance * @param instance the instance to merge with the result * @param property the name of the property that should be handled */ private void addFirstOccurrence(MutableInstance result, Instance instance, QName property) { Object[] existingValues = result.getProperty(property); if (existingValues == null || existingValues.length <= 0) { // no values yet -> add values addValues(result, instance, property); } } /** * Apply instance property values to the merged result instance. Use the * "unique" strategy that only keeps unique values. * * @param result the result instance * @param instance the instance to merge with the result * @param property the name of the property that should be handled */ private void addUnique(MutableInstance result, Instance instance, QName property) { Object[] values = instance.getProperty(property); if (values == null || values.length <= 0) { return; } // collect unique values Object[] existingValues = result.getProperty(property); Set<DeepIterableKey> uniqueValues = new HashSet<>(); if (existingValues != null) { for (Object value : existingValues) { uniqueValues.add(new DeepIterableKey(value)); } } // add values not contained yet for (Object value : values) { DeepIterableKey key = new DeepIterableKey(value); if (uniqueValues.add(key)) { result.addProperty(property, value); } } } /** * Apply instance property values to the merged result instance. Use the * "add values" strategy that keeps all values. * * @param result the result instance * @param instance the instance to merge with the result * @param property the name of the property that should be handled */ private void addValues(MutableInstance result, Instance instance, QName property) { // add all values Object[] values = instance.getProperty(property); if (values != null) { for (Object value : values) { result.addProperty(property, value); } } } @SuppressWarnings("unused") private boolean allEqual(List<Object[]> list) { Iterator<Object[]> iter = list.iterator(); // get first element DeepIterableKey first = new DeepIterableKey(iter.next()); // compare rest to first while (iter.hasNext()) if (!first.equals(new DeepIterableKey(iter.next()))) return false; return true; } }