/*
* Copyright (c) 2012 Data Harmonisation Panel
*
* All rights reserved. This program and the accompanying materials are made
* available under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation, either version 3 of the License,
* or (at your option) any later version.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this distribution. If not, see <http://www.gnu.org/licenses/>.
*
* Contributors:
* HUMBOLDT EU Integrated Project #030962
* Data Harmonisation Panel <http://www.dhpanel.eu>
*/
package eu.esdihumboldt.cst.functions.core.join;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.net.URI;
import java.net.URL;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.ListMultimap;
import com.google.common.collect.Multimap;
import eu.esdihumboldt.hale.common.align.model.AlignmentUtil;
import eu.esdihumboldt.hale.common.align.model.ParameterValue;
import eu.esdihumboldt.hale.common.align.model.functions.JoinFunction;
import eu.esdihumboldt.hale.common.align.model.functions.join.JoinParameter;
import eu.esdihumboldt.hale.common.align.model.functions.join.JoinParameter.JoinCondition;
import eu.esdihumboldt.hale.common.align.model.impl.PropertyEntityDefinition;
import eu.esdihumboldt.hale.common.align.model.impl.TypeEntityDefinition;
import eu.esdihumboldt.hale.common.align.transformation.engine.TransformationEngine;
import eu.esdihumboldt.hale.common.align.transformation.function.InstanceHandler;
import eu.esdihumboldt.hale.common.align.transformation.function.TransformationException;
import eu.esdihumboldt.hale.common.align.transformation.function.impl.FamilyInstanceImpl;
import eu.esdihumboldt.hale.common.align.transformation.report.TransformationLog;
import eu.esdihumboldt.hale.common.instance.model.FamilyInstance;
import eu.esdihumboldt.hale.common.instance.model.Instance;
import eu.esdihumboldt.hale.common.instance.model.InstanceCollection;
import eu.esdihumboldt.hale.common.instance.model.InstanceReference;
import eu.esdihumboldt.hale.common.instance.model.ResourceIterator;
import eu.esdihumboldt.hale.common.instance.model.impl.GenericResourceIteratorAdapter;
import eu.esdihumboldt.hale.common.schema.model.TypeDefinition;
import eu.esdihumboldt.hale.common.schema.model.constraint.property.Reference;
/**
 * Instance handler that joins instances of several types based on equal
 * property values.
 * <p>
 * The handler iterates once over all source instances, builds an index over
 * the values of every property used as join property and remembers the
 * instances of the first join type. The actual joining is done lazily by the
 * returned iterator, one start instance at a time.
 *
 * @author Kai Schwierczek
 */
public class JoinHandler implements InstanceHandler<TransformationEngine>, JoinFunction {

    // For now no support for using the same type more than once in a join.

    /**
     * Creates an iterator over joined instance families. Each family has an
     * instance of the first join type as root; matching instances of the other
     * types are attached as (nested) children.
     *
     * @throws TransformationException if no join parameter is defined or the
     *             join parameter does not pass its own validation
     *
     * @see eu.esdihumboldt.hale.common.align.transformation.function.InstanceHandler#partitionInstances(eu.esdihumboldt.hale.common.instance.model.InstanceCollection,
     *      java.lang.String,
     *      eu.esdihumboldt.hale.common.align.transformation.engine.TransformationEngine,
     *      com.google.common.collect.ListMultimap, java.util.Map,
     *      eu.esdihumboldt.hale.common.align.transformation.report.TransformationLog)
     */
    @Override
    public ResourceIterator<FamilyInstance> partitionInstances(InstanceCollection instances,
            String transformationIdentifier, TransformationEngine engine,
            ListMultimap<String, ParameterValue> transformationParameters,
            Map<String, String> executionParameters, TransformationLog log)
            throws TransformationException {
        if (transformationParameters == null
                || !transformationParameters.containsKey(PARAMETER_JOIN)
                || transformationParameters.get(PARAMETER_JOIN).isEmpty()) {
            throw new TransformationException("No join parameter defined");
        }

        // only the first join parameter is evaluated
        JoinParameter joinParameter = transformationParameters.get(PARAMETER_JOIN).get(0)
                .as(JoinParameter.class);

        String validation = joinParameter.validate();
        if (validation != null) {
            throw new TransformationException("Join parameter invalid: " + validation);
        }

        List<TypeEntityDefinition> types = joinParameter.types;

        // ChildType -> DirectParentType
        // (the highest type index referenced as base by a condition on the
        // child; stays 0 if there is no such condition)
        int[] directParent = new int[types.size()];

        // ChildType -> (ParentType -> Collection<JoinCondition>)
        Map<Integer, Multimap<Integer, JoinCondition>> joinTable = new HashMap<>();

        // all joined properties, grouped by the type they belong to
        Multimap<TypeDefinition, PropertyEntityDefinition> properties = HashMultimap.create();

        for (JoinCondition condition : joinParameter.conditions) {
            int baseTypeIndex = types.indexOf(AlignmentUtil.getTypeEntity(condition.baseProperty));
            int joinTypeIndex = types.indexOf(AlignmentUtil.getTypeEntity(condition.joinProperty));

            Multimap<Integer, JoinCondition> typeTable = joinTable.get(joinTypeIndex);
            if (typeTable == null) {
                typeTable = ArrayListMultimap.create(2, 2);
                joinTable.put(joinTypeIndex, typeTable);
            }
            typeTable.put(baseTypeIndex, condition);

            // update highest type if necessary
            if (directParent[joinTypeIndex] < baseTypeIndex) {
                directParent[joinTypeIndex] = baseTypeIndex;
            }

            properties.put(condition.joinProperty.getType(), condition.joinProperty);
        }

        // JoinProperty -> (Value -> Collection<Reference>)
        Map<PropertyEntityDefinition, Multimap<Object, InstanceReference>> index = new HashMap<>();
        for (PropertyEntityDefinition property : properties.values()) {
            index.put(property, ArrayListMultimap.<Object, InstanceReference> create());
        }

        // remember instances of first type to start join afterwards
        Collection<InstanceReference> startInstances = new LinkedList<InstanceReference>();

        // iterate once over all instances
        ResourceIterator<Instance> iterator = instances.iterator();
        try {
            while (iterator.hasNext()) {
                Instance next = iterator.next();

                // remember instances of first type
                if (next.getDefinition().equals(types.get(0).getDefinition())) {
                    startInstances.add(instances.getReference(next));
                }

                // fill index over needed properties
                for (PropertyEntityDefinition property : properties.get(next.getDefinition())) {
                    // XXX what about null? for now ignore null values
                    // XXX how to treat multiple values? must all be equal (in
                    // order?) or only one?
                    Collection<Object> values = AlignmentUtil.getValues(next, property, true);
                    if (values != null && !values.isEmpty()) {
                        // XXX take only first value for now
                        index.get(property).put(processValue(values.iterator().next(), property),
                                instances.getReference(next));
                    }
                }
            }
        } finally {
            // release the source iterator even if indexing fails
            iterator.close();
        }

        return new JoinIterator(instances, startInstances, directParent, index, joinTable);
    }

    /**
     * Process a value of a property in a join condition before using it with
     * the index.
     * <p>
     * The same processing is applied to the indexed join property values and
     * to the base property values used for the lookup, so that values are
     * classified as equal even if they are of different types, e.g. Long and
     * Integer or Integer and String.
     *
     * @param value the value
     * @param property the entity definition the value is associated to
     * @return the processed value, possibly wrapped or replaced through a
     *         different representation
     */
    protected Object processValue(Object value, PropertyEntityDefinition property) {
        // extract the identifier from a reference
        value = property.getDefinition().getConstraint(Reference.class).extractId(value);

        /*
         * Use string representation for numbers.
         */
        if (value instanceof Number) {
            if (value instanceof BigInteger || value instanceof Long || value instanceof Integer
                    || value instanceof Byte || value instanceof Short) {
                // use string representation for integer numbers
                value = value.toString();
            }
            else if (value instanceof BigDecimal) {
                BigDecimal decimal = (BigDecimal) value;
                if (decimal.signum() == 0) {
                    // zero is handled explicitly, independent of its scale
                    value = "0";
                }
                else {
                    /*
                     * BigDecimal.equals and hashCode take the scale into
                     * account (1.50 is not equal to 1.5), so trailing zeros
                     * are removed before the value is used as index key.
                     */
                    BigDecimal normalized = decimal.stripTrailingZeros();
                    if (normalized.scale() <= 0) {
                        // use string representation for integer big decimal
                        value = normalized.toBigInteger().toString();
                    }
                    else {
                        value = normalized;
                    }
                }
            }
            // other number types (e.g. Double, Float) are used as they are
        }

        /*
         * Use string representation for URIs and URLs.
         */
        if (value instanceof URI || value instanceof URL) {
            value = value.toString();
        }

        return value;
    }

    /**
     * Iterator that converts each start instance reference into a
     * {@link FamilyInstance} with all joined instances attached as children.
     * Not a static class because it relies on
     * {@link JoinHandler#processValue(Object, PropertyEntityDefinition)} of
     * the enclosing handler.
     */
    private class JoinIterator
            extends GenericResourceIteratorAdapter<InstanceReference, FamilyInstance> {

        private final InstanceCollection instances;

        // type -> direct-parent
        private final int[] parent;

        // TypeProp -> (Value -> Collection<Reference>)
        private final Map<PropertyEntityDefinition, Multimap<Object, InstanceReference>> index;

        // ChildType -> (ParentType -> Collection<JoinCondition>)
        private final Map<Integer, Multimap<Integer, JoinCondition>> joinTable;

        /**
         * @param instances the collection used to resolve instance references
         * @param startInstances references to the instances of the first join
         *            type
         * @param parent the direct parent type index for each type index
         * @param index the value index for each join property
         * @param joinTable the join conditions per child type and parent type
         */
        protected JoinIterator(InstanceCollection instances,
                Collection<InstanceReference> startInstances, int[] parent,
                Map<PropertyEntityDefinition, Multimap<Object, InstanceReference>> index,
                Map<Integer, Multimap<Integer, JoinCondition>> joinTable) {
            super(startInstances.iterator());

            this.instances = instances;
            this.parent = parent;
            this.index = index;
            this.joinTable = joinTable;
        }

        /**
         * Builds the joined family for a start instance.
         *
         * @see eu.esdihumboldt.hale.common.instance.model.impl.GenericResourceIteratorAdapter#convert(java.lang.Object)
         */
        @Override
        protected FamilyInstance convert(InstanceReference next) {
            FamilyInstance base = new FamilyInstanceImpl(instances.getInstance(next));

            // one slot per join type, holding the instance currently joined
            FamilyInstance[] currentInstances = new FamilyInstance[parent.length];
            currentInstances[0] = base;

            join(currentInstances, 0);

            return base;
        }

        /**
         * Joins all direct children of the given type to currentInstances and
         * recursively continues with the children of each joined instance.
         *
         * @param currentInstances the instances joined so far, by type index
         * @param currentType the index of the type whose children are joined
         */
        private void join(FamilyInstance[] currentInstances, int currentType) {
            // Join all types that are direct children of the last type.
            for (int i = currentType + 1; i < parent.length; i++) {
                if (parent[i] != currentType) {
                    continue;
                }

                // Get join condition for the direct child type.
                Multimap<Integer, JoinCondition> joinConditions = joinTable.get(i);

                // Collect intersection of conditions. null marks beginning
                // in contrast to an empty set.
                Set<InstanceReference> possibleInstances = null;

                // ParentType -> JoinConditions
                for (Map.Entry<Integer, JoinCondition> entry : joinConditions.entries()) {
                    JoinCondition condition = entry.getValue();

                    Collection<Object> currentValues = AlignmentUtil.getValues(
                            currentInstances[entry.getKey()], condition.baseProperty, true);
                    if (currentValues == null) {
                        // no value to compare with, so nothing can match
                        possibleInstances = Collections.emptySet();
                        break;
                    }

                    // Allow targets with any of the property values.
                    HashSet<InstanceReference> matches = new HashSet<InstanceReference>();
                    for (Object currentValue : currentValues) {
                        matches.addAll(index.get(condition.joinProperty)
                                .get(processValue(currentValue, condition.baseProperty)));
                    }

                    if (possibleInstances == null) {
                        possibleInstances = matches;
                    }
                    else {
                        // Intersect!
                        possibleInstances.retainAll(matches);
                    }

                    // Break if set is empty.
                    if (possibleInstances.isEmpty()) {
                        break;
                    }
                }

                if (possibleInstances != null && !possibleInstances.isEmpty()) {
                    FamilyInstance parentInstance = currentInstances[currentType];
                    for (InstanceReference ref : possibleInstances) {
                        FamilyInstance child = new FamilyInstanceImpl(instances.getInstance(ref));
                        parentInstance.addChild(child);

                        // descend with the child as current instance of type i
                        currentInstances[i] = child;
                        join(currentInstances, i);
                    }
                    // reset the slot after all children have been processed
                    currentInstances[i] = null;
                }
            }
        }

        /**
         * Removal is not supported.
         *
         * @see java.util.Iterator#remove()
         */
        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }
}