/*******************************************************************************
* Copyright (c) 2012 Obeo.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Obeo - initial API and implementation
*******************************************************************************/
package org.eclipse.emf.compare.match.resource;
import com.google.common.base.Objects;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.eclipse.emf.compare.CompareFactory;
import org.eclipse.emf.compare.MatchResource;
import org.eclipse.emf.compare.internal.utils.DiffUtil;
import org.eclipse.emf.ecore.resource.Resource;
/**
 * This implementation of a matching strategy will try and determine the resource mappings through the
 * similarity of their names.
 * <p>
 * Specifically, this will determine the cartesian product of the left and right resource sets, compute a
 * similarity for every single couple of Resource, then consider that every couple whose similarity is at
 * least 80% constitutes a mapping. For each such mapping, the origin resource whose name is the most
 * similar to the left resource's is attached, provided that this similarity also reaches 80%.
 * </p>
 * <p>
 * Note that no attempt is made at keeping the mappings one-to-one: a resource that is similar enough to
 * several resources of the other side will appear in several mappings.
 * </p>
 *
 * @author <a href="mailto:laurent.goubet@obeo.fr">Laurent Goubet</a>
 */
public class NameSimilarityMatchingStrategy implements IResourceMatchingStrategy {
    /** Minimal similarity (inclusive) two resource names must reach to be considered a match. */
    private static final double MATCH_THRESHOLD = 0.8d;

    /**
     * {@inheritDoc}
     *
     * @see org.eclipse.emf.compare.match.resource.IResourceMatchingStrategy#matchResources(java.lang.Iterable,
     *      java.lang.Iterable, java.lang.Iterable)
     */
    public List<MatchResource> matchResources(Iterable<? extends Resource> left,
            Iterable<? extends Resource> right, Iterable<? extends Resource> origin) {
        final List<MatchResource> mappings = Lists.newArrayList();

        // Both lists are sorted by decreasing similarity so that we can stop as soon as we go under the
        // threshold, and so that the first origin found for a given left resource is its best candidate.
        final List<ResourceSimilarity> similaritiesLR = sortedSimilarities(left, right);
        final List<ResourceSimilarity> similaritiesLO = sortedSimilarities(left, origin);

        for (ResourceSimilarity coupleLR : similaritiesLR) {
            if (coupleLR.getSimilarity() < MATCH_THRESHOLD) {
                // Every remaining couple is even less similar: nothing more to map.
                break;
            }
            final Resource leftRes = coupleLR.getFirst();
            final Resource originRes = findBestOrigin(leftRes, similaritiesLO);
            mappings.add(createMatchResource(leftRes, coupleLR.getSecond(), originRes));
        }

        /*
         * FIXME This was a work in progress that has been left alone for now as it is assumed to be too
         * costly. Either finish the implementation (in its current state, it only creates mappings for
         * left/right couples: it does not check for matches between the right and origin if they have no
         * "left" counterpart, nor for left/origin matches without a "right" counterpart) or remove the
         * class altogether.
         */
        return mappings;
    }

    /**
     * Computes the similarity of every couple from the cartesian product of the two given iterables and
     * returns them sorted by decreasing similarity.
     *
     * @param firsts
     *            Resources that will be the first element of each couple.
     * @param seconds
     *            Resources that will be the second element of each couple.
     * @return The similarities of all couples, most similar first. Couples with the same similarity keep
     *         the iteration order of the cartesian product.
     */
    private static List<ResourceSimilarity> sortedSimilarities(Iterable<? extends Resource> firsts,
            Iterable<? extends Resource> seconds) {
        final Set<List<Resource>> product = cartesianProductOf(firsts, seconds);
        final List<ResourceSimilarity> similarities = Lists.newArrayList();
        for (List<Resource> couple : product) {
            similarities.add(new ResourceSimilarity(couple.get(0), couple.get(1)));
        }
        // The natural order of ResourceSimilarity is ascending; we need the best matches first.
        Collections.sort(similarities, Collections.<ResourceSimilarity> reverseOrder());
        return similarities;
    }

    /**
     * Looks up the origin resource whose name is the most similar to the given left resource's.
     *
     * @param leftRes
     *            The left resource for which we seek an origin.
     * @param sortedLO
     *            Similarities of all left/origin couples, sorted by decreasing similarity.
     * @return The most similar origin resource if its similarity reaches the match threshold,
     *         <code>null</code> otherwise.
     */
    private static Resource findBestOrigin(Resource leftRes, List<ResourceSimilarity> sortedLO) {
        Resource originRes = null;
        for (ResourceSimilarity coupleLO : sortedLO) {
            if (coupleLO.getSimilarity() < MATCH_THRESHOLD) {
                // Sorted by decreasing similarity: no acceptable candidate remains.
                break;
            }
            // Identity comparison: we are looking for this very resource instance.
            if (coupleLO.getFirst() == leftRes) {
                originRes = coupleLO.getSecond();
                break;
            }
        }
        return originRes;
    }

    /**
     * Computes the cartesian product of the two given iterables by converting them to {@link Set}s and
     * feeding them to {@link Sets#cartesianProduct(List)}.
     *
     * @param iterable1
     *            First of the two iterables of which we need the cartesian product.
     * @param iterable2
     *            Second of the two iterables of which we need the cartesian product.
     * @param <T>
     *            Type of iterables' content.
     * @return The cartesian product of the two given iterables.
     * @see Sets#cartesianProduct(List)
     */
    private static <T> Set<List<T>> cartesianProductOf(Iterable<? extends T> iterable1,
            Iterable<? extends T> iterable2) {
        // Linked sets preserve the iteration order of the inputs (and drop duplicates).
        Set<T> set1 = Sets.newLinkedHashSet(iterable1);
        Set<T> set2 = Sets.newLinkedHashSet(iterable2);
        List<Set<T>> input = ImmutableList.of(set1, set2);
        return Sets.cartesianProduct(input);
    }

    /**
     * Creates a {@link MatchResource} instance and sets all three resources of the mapping on it. The URI
     * of each side is also set on the mapping (as a String) when the corresponding resource and its URI
     * are not <code>null</code>.
     *
     * @param left
     *            The left resource of this mapping. May be <code>null</code>.
     * @param right
     *            The right resource of this mapping. May be <code>null</code>.
     * @param origin
     *            The origin resource of this mapping. May be <code>null</code>.
     * @return The created mapping.
     */
    protected static MatchResource createMatchResource(Resource left, Resource right, Resource origin) {
        final MatchResource match = CompareFactory.eINSTANCE.createMatchResource();

        match.setLeft(left);
        match.setRight(right);
        match.setOrigin(origin);

        if (left != null && left.getURI() != null) {
            match.setLeftURI(left.getURI().toString());
        }
        if (right != null && right.getURI() != null) {
            match.setRightURI(right.getURI().toString());
        }
        if (origin != null && origin.getURI() != null) {
            match.setOriginURI(origin.getURI().toString());
        }

        return match;
    }

    /**
     * This simple structure will only be used internally in order to compute the similarities between the
     * names of a resource couple.
     * <p>
     * Note: the natural ordering of this class (by similarity) is inconsistent with equals (by URIs of
     * the two resources).
     * </p>
     *
     * @author <a href="mailto:laurent.goubet@obeo.fr">Laurent Goubet</a>
     */
    private static class ResourceSimilarity implements Comparable<ResourceSimilarity> {
        /** First resource of the couple for which we computed a similarity. */
        private final Resource first;

        /** Second resource of the couple for which we computed a similarity. */
        private final Resource second;

        /** Similarity of the two resources' names, computed once at construction time. */
        private final double similarity;

        /**
         * Instantiates a ResourceSimilarity structure given the two resources for which we need a
         * similarity. The similarity is computed eagerly so that sorting does not recompute it on every
         * comparison.
         *
         * @param first
         *            First resource of the couple for which we need a similarity. Must not be
         *            <code>null</code>.
         * @param second
         *            Second resource of the couple for which we need a similarity. Must not be
         *            <code>null</code>.
         */
        ResourceSimilarity(Resource first, Resource second) {
            this.first = first;
            this.second = second;
            this.similarity = computeSimilarity(first, second);
        }

        /**
         * Returns the first resource of this couple.
         *
         * @return The first resource of this couple.
         */
        public Resource getFirst() {
            return first;
        }

        /**
         * Returns the second resource of this couple.
         *
         * @return The second resource of this couple.
         */
        public Resource getSecond() {
            return second;
        }

        /**
         * Returns the similarity between the two resources of this couple. The similarity with this
         * default implementation is the dice coefficient of the two resources' names (last segment of
         * their URI), or <code>0</code> if either resource has no name.
         *
         * @return The similarity between these two resources.
         */
        public double getSimilarity() {
            return similarity;
        }

        /**
         * {@inheritDoc}
         *
         * @see java.lang.Comparable#compareTo(java.lang.Object)
         */
        public int compareTo(ResourceSimilarity other) {
            return Double.compare(getSimilarity(), other.getSimilarity());
        }

        /**
         * {@inheritDoc}
         *
         * @see java.lang.Object#equals(java.lang.Object)
         */
        @Override
        public boolean equals(Object obj) {
            final boolean equal;
            if (obj == this) {
                equal = true;
            } else if (obj instanceof ResourceSimilarity) {
                final ResourceSimilarity other = (ResourceSimilarity)obj;
                // Objects.equal is null-safe: resources are not required to have an URI.
                equal = Objects.equal(getFirst().getURI(), other.getFirst().getURI())
                        && Objects.equal(getSecond().getURI(), other.getSecond().getURI());
            } else {
                equal = false;
            }
            return equal;
        }

        /**
         * {@inheritDoc}
         *
         * @see java.lang.Object#hashCode()
         */
        @Override
        public int hashCode() {
            // Must hash the very same data equals() compares, i.e. the URIs and not the resources.
            return Objects.hashCode(getFirst().getURI(), getSecond().getURI());
        }

        /**
         * Computes the dice coefficient of the names of the two given resources.
         *
         * @param first
         *            First of the two resources to compare.
         * @param second
         *            Second of the two resources to compare.
         * @return The dice coefficient of the two names, <code>0</code> if either resource has no name.
         */
        private static double computeSimilarity(Resource first, Resource second) {
            final String firstName = nameOf(first);
            final String secondName = nameOf(second);
            double result = 0d;
            if (firstName != null && secondName != null) {
                result = DiffUtil.diceCoefficient(firstName, secondName);
                // Defensive: a NaN would be ordered above every real value by Double.compare and
                // would thus end up first in a descending sort.
                if (Double.isNaN(result)) {
                    result = 0d;
                }
            }
            return result;
        }

        /**
         * Returns the name of the given resource, i.e. the last segment of its URI.
         *
         * @param resource
         *            The resource of which we need the name.
         * @return The last segment of the resource's URI, <code>null</code> if it has no URI or if its
         *         URI has no last segment.
         */
        private static String nameOf(Resource resource) {
            String name = null;
            if (resource.getURI() != null) {
                name = resource.getURI().lastSegment();
            }
            return name;
        }
    }
}