/** * Copyright (C) 2013-2014 Olaf Lessenich * Copyright (C) 2014-2015 University of Passau, Germany * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301 USA * * Contributors: * Olaf Lessenich <lessenic@fim.uni-passau.de> * Georg Seibt <seibt@fim.uni-passau.de> */ package de.fosd.jdime.matcher; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; import java.util.stream.Stream; import de.fosd.jdime.artifact.Artifact; import de.fosd.jdime.artifact.ArtifactList; import de.fosd.jdime.config.merge.MergeContext; import de.fosd.jdime.config.merge.Revision; import de.fosd.jdime.matcher.cost_model.CMMode; import de.fosd.jdime.matcher.cost_model.CostModelMatcher; import de.fosd.jdime.matcher.matching.Color; import de.fosd.jdime.matcher.matching.LookAheadMatching; import de.fosd.jdime.matcher.matching.Matching; import de.fosd.jdime.matcher.matching.Matchings; import de.fosd.jdime.matcher.ordered.EqualityMatcher; import de.fosd.jdime.matcher.ordered.OrderedMatcher; import de.fosd.jdime.matcher.ordered.mceSubtree.MCESubtreeMatcher; import de.fosd.jdime.matcher.ordered.simpleTree.SimpleTreeMatcher; import de.fosd.jdime.matcher.unordered.UniqueLabelMatcher; import de.fosd.jdime.matcher.unordered.UnorderedMatcher; import de.fosd.jdime.matcher.unordered.assignmentProblem.HungarianMatcher; import de.fosd.jdime.stats.KeyEnums; import de.fosd.jdime.strdump.DumpMode; import de.fosd.jdime.util.UnorderedTuple; import static de.fosd.jdime.config.merge.MergeContext.LOOKAHEAD_OFF; import static de.fosd.jdime.stats.KeyEnums.Type.METHOD; import static de.fosd.jdime.stats.KeyEnums.Type.TRY; /** * A <code>Matcher</code> is used to compare two <code>Artifacts</code> and to * compute and store <code>Matching</code>s. * <p> * The computation of <code>Matching</code>s is done recursively. Depending on * the <code>Artifact</code>, the matcher decides whether the order of elements * is important (e.g., statements within a method in a Java AST) or not (e.g., * method declarations in a Java AST) for syntactic correctness. Then either an * implementation of <code>OrderedMatcher</code> or * <code>UnorderedMatcher</code> is called to compute the actual <code>Matching</code>. * Usually, those subclass implementations use this <code>Matcher</code> * superclass for the recursive call of the match() method. * <p> * When the computation is done and the best combination of matches have been * selected, they are stored recursively within the <code>Artifact</code> nodes * themselves, assigning each matched <code>Artifact</code> a pointer to the * corresponding matching <code>Artifact</code>. * * @author Olaf Lessenich * * @param <T> type of <code>Artifact</code> */ public class Matcher<T extends Artifact<T>> { private static final Logger LOG = Logger.getLogger(Matcher.class.getCanonicalName()); private static final String ID = Matcher.class.getSimpleName(); private int calls = 0; private int equalityCalls = 0; private int orderedCalls = 0; private int unorderedCalls = 0; private UnorderedMatcher<T> unorderedMatcher; private UnorderedMatcher<T> unorderedLabelMatcher; private OrderedMatcher<T> orderedMatcher; private OrderedMatcher<T> mceSubtreeMatcher; private UnorderedTuple<T, T> lookupTuple; private Map<UnorderedTuple<T, T>, Matching<T>> trivialMatches; private EqualityMatcher<T> equalityMatcher; private CostModelMatcher<T> cmMatcher; private Set<Artifact<T>> orderedChildren; private Set<Artifact<T>> uniquelyLabeledChildren; private Set<Artifact<T>> fullyOrdered; private Set<Artifact<T>> cachedRoots; /** * Constructs a new <code>Matcher</code>. */ public Matcher() { // no method reference because this syntax makes setting a breakpoint for debugging easier MatcherInterface<T> rootMatcher = (context, left, right) -> { return match(context, left, right); }; unorderedMatcher = new HungarianMatcher<>(rootMatcher); unorderedLabelMatcher = new UniqueLabelMatcher<>(rootMatcher); orderedMatcher = new SimpleTreeMatcher<>(rootMatcher); mceSubtreeMatcher = new MCESubtreeMatcher<>(rootMatcher); lookupTuple = UnorderedTuple.of(null, null); trivialMatches = new HashMap<>(); equalityMatcher = new EqualityMatcher<>(null); cmMatcher = new CostModelMatcher<>(); orderedChildren = new HashSet<>(); uniquelyLabeledChildren = new HashSet<>(); fullyOrdered = new HashSet<>(); cachedRoots = new HashSet<>(); } /** * Compares two nodes and returns matchings between them and possibly their sub-nodes. * * @param context * <code>MergeContext</code> * @param left * left node * @param right * right node * @param color * color of the matching (for debug output only) * @return <code>Matchings</code> of the two nodes */ public Matchings<T> match(MergeContext context, T left, T right, Color color) { Matchings<T> matchings; if (context.getCMMatcherMode() == CMMode.REPLACEMENT) { matchings = cmMatcher.match(context, left, right); } else { cache(context, left, right); matchings = match(context, left, right); if (context.getCMMatcherMode() == CMMode.POST_PROCESSOR && matchings.get(left, right).map(m -> !m.hasFullyMatched()).orElse(true)) { matchings = cmMatcher.match(context, left, right, matchings); } } matchings.get(left, right).ifPresent(m -> { LOG.fine(() -> { Revision lRev = left.getRevision(); Revision rRev = right.getRevision(); return String.format("Matched revision %s and %s with score %d", lRev, rRev, m.getScore()); }); }); LOG.fine(this::getLog); storeMatchings(context, matchings, color); if (LOG.isLoggable(Level.FINEST)) { LOG.finest(String.format("Dumping matching of %s and %s", left.getRevision(), right.getRevision())); System.out.println(matchings); } if (LOG.isLoggable(Level.FINE)) { LOG.fine(left.getRevision() + ".dumpTree():"); System.out.println(left.dump(DumpMode.PLAINTEXT_TREE)); LOG.fine(right.getRevision() + ".dumpTree():"); System.out.println(right.dump(DumpMode.PLAINTEXT_TREE)); } return matchings; } /** * Computes some results used during the matching of <code>left</code> and <code>right</code> and caches them. * * @param context * the current <code>MergeContext</code> * @param left * the left node to be matched * @param right * the right node to be matched */ private void cache(MergeContext context, T left, T right) { trivialMatches.clear(); if (!cachedRoots.contains(left) || !cachedRoots.contains(right)) { Matchings<T> trivialMatches = new EqualityMatcher<T>(null).match(context, left, right); trivialMatches.forEach(m -> this.trivialMatches.put(m.getMatchedArtifacts(), m)); } if (!cachedRoots.contains(left)) { cacheOrderingAndLabeling(left); } if (!cachedRoots.contains(right)) { cacheOrderingAndLabeling(right); } cachedRoots.add(left); cachedRoots.add(right); } /** * Caches (recursively for every artifact in the tree under <code>artifact</code>) the ordering * (whether the artifact itself is ordered, its children are ordered or the whole tree with <code>artifact</code> * at its root is ordered) and whether the the artifact has uniquely labeled children. * * @param artifact * the <code>artifact</code> for which results are to be cached */ private void cacheOrderingAndLabeling(T artifact) { ArtifactList<T> children = artifact.getChildren(); children.forEach(this::cacheOrderingAndLabeling); if (children.stream().map(T::getUniqueLabel).allMatch(Optional::isPresent)) { uniquelyLabeledChildren.add(artifact); } if (children.stream().anyMatch(T::isOrdered)) { orderedChildren.add(artifact); } if (children.stream().allMatch(fullyOrdered::contains) && artifact.isOrdered()) { fullyOrdered.add(artifact); } } /** * @see MatcherInterface#match(MergeContext, Artifact, Artifact) */ private Matchings<T> match(MergeContext context, T left, T right) { if (left.isConflict()) { Matchings<T> m = Matchings.of(left, right, 0); m.get(left, right).get().setAlgorithm(ID); return m; } if (left.isChoice()) { // We have to split the choice node into its variants and create a matching for each one. // The highest matching is returned. LOG.finest(() -> { String name = getClass().getSimpleName(); return String.format("%s encountered a choice node (%s)", name, left.getId()); }); Map<Integer, Matchings<T>> variantMatches = new HashMap<>(); for (T variant: left.getVariants().values()) { LOG.finest(() -> { String name = getClass().getSimpleName(); return String.format("%s.match(%s, %s)", name, variant.getId(), right.getId()); }); Matchings<T> cur = match(context, variant, right); Matching<T> highest = cur.get(variant, right).get(); variantMatches.put(highest.getScore(), cur); } Matchings<T> maxMatching = variantMatches.get(Collections.max(variantMatches.keySet())); LOG.finest(() -> { String name = this.getClass().getSimpleName(); return String.format("%s: highest match: %s", name, maxMatching); }); return maxMatching; } /* * Before firing up potentially expensive matching algorithms, we check whether the trees are identical. * To avoid redundant calls, we save the matchings reported by EqualityMatcher and perform lookups on * subsequent runs. */ Optional<Matchings<T>> trivialMatches = getTrivialMatchings(context, left, right); if (trivialMatches.isPresent()) { calls++; equalityCalls++; logMatcherUse(EqualityMatcher.class, left, right); return trivialMatches.get(); } if (!left.matches(right)) { Optional<UnorderedTuple<T, T>> resumeTuple = lookAhead(context, left, right); if (resumeTuple.isPresent()) { UnorderedTuple<T, T> toMatch = resumeTuple.get(); Matchings<T> subMatchings = getMatchings(context, toMatch.getX(), toMatch.getY()); Matching<T> subMatching = subMatchings.get(toMatch.getX(), toMatch.getY()).orElseThrow(() -> new RuntimeException("Hilfe")); Matching<T> lookAheadMatching = new LookAheadMatching<>(subMatching, left, right); subMatchings.remove(subMatching); subMatchings.add(lookAheadMatching); return subMatchings; } else { /* * The roots do not match and we cannot use the look-ahead feature. We therefore ignore the rest of the * subtrees and return early to save time. */ LOG.finest(() -> { String format = "%s - early return while matching %s and %s (LookAhead = %d)"; return String.format(format, ID, left.getId(), right.getId(), context.getLookAhead()); }); Matchings<T> m = Matchings.of(left, right, 0); m.get(left, right).get().setAlgorithm(ID); return m; } } return getMatchings(context, left, right); } /** * Returns the trivial Matchings if <code>left</code> and <code>right</code> are exactly equal as determined by * the <code>EqualityMatcher</code>. * * @param context * the <code>MergeContext</code> * @param left * the left tree * @param right * the right tree * @return the <code>Matchings</code> */ private Optional<Matchings<T>> getTrivialMatchings(MergeContext context, T left, T right) { lookupTuple.setX(left); lookupTuple.setY(right); if (!equalityMatcher.didNotMatch(lookupTuple) && !trivialMatches.containsKey(lookupTuple)) { Matchings<T> trivialMatches = equalityMatcher.match(context, left, right); trivialMatches.forEach(m -> this.trivialMatches.put(m.getMatchedArtifacts(), m)); } if (trivialMatches.containsKey(lookupTuple)) { Matchings<T> matchings = new Matchings<>(); matchings.add(trivialMatches.get(lookupTuple)); Iterator<T> lIt = left.getChildren().iterator(); Iterator<T> rIt = right.getChildren().iterator(); while (lIt.hasNext() && rIt.hasNext()) { matchings.addAll(getTrivialMatchings(context, lIt.next(), rIt.next()).get()); } lookupTuple.setX(null); lookupTuple.setY(null); return Optional.of(matchings); } else { return Optional.empty(); } } /** * Determines which <code>Matcher</code> to use for matching <code>left</code> and <code>right</code> and returns * the resulting <code>Matchings</code>. * * @param context * the <code>MergeContext</code> * @param left * the left tree * @param right * the right tree * @return the <code>Matchings</code> */ private Matchings<T> getMatchings(MergeContext context, T left, T right) { boolean fullyOrderedChildren = false; if (context.isUseMCESubtreeMatcher()) { Stream<T> lCStr = left.getChildren().stream(); Stream<T> rCStr = right.getChildren().stream(); fullyOrderedChildren = lCStr.allMatch(fullyOrdered::contains) && rCStr.allMatch(fullyOrdered::contains); } boolean onlyOrderedChildren = orderedChildren.contains(left) && orderedChildren.contains(right); boolean onlyLabeledChildren = uniquelyLabeledChildren.contains(left) && uniquelyLabeledChildren.contains(right); calls++; Matchings<T> matchings; if (fullyOrderedChildren && context.isUseMCESubtreeMatcher()) { orderedCalls++; logMatcherUse(mceSubtreeMatcher.getClass(), left, right); matchings = mceSubtreeMatcher.match(context, left, right); } else if (onlyOrderedChildren) { orderedCalls++; logMatcherUse(orderedMatcher.getClass(), left, right); matchings = orderedMatcher.match(context, left, right); } else { unorderedCalls++; if (onlyLabeledChildren) { logMatcherUse(unorderedLabelMatcher.getClass(), left, right); matchings = unorderedLabelMatcher.match(context, left, right); } else { logMatcherUse(unorderedMatcher.getClass(), left, right); matchings = unorderedMatcher.match(context, left, right); } } if (context.getCMMatcherMode() != CMMode.INTEGRATED) { return matchings; } Optional<Matching<T>> oMatch = matchings.get(left, right); if (oMatch.isPresent()) { Matching<T> prevMatch = oMatch.get(); if (prevMatch.getPercentage() > 0 && prevMatch.getPercentage() < context.getCmReMatchBound()) { //TODO we may want to remove the first condition Matchings<T> newMatchings = cmMatcher.match(context, left, right); oMatch = newMatchings.get(left, right); if (oMatch.isPresent() && oMatch.get().getPercentage() > prevMatch.getPercentage()) { matchings = newMatchings; } } } else { LOG.warning(() -> "Did not receive a matching for " + left + " " + right + " from the concrete matchers."); } return matchings; } /** * If <code>left</code> and <code>right</code> do not match, this method attempts to find two <code>Artifacts</code> * (children of <code>left</code> and <code>right</code>) with which to resume matching the two trees. Depending * on the type of the <code>Artifact</code>s a different lookahead will be performed. E.g. in the case of two * METHOD <code>Artifact</code>s they themselves will be returned to try and detect renamings. If one of them is * a TRY <code>Artifact</code>, the method will attempt to find a node matching the other and return them as a * tuple. This is an attempt to find code that was surrounded by a try/catch block. * * @param context * the <code>MergeContext</code> * @param left * the left tree * @param right * the right tree * @return optionally the two <code>Artifact</code>s to try and match instead of <code>left</code> and * <code>right</code> */ private Optional<UnorderedTuple<T, T>> lookAhead(MergeContext context, T left, T right) { if (!context.isLookAhead()) { return Optional.empty(); } KeyEnums.Type lType = left.getType(); KeyEnums.Type rType = right.getType(); int leftLAH = context.getLookahead(lType); int rightLAH = context.getLookahead(rType); if (leftLAH == LOOKAHEAD_OFF && rightLAH == LOOKAHEAD_OFF) { return Optional.empty(); } if (lType == METHOD && rType == METHOD) { assert leftLAH != LOOKAHEAD_OFF && rightLAH != LOOKAHEAD_OFF; return Optional.of(UnorderedTuple.of(left, right)); } else if (lType == TRY) { Optional<T> resume = findMatchingNode(left, right, leftLAH); if (resume.isPresent()) { return Optional.of(UnorderedTuple.of(resume.get(), right)); } else { return Optional.empty(); } } else if (rType == TRY) { Optional<T> resume = findMatchingNode(right, left, rightLAH); if (resume.isPresent()) { return Optional.of(UnorderedTuple.of(left, resume.get())); } else { return Optional.empty(); } } else { return Optional.empty(); } } /** * Performs a depth first search of the given <code>tree</code> and returns the first node matching * <code>nodeToFind</code> as per the {@link Artifact#matches(Artifact)} method. * * @param tree * the tree to search in * @param nodeToFind * the node to find a match for * @param maxDepth * the maximum depth of nodes to consider (root is a depth 0) * @return optionally a matching node for <code>nodeToFind</code> */ private Optional<T> findMatchingNode(T tree, T nodeToFind, int maxDepth) { if (maxDepth < 0) { return Optional.empty(); } if (tree.matches(nodeToFind)) { return Optional.of(tree); } for (T child : tree.getChildren()) { Optional<T> matchingNode = findMatchingNode(child, nodeToFind, maxDepth - 1); if (matchingNode.isPresent()) { return matchingNode; } } return Optional.empty(); } /** * Logs the use of a <code>MatcherInterface</code> implementation to match <code>left</code> and * <code>right</code>. * * @param c the <code>MatcherInterface</code> that is used * @param left the left <code>Artifact</code> that is matched * @param right the right <code>Artifact</code> that is matched */ private void logMatcherUse(Class<?> c, T left, T right) { LOG.finest(() -> { String matcherName = c.getSimpleName(); return String.format("%s.match(%s, %s)", matcherName, left.getId(), right.getId()); }); } /** * Stores the <code>Matching</code>s contained in <code>matchings</code> in the <code>Artifact</code>s they * match. * * @param context * the <code>MergeContext</code> of the current merge * @param matchings * the <code>Matchings</code> to store * @param color * the <code>Color</code> used to highlight the matchings in the debug output */ public void storeMatchings(MergeContext context, Matchings<T> matchings, Color color) { LOG.finest("Store matching information within nodes."); for (Matching<T> matching : matchings.optimized()) { if (matching.getScore() > 0) { T left = matching.getLeft(); T right = matching.getRight(); KeyEnums.Type rType = right.getType(); KeyEnums.Type lType = left.getType(); if (context.getCMMatcherMode() == CMMode.OFF && context.getLookahead(lType) == LOOKAHEAD_OFF && context.getLookahead(rType) == LOOKAHEAD_OFF && !left.matches(right)) { String format = "Tried to store a non-lookahead matching between %s and %s that do not match.\n" + "The offending matching was created by %s!"; String msg = String.format(format, left.getId(), right.getId(), matching.getAlgorithm()); throw new RuntimeException(msg); } matching.setHighlightColor(color); left.addMatching(matching); right.addMatching(matching); LOG.finest(String.format("Store matching for %s and %s (%s).", left.getId(), right.getId(), matching.getAlgorithm())); } } } /** * Returns a formatted string describing the logged call counts. * * @return a log of the call counts */ private String getLog() { assert (calls == unorderedCalls + orderedCalls + equalityCalls) : String.format("Wrong sum for matcher calls: %d + %d + %d != %d", unorderedCalls, orderedCalls, equalityCalls, calls); return "Matcher calls (all/ordered/unordered/equality): " + calls + "/" + orderedCalls + "/" + unorderedCalls + "/" + equalityCalls; } }