CostModelMatcher.java example

Explorer
jdime-master
- src
  - de
    - fosd
      - jdime
        JDimeWrapper.java
        Main.java
        artifact
        Artifact.java
        ArtifactList.java
        Artifacts.java
        ast
        ASTNodeArtifact.java
        file
        FileArtifact.java
        config
        CommandLineConfigSource.java
        JDimeConfig.java
        merge
        MergeContext.java
        MergeScenario.java
        MergeType.java
        Revision.java
        execption
        AbortException.java
        NotYetImplementedException.java
        UnsupportedMergeTypeException.java
        gui
        GUI.java
        GraphvizParser.java
        History.java
        State.java
        TreeDumpNode.java
        matcher
        Matcher.java
        MatcherInterface.java
        cost_model
        Bounds.java
        CMMatching.java
        CMMatchings.java
        CMMode.java
        CMParameters.java
        CostModelMatcher.java
        matching
        Color.java
        LookAheadMatching.java
        Matching.java
        Matchings.java
        ordered
        EqualityMatcher.java
        OrderedMatcher.java
        mceSubtree
        BalancedSequence.java
        MCESubtreeMatcher.java
        simpleTree
        Direction.java
        Entry.java
        SimpleTreeMatcher.java
        unordered
        UniqueLabelMatcher.java
        UnorderedMatcher.java
        assignmentProblem
        AssignmentProblemMatcher.java
        HungarianAlgorithm.java
        HungarianMatcher.java
        merge
        Merge.java
        MergeInterface.java
        OrderedMerge.java
        UnorderedMerge.java
        operations
        AddOperation.java
        ConflictOperation.java
        DeleteOperation.java
        MergeOperation.java
        Operation.java
        stats
        ElementStatistics.java
        KeyEnums.java
        MergeScenarioStatistics.java
        MergeStatistics.java
        Statistics.java
        StatisticsInterface.java
        parser
        Content.java
        ParseResult.java
        Parser.java
        strategy
        CombinedStrategy.java
        LinebasedStrategy.java
        MergeStrategy.java
        NWayStrategy.java
        StrategyNotFoundException.java
        StructuredStrategy.java
        strdump
        DumpMode.java
        GraphvizTreeDump.java
        MatchingsTreeDump.java
        PlaintextTreeDump.java
        PrettyPrintDump.java
        StringDumper.java
        TGFTreeDump.java
        graphviz
        GraphvizAttribute.java
        GraphvizAttributeList.java
        GraphvizAttributeStmt.java
        GraphvizAttributeStmtType.java
        GraphvizEdge.java
        GraphvizElement.java
        GraphvizGraph.java
        GraphvizGraphBase.java
        GraphvizGraphType.java
        GraphvizNode.java
        GraphvizStatement.java
        GraphvizSubGraph.java
        util
        Tuple.java
        UnorderedTuple.java
- test
  - de
    - fosd
      - jdime
        JDimeTest.java
        MergeTest.java
        artifact
        ArtifactTest.java
        ArtifactsTest.java
        TestArtifact.java
        TestTrees.java
        ast
        ASTNodeArtifactTest.java
        matcher
        cost_model
        CostModelMatcherTest.java
        stats
        StatisticsTest.java
        parser
        ParseResultTest.java
        ParserTest.java
        strategy
        StatisticsInterfaceTest.java
/**
 * Copyright (C) 2013-2014 Olaf Lessenich
 * Copyright (C) 2014-2015 University of Passau, Germany
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 * MA 02110-1301  USA
 *
 * Contributors:
 *     Olaf Lessenich <lessenic@fim.uni-passau.de>
 *     Georg Seibt <seibt@fim.uni-passau.de>
 */
package de.fosd.jdime.matcher.cost_model;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.function.Supplier;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.stream.Stream;

import de.fosd.jdime.artifact.Artifact;
import de.fosd.jdime.artifact.ArtifactList;
import de.fosd.jdime.artifact.Artifacts;
import de.fosd.jdime.config.merge.MergeContext;
import de.fosd.jdime.matcher.MatcherInterface;
import de.fosd.jdime.matcher.matching.Matching;
import de.fosd.jdime.matcher.matching.Matchings;
import de.fosd.jdime.util.Tuple;
import org.apache.commons.math3.random.RandomGenerator;

import static de.fosd.jdime.matcher.cost_model.Bounds.BY_LOWER_UPPER;
import static java.lang.Integer.toHexString;
import static java.lang.System.identityHashCode;
import static java.util.Comparator.comparing;
import static java.util.logging.Level.FINER;
import static java.util.logging.Level.FINEST;
import static java.util.stream.Collectors.summingDouble;
import static java.util.stream.Collectors.toList;
import static java.util.stream.Collectors.toSet;
import static java.util.stream.Stream.concat;

/**
 * A <code>MatcherInterface</code> implementation based on the Flexible Tree Matching algorithm.
 *
 * @param <T> the type of the artifacts being matched
 * @see <a href="http://theory.stanford.edu/~tim/papers/ijcai11.pdf">The Paper</a>
 */
public class CostModelMatcher<T extends Artifact<T>> implements MatcherInterface<T> {

    /** The logger of this class, named after the canonical name of <code>CostModelMatcher</code>. */
    private static final Logger LOG = Logger.getLogger(CostModelMatcher.class.getCanonicalName());

    /**
     * A function weighing a matching that incurred a cost.
     *
     * @param <T> the type of the artifacts
     */
    @FunctionalInterface
    public interface SimpleWeightFunction<T extends Artifact<T>> {

        /**
         * Weighs the given <code>matching</code>.
         *
         * @param matching
         *         the <code>CMMatching</code> that incurred the cost
         * @return the weight to be charged for the <code>matching</code>
         */
        float weigh(CMMatching<T> matching);
    }

    /**
     * A function weighing a matching that incurred a specific cost.
     *
     * @param <T> the type of the artifacts
     */
    @FunctionalInterface
    public interface WeightFunction<T extends Artifact<T>> {

        /**
         * Weighs the given <code>matching</code> taking into account the <code>quantity</code> of the cost it
         * incurred.
         *
         * @param matching
         *         the <code>CMMatching</code> that incurred the cost
         * @param quantity
         *         the quantity of the cost that was incurred by the <code>matching</code>
         * @return the weight to be charged for the <code>matching</code>
         */
        float weigh(CMMatching<T> matching, float quantity);
    }

    /**
     * The return type of {@link #objective(CMMatchings, CMParameters)} containing the value of the objective
     * function and the exact cost of the newly proposed set of <code>CMMatching</code>s.
     */
    private final class ObjectiveValue {

        public final double objValue;
        public final float matchingsCost;

        public ObjectiveValue(double objValue, float matchingsCost) {
            this.objValue = objValue;
            this.matchingsCost = matchingsCost;
        }
    }

    /**
     * The return type of {@link #acceptanceProb(double, CMMatchings, CMParameters)} containing the probability
     * of the newly proposed set of <code>CMMatching</code>s being accepted for the next iteration and the
     * <code>ObjectiveValue</code> for the proposed matchings.
     */
    private final class AcceptanceProbability {

        public final double acceptanceProbability;
        public final ObjectiveValue mHatObjectiveValue;

        public AcceptanceProbability(double acceptanceProbability, ObjectiveValue mHatObjectiveValue) {
            this.acceptanceProbability = acceptanceProbability;
            this.mHatObjectiveValue = mHatObjectiveValue;
        }
    }

    /**
     * Computes the exact cost of the given <code>matchings</code> between the trees rooted in <code>left</code>
     * and <code>right</code>. Every artifact of the two trees (as enumerated by <code>Artifacts.dfs</code>) that
     * is not the left or right side of one of the <code>matchings</code> is treated as being unmatched.
     * An empty set of <code>matchings</code> has a cost of 0.
     *
     * @param context
     *         the <code>MergeContext</code> from which the <code>CMParameters</code> are constructed
     * @param matchings
     *         the matchings whose cost is to be calculated
     * @param left
     *         the root of the left tree
     * @param right
     *         the root of the right tree
     * @return the exact cost of the <code>matchings</code>
     */
    public float cost(MergeContext context, Matchings<T> matchings, T left, T right) {

        if (matchings.isEmpty()) {
            return 0;
        }

        // Start with all artifacts and strike out those that take part in a matching.
        Set<T> unmatchedLeft = new LinkedHashSet<>(Artifacts.dfs(left));
        Set<T> unmatchedRight = new LinkedHashSet<>(Artifacts.dfs(right));

        CMMatchings<T> complete = new CMMatchings<>(left, right);

        for (Matching<T> matching : matchings) {
            complete.add(new CMMatching<>(matching.getLeft(), matching.getRight()));

            unmatchedLeft.remove(matching.getLeft());
            unmatchedRight.remove(matching.getRight());
        }

        // The remaining artifacts are represented by matchings whose other side is null.
        unmatchedLeft.forEach(l -> complete.add(new CMMatching<>(l, null)));
        unmatchedRight.forEach(r -> complete.add(new CMMatching<>(null, r)));

        return cost(complete, new CMParameters<>(context));
    }

    /**
     * Computes the exact cost of the given <code>matchings</code>. The <code>matchings</code> must pass
     * {@link CMMatchings#sane()}. After this call the exact cost computed for an individual
     * <code>CMMatching</code> can be retrieved using {@link CMMatching#getExactCost()}.
     * <p>
     * The result is the sum of the exact costs of all <code>CMMatching</code>s divided by the sum of the sizes
     * of the left and the right tree. The exact caches of the <code>parameters</code> are cleared before
     * returning.
     *
     * @param matchings
     *         the <code>CMMatchings</code> to evaluate
     * @param parameters
     *         the <code>CMParameters</code> to use
     * @return the cost based on the weight functions in <code>parameters</code>, 0 if <code>matchings</code> is
     *          empty
     * @throws IllegalArgumentException
     *         if <code>matchings</code> is not sane
     */
    private float cost(CMMatchings<T> matchings, CMParameters<T> parameters) {

        if (!matchings.sane()) {
            throw new IllegalArgumentException("The given list of matchings has an invalid format. A list of " +
                    "matchings where every artifact from the left and right tree occurs in exactly one matching is " +
                    "required. Matchings matching artifacts that do not occur in the left or right tree are not " +
                    "allowed.");
        }

        if (matchings.isEmpty()) {
            return 0;
        }

        Consumer<CMMatching<T>> evaluate = m -> cost(m, matchings, parameters);

        if (!parameters.parallel) {
            matchings.forEach(evaluate);
        } else {
            matchings.parallelStream().forEach(evaluate);
        }

        Double summedCosts = matchings.stream().collect(summingDouble(CMMatching::getExactCost));
        float averageCost = summedCosts.floatValue();

        // Normalize by the total number of artifacts in both trees.
        averageCost *= (1.0f / (matchings.left.getTreeSize() + matchings.right.getTreeSize()));

        parameters.clearExactCaches();

        return averageCost;
    }

    /**
     * Computes the exact cost of the given <code>matching</code> in the context of the complete set of
     * <code>matchings</code> and stores it using {@link CMMatching#setExactCost(float)}.
     * <p>
     * A no-match is assigned the cost <code>parameters.wn</code>. Every other matching is assigned the sum of
     * its renaming, ancestry violation, sibling group breakup and ordering costs.
     *
     * @param matching
     *         the <code>CMMatching</code> to compute the cost for
     * @param matchings
     *         the complete <code>CMMatching</code>s
     * @param parameters
     *         the <code>CMParameters</code> to use
     */
    private void cost(CMMatching<T> matching, CMMatchings<T> matchings, CMParameters<T> parameters) {
        float exactCost;

        if (matching.isNoMatch()) {
            exactCost = parameters.wn;
        } else {
            float renaming = renamingCost(matching, parameters);
            float ancestry = ancestryViolationCost(matching, matchings, parameters);
            float siblingGroup = siblingGroupBreakupCost(matching, matchings, parameters);
            float ordering = orderingCost(matching, matchings, parameters);

            exactCost = renaming + ancestry + siblingGroup + ordering;
        }

        matching.setExactCost(exactCost);
    }

    /**
     * Computes the cost for renaming one side of the <code>matching</code> into the other. If the two
     * <code>Artifact</code>s match according to {@link Artifact#matches(Artifact)} there is no cost, otherwise
     * the renaming weight function <code>parameters.wr</code> determines it.
     *
     * @param matching
     *         the <code>CMMatching</code> to compute the cost for
     * @param parameters
     *         the <code>CMParameters</code> containing the renaming weight function
     * @return the exact renaming cost of the <code>matching</code>
     */
    private float renamingCost(CMMatching<T> matching, CMParameters<T> parameters) {

        if (!matching.m.matches(matching.n)) {
            return parameters.wr.weigh(matching);
        }

        return 0;
    }

    /**
     * Computes the exact ancestry violation cost for <code>matching</code>. The number of ancestry violating
     * children of both sides of the <code>matching</code> is added up and weighed by <code>parameters.wa</code>.
     *
     * @param matching
     *         the matching to calculate the cost for
     * @param matchings
     *         all matchings
     * @param parameters
     *         the cost model parameters
     * @return the exact ancestry violation cost
     */
    private float ancestryViolationCost(CMMatching<T> matching, CMMatchings<T> matchings, CMParameters<T> parameters) {
        T left = matching.m;
        T right = matching.n;

        int violations = numAncestryViolatingChildren(left, right, matchings, parameters)
                            + numAncestryViolatingChildren(right, left, matchings, parameters);

        return parameters.wa.weigh(matching, violations);
    }

    /**
     * Counts the children of <code>m</code> that violate ancestry if <code>m</code> is matched with
     * <code>n</code>. A child violates ancestry if it has a (non-null) image that is not one of the children of
     * <code>n</code>.
     *
     * @param m
     *         the artifact to return the number of ancestry violating children for
     * @param n
     *         the artifact <code>m</code> is being matched with
     * @param matchings
     *         all matchings
     * @param parameters
     *         the cost model parameters
     * @return the number of children of <code>m</code> violating ancestry
     */
    private int numAncestryViolatingChildren(T m, T n, CMMatchings<T> matchings, CMParameters<T> parameters) {
        ArtifactList<T> childrenOfM = m.getChildren();
        ArtifactList<T> childrenOfN = n.getChildren();
        int violations = 0;

        for (T child : childrenOfM) {
            T partner = image(child, matchings, parameters);

            // Unmatched children (null image) never violate ancestry.
            if (partner != null && !childrenOfN.contains(partner)) {
                violations++;
            }
        }

        return violations;
    }

    /**
     * Computes the exact sibling group breakup cost for <code>matching</code>.
     * <p>
     * For each side of the <code>matching</code> the size of the sibling divergent subset is divided by the
     * product of the size of the sibling invariant subset and the number of distinct sibling families. A side
     * whose sibling divergent subset is empty contributes 0. The sum of both sides is weighed by
     * <code>parameters.ws</code>.
     *
     * @param matching
     *         the matching to calculate the cost for
     * @param matchings
     *         all matchings
     * @param parameters
     *         the cost model parameters
     * @return the exact sibling group breakup cost
     */
    private float siblingGroupBreakupCost(CMMatching<T> matching, CMMatchings<T> matchings, CMParameters<T> parameters) {
        T m = matching.m;
        T n = matching.n;
        float breakupM = 0;
        float breakupN = 0;

        List<T> divergentM = siblingDivergentSubset(m, n, matchings, parameters);

        if (!divergentM.isEmpty()) {
            int invariantCount = siblingInvariantSubset(m, n, matchings, parameters).size();
            int familyCount = distinctSiblingFamilies(m, matchings, parameters).size();

            breakupM = (float) divergentM.size() / (invariantCount * familyCount);
        }

        List<T> divergentN = siblingDivergentSubset(n, m, matchings, parameters);

        if (!divergentN.isEmpty()) {
            int invariantCount = siblingInvariantSubset(n, m, matchings, parameters).size();
            int familyCount = distinctSiblingFamilies(n, matchings, parameters).size();

            breakupN = (float) divergentN.size() / (invariantCount * familyCount);
        }

        return parameters.ws.weigh(matching, breakupM + breakupN);
    }

    /**
     * Determines the sibling invariant subset of the siblings of <code>m</code>, that is all siblings of
     * <code>m</code> whose image is one of the siblings of <code>n</code>.
     *
     * @param m
     *         the artifact for whose siblings the sibling invariant subset is to be returned
     * @param n
     *         the artifact <code>m</code> is being matched with
     * @param matchings
     *         all matchings
     * @param parameters
     *         the cost model parameters
     * @return the sibling invariant subset
     */
    private List<T> siblingInvariantSubset(T m, T n, CMMatchings<T> matchings, CMParameters<T> parameters) {
        List<T> siblingsOfM = siblings(m, matchings, parameters);
        List<T> siblingsOfN = siblings(n, matchings, parameters);
        List<T> invariant = new ArrayList<>();

        for (T sibling : siblingsOfM) {
            if (siblingsOfN.contains(image(sibling, matchings, parameters))) {
                invariant.add(sibling);
            }
        }

        return invariant;
    }

    /**
     * Determines the sibling divergent subset of the siblings of <code>m</code>, that is all siblings of
     * <code>m</code> that are not part of the sibling invariant subset but do have a (non-null) image.
     *
     * @param m
     *         the artifact for whose siblings the sibling divergent subset is to be returned
     * @param n
     *         the artifact <code>m</code> is being matched with
     * @param matchings
     *         all matchings
     * @param parameters
     *         the cost model parameters
     * @return the sibling divergent subset
     */
    private List<T> siblingDivergentSubset(T m, T n, CMMatchings<T> matchings, CMParameters<T> parameters) {
        List<T> invariant = siblingInvariantSubset(m, n, matchings, parameters);
        List<T> allSiblings = siblings(m, matchings, parameters);

        return allSiblings.stream()
                          .filter(candidate -> !invariant.contains(candidate))
                          .filter(candidate -> image(candidate, matchings, parameters) != null)
                          .collect(toList());
    }

    /**
     * Collects the distinct sibling families that siblings of <code>m</code> are matched into. A family is
     * represented by the parent of the image of a sibling, siblings without an image are ignored. If an image
     * has no parent, <code>null</code> will be included in the set.
     *
     * @param m
     *         the artifact for whose siblings the distinct sibling families are to be returned
     * @param matchings
     *         all matchings
     * @param parameters
     *         the cost model parameters
     * @return the distinct sibling family representatives
     */
    private Set<T> distinctSiblingFamilies(T m, CMMatchings<T> matchings, CMParameters<T> parameters) {
        Set<T> families = new HashSet<>();

        for (T sibling : siblings(m, matchings, parameters)) {
            T partner = image(sibling, matchings, parameters);

            if (partner != null) {
                families.add(partner.getParent());
            }
        }

        return families;
    }

    /**
     * Computes the exact ordering cost for <code>matching</code>. The cost is <code>parameters.wo</code> applied
     * to the <code>matching</code> if any of the (distinct, non no-match) matchings containing one of the other
     * siblings of either side of the <code>matching</code> violates the ordering induced by it. Otherwise the
     * cost is 0.
     *
     * @param matching
     *         the matching to calculate the cost for
     * @param matchings
     *         all matchings
     * @param parameters
     *         the cost model parameters
     * @return the exact ordering cost
     */
    private float orderingCost(CMMatching<T> matching, CMMatchings<T> matchings, CMParameters<T> parameters) {
        List<T> othersLeft = otherSiblings(matching.m, matchings, parameters);
        List<T> othersRight = otherSiblings(matching.n, matchings, parameters);

        boolean violated = concat(othersLeft.stream(), othersRight.stream())
                                .map(sibling -> matching(sibling, matchings, parameters))
                                .filter(candidate -> !candidate.isNoMatch())
                                .distinct()
                                .anyMatch(candidate -> violatesOrdering(candidate, matching, matchings, parameters));

        if (!violated) {
            return 0;
        }

        return parameters.wo.weigh(matching);
    }

    /**
     * Tests whether <code>toCheck</code> violates the ordering induced by <code>matching</code>.
     * <p>
     * For the left and the right sides of the two matchings the pair of ancestors determined by
     * {@link #lca(Artifact, Artifact, CMMatchings, CMParameters)} is looked up in the sibling group of the first
     * ancestor. If none of the artifacts in the two sibling groups is ordered, no violation is possible.
     * Otherwise the ordering is violated if the relative position of the two ancestors is reversed between
     * the left and the right side.
     *
     * @param toCheck
     *         the matching to check
     * @param matching
     *         the matching introducing an ordering
     * @param matchings
     *         all matchings
     * @param parameters
     *         the cost model parameters
     * @return true iff <code>toCheck</code> violates the ordering induced by <code>matching</code>
     */
    private boolean violatesOrdering(CMMatching<T> toCheck, CMMatching<T> matching, CMMatchings<T> matchings, CMParameters<T> parameters) {
        Tuple<T, T> leftAncestors = lca(toCheck.m, matching.m, matchings, parameters);
        Tuple<T, T> rightAncestors = lca(toCheck.n, matching.n, matchings, parameters);
        List<T> leftGroup = siblings(leftAncestors.x, matchings, parameters);
        List<T> rightGroup = siblings(rightAncestors.x, matchings, parameters);

        boolean anyOrdered = concat(leftGroup.stream(), rightGroup.stream()).anyMatch(T::isOrdered);

        if (!anyOrdered) {
            return false;
        }

        int leftOrder = Integer.compare(leftGroup.indexOf(leftAncestors.x), leftGroup.indexOf(leftAncestors.y));
        int rightOrder = Integer.compare(rightGroup.indexOf(rightAncestors.x), rightGroup.indexOf(rightAncestors.y));

        if (leftOrder == 0) {
            return false; // TODO weird case, maybe true is better?
        }

        // A violation occurs iff the strict order on the left is strictly reversed on the right.
        return leftOrder < 0 ? rightOrder > 0 : rightOrder < 0;
    }

    /**
     * Collects the path from the given <code>artifact</code> up to the root of the tree it is a part of by
     * repeatedly following {@link Artifact#getParent()} until it returns <code>null</code>.
     *
     * @param artifact
     *         the <code>Artifact</code> to return the path for
     * @return the path represented by a list of <code>Artifact</code>s beginning with <code>artifact</code> and ending
     *          with the root of the tree
     */
    private List<T> pathToRoot(T artifact) {
        List<T> path = new ArrayList<>();
        T current = artifact;

        path.add(current);

        while ((current = current.getParent()) != null) {
            path.add(current);
        }

        return path;
    }

    /**
     * Finds the lowest pair of (possibly different) ancestors of <code>a</code> and <code>b</code> that are part of the
     * same sibling group. Results are cached in <code>parameters.lcaCache</code>; if the result for the swapped
     * pair <code>(b, a)</code> is already cached, it is reused with its components swapped.
     *
     * @param a
     *         the first <code>Artifact</code>
     * @param b
     *         the second <code>Artifact</code>
     * @param matchings
     *         the current <code>CMMatchings</code>
     * @param parameters
     *         the <code>CMParameters</code> to use
     * @return the ancestor of the first <code>Artifact</code> in the first position, that of the second in the second
     *          position
     */
    private Tuple<T, T> lca(T a, T b, CMMatchings<T> matchings, CMParameters<T> parameters) {
        return parameters.lcaCache.computeIfAbsent(Tuple.of(a, b), key -> {
            Tuple<T, T> mirroredKey = Tuple.of(b, a);

            if (parameters.lcaCache.containsKey(mirroredKey)) {
                Tuple<T, T> mirrored = parameters.lcaCache.get(mirroredKey);
                return Tuple.of(mirrored.y, mirrored.x);
            }

            // a and b already are siblings, they are their own lowest ancestors in a common sibling group.
            if (siblings(a, matchings, parameters).contains(b)) {
                return key;
            }

            List<T> pathOfA = pathToRoot(a);
            List<T> pathOfB = pathToRoot(b);

            // Walk both paths downwards from the root until they diverge or one of them ends.
            ListIterator<T> walkA = pathOfA.listIterator(pathOfA.size());
            ListIterator<T> walkB = pathOfB.listIterator(pathOfB.size());
            T ancestorOfA = walkA.previous();
            T ancestorOfB = walkB.previous();

            while (ancestorOfA == ancestorOfB && walkA.hasPrevious() && walkB.hasPrevious()) {
                ancestorOfA = walkA.previous();
                ancestorOfB = walkB.previous();
            }

            return Tuple.of(ancestorOfA, ancestorOfB);
        });
    }

    /**
     * Looks up the (first) <code>CMMatching</code> in <code>matchings</code> that contains the given
     * <code>artifact</code>. Results are cached in <code>parameters.exactContainsCache</code>.
     *
     * @param artifact
     *         the <code>Artifact</code> for which the containing <code>CMMatching</code> is to be returned
     * @param matchings
     *         the current matchings
     * @param parameters
     *         the <code>CMParameters</code> to use
     * @return the <code>CMMatching</code> containing the <code>artifact</code>
     * @throws NoSuchElementException
     *         if no <code>CMMatching</code> containing <code>artifact</code> can be found in
     *         <code>matchings</code>
     */
    private CMMatching<T> matching(T artifact, CMMatchings<T> matchings, CMParameters<T> parameters) {
        Supplier<NoSuchElementException> notFound = () ->
                new NoSuchElementException("No matching containing " + artifact + " found.");

        return parameters.exactContainsCache.computeIfAbsent(artifact, key ->
            matchings.stream()
                     .filter(candidate -> candidate.contains(key))
                     .findFirst()
                     .orElseThrow(notFound)
        );
    }

    /**
     * Determines the image of the given <code>artifact</code>, that is the other <code>Artifact</code> in the
     * (first) <code>CMMatching</code> in <code>matchings</code> containing the <code>artifact</code>.
     *
     * @param artifact
     *         the <code>Artifact</code> whose image is to be returned
     * @param matchings
     *         the current matchings
     * @param parameters
     *         the <code>CMParameters</code> to use
     * @return the matching partner of <code>artifact</code> in the given <code>matchings</code>
     * @throws NoSuchElementException
     *         if no <code>CMMatching</code> containing <code>artifact</code> can be found in
     *         <code>matchings</code>
     */
    private T image(T artifact, CMMatchings<T> matchings, CMParameters<T> parameters) {
        CMMatching<T> containing = matching(artifact, matchings, parameters);
        return containing.other(artifact);
    }

    /**
     * Sets the bounds for the cost of all current matchings by bounding every <code>CMMatching</code> in
     * <code>currentMatchings</code> individually. Depending on <code>parameters.parallel</code> this is done
     * using a parallel or a sequential stream. The bound caches of the <code>parameters</code> are cleared
     * afterwards.
     * <p>
     * On level <code>FINEST</code> a message is logged for every matching <em>after</em> its bounds were
     * computed.
     *
     * @param currentMatchings
     *         the current <code>CMMatchings</code>s being considered
     * @param parameters
     *         the <code>CMParameters</code> to use
     */
    private void boundCost(CMMatchings<T> currentMatchings, CMParameters<T> parameters) {
        LOG.finer(() -> "Bounding " + currentMatchings.size() + " matchings.");

        // Always allocate the counter: the message supplier below is only evaluated if FINEST is loggable at
        // the time of the log call, a nullable counter would cause a NullPointerException if the level is
        // raised while the matchings are being bounded.
        AtomicInteger mCount = new AtomicInteger();

        // Log after the work is done. Stream#peek actions run before the terminal forEach action and would
        // report "Done" for a matching that was not bounded yet.
        Consumer<CMMatching<T>> boundAndLog = m -> {
            boundCost(m, currentMatchings, parameters);
            LOG.finest(() -> "Done with matching " + mCount.getAndIncrement() + " " + m);
        };

        if (parameters.parallel) {
            currentMatchings.parallelStream().forEach(boundAndLog);
        } else {
            currentMatchings.stream().forEach(boundAndLog);
        }

        parameters.clearBoundCaches();
    }

    /**
     * Computes the lower and upper bound for the cost of the given <code>matching</code> based on the given
     * <code>currentMatchings</code> and stores them using <code>setBounds</code>.
     * <p>
     * For a no-match both bounds are <code>parameters.wn</code>. For every other matching the (exact) renaming
     * cost is added to the respective bounds of the ancestry violation, sibling group breakup and ordering
     * costs.
     *
     * @param matching
     *         the <code>CMMatching</code> whose costs are to be bounded
     * @param currentMatchings
     *         the current <code>CMMatchings</code>s being considered
     * @param parameters
     *         the <code>CMParameters</code> to use
     */
    private void boundCost(CMMatching<T> matching, CMMatchings<T> currentMatchings, CMParameters<T> parameters) {

        if (matching.isNoMatch()) {
            matching.setBounds(parameters.wn, parameters.wn);
        } else {
            float renaming = renamingCost(matching, parameters);
            Bounds ancestry = boundAncestryViolationCost(matching, currentMatchings, parameters);
            Bounds siblingGroup = boundSiblingGroupBreakupCost(matching, currentMatchings, parameters);
            Bounds ordering = boundOrderingCost(matching, currentMatchings, parameters);

            float lowerBound = renaming + ancestry.getLower() + siblingGroup.getLower() + ordering.getLower();
            float upperBound = renaming + ancestry.getUpper() + siblingGroup.getUpper() + ordering.getUpper();

            matching.setBounds(lowerBound, upperBound);
        }
    }

    /**
     * Bounds the ancestry violation cost for <code>matching</code>. The children of both sides of the
     * <code>matching</code> for which the lower (respectively upper) ancestry indicator holds are counted and
     * the two counts are weighed by <code>parameters.wa</code>.
     *
     * @param matching
     *         the matching to calculate the bounds for
     * @param currentMatchings
     *         all matchings
     * @param parameters
     *         the cost model parameters
     * @return the bounded ancestry violation cost
     */
    private Bounds boundAncestryViolationCost(CMMatching<T> matching, CMMatchings<T> currentMatchings, CMParameters<T> parameters) {
        T m = matching.m;
        T n = matching.n;

        Predicate<T> mUnavoidable = child -> ancestryIndicator(child, n, currentMatchings, false, parameters);
        Predicate<T> nUnavoidable = child -> ancestryIndicator(child, m, currentMatchings, false, parameters);
        Predicate<T> mPossible = child -> ancestryIndicator(child, n, currentMatchings, true, parameters);
        Predicate<T> nPossible = child -> ancestryIndicator(child, m, currentMatchings, true, parameters);

        long unavoidable = m.getChildren().stream().filter(mUnavoidable).count()
                            + n.getChildren().stream().filter(nUnavoidable).count();
        long possible = m.getChildren().stream().filter(mPossible).count()
                            + n.getChildren().stream().filter(nPossible).count();

        int lowerBound = (int) unavoidable;
        int upperBound = (int) possible;

        return new Bounds(parameters.wa.weigh(matching, lowerBound), parameters.wa.weigh(matching, upperBound));
    }

    /**
     * Evaluates the upper/lower ancestry violation indicator.
     * <p>
     * A matching containing <code>child</code> is violating if the partner of <code>child</code> in it is
     * neither <code>null</code> nor one of the children of <code>n</code>. The upper indicator holds if
     * <em>any</em> matching containing <code>child</code> is violating, the lower indicator holds if
     * <em>all</em> of them are.
     *
     * @param child
     *         the child for which to check whether ancestry violation is possible/unavoidable
     * @param n
     *         the matching partner of the parent of <code>child</code>
     * @param currentMatchings
     *         all matchings
     * @param upper
     *         whether to evaluate the upper or lower indicator
     * @param parameters
     *         the cost model parameters
     * @return the value of the indicator function
     */
    private boolean ancestryIndicator(T child, T n, CMMatchings<T> currentMatchings, boolean upper, CMParameters<T> parameters) {
        Predicate<CMMatching<T>> violating = match -> {
            T partner = match.other(child);
            return partner != null && !n.getChildren().contains(partner);
        };

        if (upper) {
            return containing(child, currentMatchings, parameters).stream().anyMatch(violating);
        }

        return containing(child, currentMatchings, parameters).stream().allMatch(violating);
    }

    /**
     * Bounds the sibling group breakup cost for <code>matching</code>.
     * <p>
     * For each direction (<code>m</code> to <code>n</code> and <code>n</code> to <code>m</code>) the bounds of
     * the size of the sibling divergent subset are combined with the bounds of the size of the sibling
     * invariant subset. A direction whose divergent bounds are both 0 contributes nothing. The lower bound
     * weighs the sum of the lower contributions, the upper bound weighs half the sum of the upper
     * contributions, both using <code>parameters.ws</code>.
     *
     * @param matching
     *         the matching to bound the cost for
     * @param currentMatchings
     *         all matchings
     * @param parameters
     *         the cost model parameters
     * @return the bounded sibling group breakup cost
     */
    private Bounds boundSiblingGroupBreakupCost(CMMatching<T> matching, CMMatchings<T> currentMatchings, CMParameters<T> parameters) {
        T m = matching.m;
        T n = matching.n;

        float lowerMN = 0;
        float upperMN = 0;
        float lowerNM = 0;
        float upperNM = 0;

        Bounds divergentMN = boundDivergentSiblings(m, n, currentMatchings, parameters);
        Bounds divergentNM = boundDivergentSiblings(n, m, currentMatchings, parameters);

        if (!(divergentMN.getLower() == 0 && divergentMN.getUpper() == 0)) {
            Bounds invariantMN = boundInvariantSiblings(m, n, currentMatchings, parameters);

            lowerMN = divergentMN.getLower() / (invariantMN.getUpper() * (divergentMN.getLower() + 1));
            upperMN = divergentMN.getUpper() / invariantMN.getLower();
        }

        if (!(divergentNM.getLower() == 0 && divergentNM.getUpper() == 0)) {
            Bounds invariantNM = boundInvariantSiblings(n, m, currentMatchings, parameters);

            lowerNM = divergentNM.getLower() / (invariantNM.getUpper() * (divergentNM.getLower() + 1));
            upperNM = divergentNM.getUpper() / invariantNM.getLower();
        }

        float lowerBound = parameters.ws.weigh(matching, lowerMN + lowerNM);
        float upperBound = parameters.ws.weigh(matching, (upperMN + upperNM) / 2);

        return new Bounds(lowerBound, upperBound);
    }

    /**
     * Bounds the size of the divergent sibling subset of siblings of <code>m</code>. The lower bound is the
     * number of other siblings of <code>m</code> for which the lower divergent sibling indicator holds, the
     * upper bound is the number of those for which the upper indicator holds.
     *
     * @param m
     *         the artifact for whose siblings the size of the sibling divergent subset is to be bounded
     * @param n
     *         the artifact <code>m</code> is being matched with
     * @param currentMatchings
     *         all matchings
     * @param parameters
     *         the cost model parameters
     * @return the bounded size of the divergent sibling subset
     */
    private Bounds boundDivergentSiblings(T m, T n, CMMatchings<T> currentMatchings, CMParameters<T> parameters) {
        List<T> others = otherSiblings(m, currentMatchings, parameters);

        Predicate<T> unavoidablyDivergent = sib -> divergentSiblingIndicator(sib, n, currentMatchings, false, parameters);
        Predicate<T> possiblyDivergent = sib -> divergentSiblingIndicator(sib, n, currentMatchings, true, parameters);

        long lower = others.stream().filter(unavoidablyDivergent).count();
        long upper = others.stream().filter(possiblyDivergent).count();

        return new Bounds(lower, upper);
    }

    /**
     * Evaluates the upper/lower divergent sibling subset indicator for <code>sibling</code>. A matching containing
     * <code>sibling</code> counts as divergent if it pairs <code>sibling</code> with a non-<code>null</code>
     * partner that is not one of the other siblings of <code>n</code>. The upper indicator holds if at least one
     * such matching exists, the lower indicator only if all matchings containing <code>sibling</code> are of that
     * kind.
     *
     * @param sibling
     *         the sibling for which to check whether inclusion in the divergent subset is possible/unavoidable
     * @param n
     *         the artifact whose other siblings the partners of <code>sibling</code> are compared against
     * @param currentMatchings
     *         all matchings
     * @param upper
     *         whether to evaluate the upper (<code>true</code>) or the lower (<code>false</code>) indicator
     * @param parameters
     *         the cost model parameters
     * @return the value of the indicator function
     */
    private boolean divergentSiblingIndicator(T sibling, T n, CMMatchings<T> currentMatchings, boolean upper, CMParameters<T> parameters) {
        Predicate<CMMatching<T>> leavesSiblingGroup = match -> {
            T partner = match.other(sibling);

            // The siblings of n are only looked up when there actually is a partner to check.
            return partner != null && !otherSiblings(n, currentMatchings, parameters).contains(partner);
        };

        List<CMMatching<T>> candidates = containing(sibling, currentMatchings, parameters);

        return upper ? candidates.stream().anyMatch(leavesSiblingGroup)
                     : candidates.stream().allMatch(leavesSiblingGroup);
    }

    /**
     * Bounds the size of the invariant sibling subset of the siblings of <code>m</code>. The other siblings of
     * <code>m</code> satisfying the lower/upper invariant indicator are counted and one is added to both counts.
     *
     * @param m
     *         the artifact whose other siblings are examined
     * @param n
     *         the artifact <code>m</code> is being matched with
     * @param currentMatchings
     *         all matchings
     * @param parameters
     *         the cost model parameters
     * @return the bounded size of the invariant sibling subset
     */
    private Bounds boundInvariantSiblings(T m, T n, CMMatchings<T> currentMatchings, CMParameters<T> parameters) {
        long unavoidable = 0;
        long possible = 0;

        for (T mSib : otherSiblings(m, currentMatchings, parameters)) {

            if (invariantSiblingIndicator(mSib, n, currentMatchings, false, parameters)) {
                unavoidable++;
            }

            if (invariantSiblingIndicator(mSib, n, currentMatchings, true, parameters)) {
                possible++;
            }
        }

        // Both bounds are shifted by one, so neither of them can be zero.
        return new Bounds(unavoidable + 1, possible + 1);
    }

    /**
     * Evaluates the upper/lower invariant sibling subset indicator for <code>sibling</code>. A matching containing
     * <code>sibling</code> counts as invariant if the partner it assigns to <code>sibling</code> is one of the other
     * siblings of <code>n</code>. The upper indicator holds if at least one such matching exists, the lower
     * indicator if all matchings containing <code>sibling</code> are of that kind.
     *
     * @param sibling
     *         the sibling for which to check whether inclusion in the invariant subset is possible/unavoidable
     * @param n
     *         the artifact whose other siblings the partners of <code>sibling</code> are compared against
     * @param currentMatchings
     *         all matchings
     * @param upper
     *         whether to evaluate the upper (<code>true</code>) or the lower (<code>false</code>) indicator
     * @param parameters
     *         the cost model parameters
     * @return the value of the indicator function
     */
    private boolean invariantSiblingIndicator(T sibling, T n, CMMatchings<T> currentMatchings, boolean upper, CMParameters<T> parameters) {
        Predicate<CMMatching<T>> staysInSiblingGroup = match -> {
            List<T> nSiblings = otherSiblings(n, currentMatchings, parameters);
            return nSiblings.contains(match.other(sibling));
        };

        List<CMMatching<T>> candidates = containing(sibling, currentMatchings, parameters);

        return upper ? candidates.stream().anyMatch(staysInSiblingGroup)
                     : candidates.stream().allMatch(staysInSiblingGroup);
    }

    /**
     * Bounds the ordering violation cost of <code>matching</code> by inspecting the matchings that are still
     * available for the other siblings of both matched artifacts.
     * <ul>
     * <li>If some sibling has no available matching that is either a no-match or does not violate the ordering
     * with <code>matching</code>, a violation is unavoidable and both bounds equal the weighted cost.</li>
     * <li>Otherwise the lower bound is zero and the upper bound is the weighted cost if any sibling has an
     * available real matching that violates the ordering, zero if not.</li>
     * </ul>
     *
     * @param matching
     *         the matching whose cost is to be bounded
     * @param currentMatchings
     *         all matchings
     * @param parameters
     *         the cost model parameters
     * @return the bounds of the ordering violation cost
     */
    private Bounds boundOrderingCost(CMMatching<T> matching, CMMatchings<T> currentMatchings, CMParameters<T> parameters) {
        // Siblings of m first, then those of n.
        List<T> allSiblings = new ArrayList<>(otherSiblings(matching.m, currentMatchings, parameters));
        allSiblings.addAll(otherSiblings(matching.n, currentMatchings, parameters));

        Predicate<T> canRespectOrdering = sib ->
            containing(sib, currentMatchings, parameters).stream().anyMatch(match ->
                match.isNoMatch() || !violatesOrdering(match, matching, currentMatchings, parameters)
            );

        Predicate<T> canViolateOrdering = sib ->
            containing(sib, currentMatchings, parameters).stream().anyMatch(match ->
                !match.isNoMatch() && violatesOrdering(match, matching, currentMatchings, parameters)
            );

        if (!allSiblings.stream().allMatch(canRespectOrdering)) {
            float forced = parameters.wo.weigh(matching);
            return new Bounds(forced, forced);
        }

        float lower = 0;
        float upper = allSiblings.stream().anyMatch(canViolateOrdering) ? parameters.wo.weigh(matching) : 0;

        return new Bounds(lower, upper);
    }

    /**
     * Returns the sibling group of <code>artifact</code>, that is the children of its parent including the
     * <code>artifact</code> itself. For the two roots (<code>matchings.left</code> and <code>matchings.right</code>)
     * a <code>List</code> containing only the root is returned. Results are stored in
     * <code>parameters.siblingCache</code>; if the cache already holds a list for another child of the same parent,
     * that very list instance is reused instead of building a new one.
     *
     * @param artifact
     *         the <code>Artifact</code> whose siblings are to be returned
     * @param matchings
     *         the current <code>CMMatchings</code>
     * @param parameters
     *         the <code>CMParameters</code> to use
     * @return the siblings of the given <code>artifact</code>
     */
    private List<T> siblings(T artifact, CMMatchings<T> matchings, CMParameters<T> parameters) {
        return parameters.siblingCache.computeIfAbsent(artifact, a -> {

            // The roots have no siblings besides themselves.
            if (artifact == matchings.left || artifact == matchings.right) {
                return new ArrayList<>(Collections.singleton(a));
            }

            T parent = a.getParent();

            // Share the list that was already cached for one of the other children, if there is one.
            for (T child : parent.getChildren()) {

                if (child != a && parameters.siblingCache.containsKey(child)) {
                    return parameters.siblingCache.get(child);
                }
            }

            return new ArrayList<>(parent.getChildren());
        });
    }

    /**
     * Returns the siblings of <code>artifact</code> as in {@link #siblings(Artifact, CMMatchings, CMParameters)} but
     * without <code>artifact</code> itself. The result is a separate copy that is stored in
     * <code>parameters.otherSiblingsCache</code>.
     *
     * @param artifact
     *         the <code>Artifact</code> whose siblings are to be returned
     * @param matchings
     *         the current <code>CMMatchings</code>
     * @param parameters
     *         the <code>CMParameters</code> to use
     * @return the siblings of the given <code>artifact</code> excluding the <code>artifact</code>
     */
    private List<T> otherSiblings(T artifact, CMMatchings<T> matchings, CMParameters<T> parameters) {
        return parameters.otherSiblingsCache.computeIfAbsent(artifact, a -> {
            // Copy first, the list returned by siblings(...) is cached and must stay untouched.
            List<T> withoutSelf = new ArrayList<>(siblings(a, matchings, parameters));
            withoutSelf.remove(a);

            return withoutSelf;
        });
    }

    /**
     * Returns all matchings from <code>currentMatchings</code> that contain <code>artifact</code>. The result is
     * stored in <code>parameters.boundContainsCache</code> under the <code>artifact</code>, later calls for the
     * same <code>artifact</code> are answered from that cache for as long as the entry exists.
     *
     * @param artifact
     *         the artifact to search for
     * @param currentMatchings
     *         all matchings
     * @param parameters
     *         the cost model parameters
     * @return all matchings containing <code>artifact</code>
     */
    private List<CMMatching<T>> containing(T artifact, CMMatchings<T> currentMatchings, CMParameters<T> parameters) {
        return parameters.boundContainsCache.computeIfAbsent(artifact, a -> {
            List<CMMatching<T>> hits = new ArrayList<>();

            for (CMMatching<T> candidate : currentMatchings) {

                if (candidate.contains(a)) {
                    hits.add(candidate);
                }
            }

            return hits;
        });
    }

    /**
     * Matches the trees rooted in <code>left</code> and <code>right</code>, passing a freshly constructed
     * <code>CMMatchings</code> (to which no matchings have been added) as the fixed matchings.
     */
    @Override
    public Matchings<T> match(MergeContext context, T left, T right) {
        CMMatchings<T> nothingFixed = new CMMatchings<>(left, right);

        return match(context, left, right, nothingFixed);
    }

    /**
     * Matches the trees rooted in <code>left</code> and <code>right</code>. The matchings contained in
     * <code>preFixed</code> are treated as fixed: their optimized form is translated into <code>CMMatching</code>s
     * and handed to the actual matching run, which returns them in addition to any matchings between previously
     * unmatched artifacts.
     *
     * @param context
     *         the <code>MergeContext</code> containing the parameters to use for the Flexible Tree Matching
     *         algorithm
     * @param left
     *         the left root
     * @param right
     *         the right root
     * @param preFixed
     *         the matchings between the left and right tree that are fixed
     * @return the resulting matchings
     */
    public Matchings<T> match(MergeContext context, T left, T right, Matchings<T> preFixed) {
        CMMatchings<T> fixedPairs = new CMMatchings<>(left, right);

        for (Matching<T> fixedMatching : preFixed.optimized()) {
            T fixedLeft = fixedMatching.getLeft();
            T fixedRight = fixedMatching.getRight();

            fixedPairs.add(new CMMatching<>(fixedLeft, fixedRight));
        }

        return match(context, left, right, fixedPairs);
    }

    /**
     * Matches the trees rooted in <code>left</code> and <code>right</code> using the Metropolis algorithm and the
     * Flexible Tree Matching cost model. Starting from an initial completion of <code>preFixed</code>, a new set of
     * matchings is proposed in every iteration and accepted as the new reference with the computed acceptance
     * probability. Independently of whether a proposal is accepted, the proposal with the lowest cost seen so far
     * is remembered and returned in the end.
     *
     * @param context
     *         the <code>MergeContext</code> containing the parameters to use for the Flexible Tree Matching
     *         algorithm
     * @param left
     *         the left root
     * @param right
     *         the right root
     * @param preFixed
     *         the matchings between the left and right tree that are fixed
     * @return the resulting matchings
     */
    private Matchings<T> match(MergeContext context, T left, T right, CMMatchings<T> preFixed) {
        CMParameters<T> parameters = new CMParameters<>(context);

        // Messages are passed as suppliers so that they are only assembled if the level is actually logged.
        LOG.fine(() -> "Matching " + left + " and " + right + " using the " + getClass().getSimpleName());

        CMMatchings<T> m = initialize(preFixed, parameters);
        ObjectiveValue mObjVal = objective(m, parameters);

        // The cheapest matchings seen so far, this (not the final reference 'm') is what gets returned.
        CMMatchings<T> lowest = m;
        float lowestCost = mObjVal.matchingsCost;

        for (int i = 0; i < context.getCostModelIterations(); i++) {
            CMMatchings<T> mHat = propose(m, preFixed, parameters);
            AcceptanceProbability mHatAccProb = acceptanceProb(mObjVal.objValue, mHat, parameters);

            if (chance(parameters.rng, mHatAccProb.acceptanceProbability)) {

                log(FINER, mHat, () -> "Accepting the matchings.");

                m = mHat;
                mObjVal = mHatAccProb.mHatObjectiveValue;
            }

            // Rejected proposals are considered here as well.
            if (mHatAccProb.mHatObjectiveValue.matchingsCost < lowestCost) {

                lowest = mHat;
                lowestCost = mHatAccProb.mHatObjectiveValue.matchingsCost;

                float finalLowestCost = lowestCost;
                log(FINER, mHat, () -> "New lowest cost matchings with cost " + finalLowestCost + " found.");
            }

            int iteration = i; // effectively final copy for the lambda
            LOG.fine(() -> "End of iteration " + iteration);
        }

        LOG.fine(() -> "Matching ended after " + context.getCostModelIterations() + " iterations.");

        return convert(lowest);
    }

    /**
     * Draws the next double from <code>rng</code> and returns <code>true</code> if it is strictly smaller than
     * <code>p</code>, i.e. returns <code>true</code> with a probability of <code>p</code>.
     *
     * @param rng
     *         the PRNG to sample from
     * @param p
     *         a number between 0.0 and 1.0
     * @return true or false depending on the next double returned by the PRNG
     */
    boolean chance(RandomGenerator rng, double p) {
        double drawn = rng.nextDouble();

        return drawn < p;
    }

    /**
     * Converts the given <code>CMMatching</code>s to an equivalent <code>Matchings</code> instance. No-match
     * entries are dropped. The score of every resulting <code>Matching</code> is the number of artifacts in the
     * tree under its left side whose partner (according to <code>matchings</code>) lies in the tree under its right
     * side.
     *
     * @param matchings
     *         the <code>CMMatching</code>s to convert
     * @return the resulting <code>Matchings</code>
     */
    private Matchings<T> convert(CMMatchings<T> matchings) {
        Map<T, T> partnerOf = matchings.asMap();
        Matchings<T> converted = new Matchings<>();

        for (CMMatching<T> cmMatching : matchings) {

            if (cmMatching.isNoMatch()) {
                continue;
            }

            Set<T> leftTree = new HashSet<>(Artifacts.dfs(cmMatching.m));
            Set<T> rightTree = new HashSet<>(Artifacts.dfs(cmMatching.n));
            int score = 0;

            for (T leftArtifact : leftTree) {

                if (rightTree.contains(partnerOf.get(leftArtifact))) {
                    score++;
                }
            }

            Matching<T> matching = new Matching<>(cmMatching.m, cmMatching.n, score);
            matching.setAlgorithm(CostModelMatcher.class.getSimpleName());

            converted.add(matching);
        }

        return converted;
    }

    /**
     * Proposes a new set of <code>CMMatching</code>s based on the previous matchings <code>m</code>. A prefix of
     * the variable (not pre-fixed) matchings from <code>m</code> is kept, combined with <code>preFixed</code> and
     * then completed to a full set of matchings again. Depending on
     * <code>parameters.fixRandomPercentage</code> the variable matchings are either shuffled and a random number
     * from the configured percentage range is kept or they are sorted by their exact cost and a random number of
     * the cheapest ones is kept.
     *
     * @param m
     *         the matchings from the previous iteration
     * @param preFixed
     *         the matchings between the left and right tree that are fixed
     * @param parameters
     *         the cost model parameters
     * @return the proposed matchings for the next iteration
     */
    private CMMatchings<T> propose(CMMatchings<T> m, CMMatchings<T> preFixed, CMParameters<T> parameters) {
        CMMatchings<T> mVariable = new CMMatchings<>(m, m.left, m.right);
        mVariable.removeAll(preFixed);

        int j;

        if (parameters.fixRandomPercentage) {
            int lower = (int) (parameters.fixLower * mVariable.size());
            int upper = (int) (parameters.fixUpper * mVariable.size());

            Collections.shuffle(mVariable, parameters.rng); // TODO a switch to turn this off
            j = intFromRange(lower, upper, parameters);
        } else if (mVariable.isEmpty()) {
            // All matchings are pre-fixed. There is nothing to choose from and nextInt(0) would throw.
            j = 0;
        } else {
            //TODO sort by exact cost?
            Collections.sort(mVariable, Comparator.comparing(CMMatching::getExactCost));
            j = parameters.rng.nextInt(mVariable.size());
        }

        CMMatchings<T> fixed = new CMMatchings<>(mVariable.subList(0, j), m.left, m.right);

        log(FINER, m, () -> "Fixing the first " + j + " variable matchings from the last iteration.");
        log(FINEST, m, () -> "They are: " + fixed);

        fixed.addAll(preFixed);

        CMMatchings<T> proposition = complete(fixed, parameters);

        log(FINER, proposition, () -> "Proposing matchings for the next iteration.");
        log(FINEST, proposition, () -> "Proposition is: " + proposition);

        return proposition;
    }

    /**
     * Returns a random integer from the given range (both ends inclusive). The value is derived from a single
     * float drawn from <code>parameters.rng</code> that is scaled to the number of integers in the range.
     *
     * @param lower
     *         the lower bound
     * @param upper
     *         the upper bound
     * @param parameters
     *         the cost model parameters
     * @return a random int from [<code>lower</code>, <code>upper</code>]
     */
    private int intFromRange(int lower, int upper, CMParameters<T> parameters) {
        int numValues = (upper - lower) + 1;
        float drawn = parameters.rng.nextFloat();
        int offset = (int) (drawn * numValues);

        return lower + offset;
    }

    /**
     * Constructs the initial set of matchings by completing <code>preFixed</code> and logs the result.
     *
     * @param preFixed
     *         the matchings between the left and right tree that are fixed
     * @param parameters
     *         the cost model parameters
     * @return the initial matchings
     */
    private CMMatchings<T> initialize(CMMatchings<T> preFixed, CMParameters<T> parameters) {
        CMMatchings<T> startingPoint = complete(preFixed, parameters);

        log(FINER, startingPoint, () -> "Initial set of matchings assembled.");
        log(FINEST, startingPoint, () -> "Initial set is: " + startingPoint);

        return startingPoint;
    }

    /**
     * Completes the given <code>fixedMatchings</code> to a set of matchings in which every artifact from the left
     * and right tree is covered by exactly one matching. Starting from the complete bipartite graph between both
     * trees, all edges conflicting with an already chosen matching are pruned. Then, until only chosen matchings
     * remain in the graph, the cost bounds of the remaining edges are recomputed, the edges are sorted by these
     * bounds and one of the not yet chosen edges is selected using an index sampled from
     * <code>parameters.assignDist</code>.
     *
     * @param fixedMatchings
     *         the fixed matchings to complete
     * @param parameters
     *         the cost model parameters
     * @return the completed set of matchings
     */
    private CMMatchings<T> complete(CMMatchings<T> fixedMatchings, CMParameters<T> parameters) {
        CMMatchings<T> graph = completeBipartiteGraph(fixedMatchings.left, fixedMatchings.right, parameters);
        CMMatchings<T> chosen = new CMMatchings<>(fixedMatchings, fixedMatchings.left, fixedMatchings.right);

        for (CMMatching<T> preset : chosen) {
            prune(preset, graph);
        }

        while (chosen.size() != graph.size()) {

            boundCost(graph, parameters);
            Collections.sort(graph, comparing(CMMatching::getCostBounds, BY_LOWER_UPPER));

            // The candidates keep the sorted order of the graph.
            CMMatchings<T> candidates = new CMMatchings<>(graph, graph.left, graph.right);
            candidates.removeAll(chosen);

            // Resample until the index points at an existing candidate.
            int index = parameters.assignDist.sample();
            while (index >= candidates.size()) {
                index = parameters.assignDist.sample();
            }

            CMMatching<T> pick = candidates.get(index);

            chosen.add(pick);
            prune(pick, graph);
        }

        return chosen;
    }

    /**
     * Removes all matchings from <code>g</code> that are not equal to <code>matching</code> but share its
     * (non-<code>null</code>) left or right artifact with it. Artifacts are compared by identity.
     *
     * @param matching
     *         the matching to prune for
     * @param g
     *         the matchings to prune from
     */
    private void prune(CMMatching<T> matching, CMMatchings<T> g) {
        g.removeIf(other -> {

            // The matching itself (or an equal one) always stays.
            if (matching.equals(other)) {
                return false;
            }

            boolean sameLeft = matching.m != null && matching.m == other.m;
            boolean sameRight = matching.n != null && matching.n == other.n;

            return sameLeft || sameRight;
        });
    }

    /**
     * Returns the (randomly ordered) complete bipartite graph between the trees rooted in <code>left</code> and
     * <code>right</code>. Both node sets are extended by one no-match node (represented by <code>null</code>), the
     * edge between the two no-match nodes is left out.
     *
     * @param left
     *         the left root
     * @param right
     *         the right root
     * @param parameters
     *         the cost model parameters
     * @return the complete bipartite graph with its edges represented by <code>CMMatching</code>s
     */
    private CMMatchings<T> completeBipartiteGraph(T left, T right, CMParameters<T> parameters) {
        List<T> leftSide = Artifacts.bfs(left);
        List<T> rightSide = Artifacts.bfs(right);

        // null stands for the "No Match" node of the respective side
        leftSide.add(null);
        rightSide.add(null);

        CMMatchings<T> edges = new CMMatchings<>(left, right);

        for (T lNode : leftSide) {
            for (T rNode : rightSide) {

                if (lNode == null && rNode == null) {
                    continue;
                }

                edges.add(new CMMatching<>(lNode, rNode));
            }
        }

        Collections.shuffle(edges, parameters.rng);

        return edges;
    }

    /**
     * Returns the value of the objective function for <code>matchings</code>, which is
     * <code>exp(-(beta * cost))</code> with <code>cost</code> being the cost of the matchings and
     * <code>beta</code> taken from <code>parameters</code>.
     *
     * @param matchings
     *         the matchings to return the objective function value for
     * @param parameters
     *         the cost model parameters
     * @return the value of the objective function and the cost that was calculated as part of it
     */
    private ObjectiveValue objective(CMMatchings<T> matchings, CMParameters<T> parameters) {
        float matchingsCost = cost(matchings, parameters);
        double objectiveValue = Math.exp(-(parameters.beta * matchingsCost));

        log(FINER, matchings, () -> "Cost of matchings is " + matchingsCost);
        log(FINER, matchings, () -> "Objective function value for matchings is " + objectiveValue);

        return new ObjectiveValue(objectiveValue, matchingsCost);
    }

    /**
     * Returns the acceptance probability for the proposed set of matchings <code>mHat</code>, which is the ratio
     * of the objective value of <code>mHat</code> to <code>mObjectiveValue</code>, capped at 1.
     *
     * @param mObjectiveValue
     *         the objective value for the current reference set of matchings
     * @param mHat
     *         the newly proposed set of matchings
     * @param parameters
     *         the cost model parameters
     * @return the acceptance probability including the <code>ObjectiveValue</code> calculated for <code>mHat</code>
     */
    private AcceptanceProbability acceptanceProb(double mObjectiveValue, CMMatchings<T> mHat, CMParameters<T> parameters) {
        ObjectiveValue proposed = objective(mHat, parameters);

        double ratio = proposed.objValue / mObjectiveValue;
        double probability = Math.min(1, ratio);

        log(FINER, mHat, () -> "Acceptance probability for matchings is " + probability);

        return new AcceptanceProbability(probability, proposed);
    }

    /**
     * Returns the identity hash code of the given <code>Object</code> formatted as a hexadecimal
     * <code>String</code>.
     *
     * @param o
     *         the <code>Object</code> to return the <code>String</code> id for
     * @return the <code>String</code> id
     */
    private String id(Object o) {
        int identityHash = identityHashCode(o);

        return toHexString(identityHash);
    }

    /**
     * Logs the given <code>msg</code> using the {@link #LOG} and prepends the {@link #id(Object)} of the given
     * matchings, left-aligned and padded to a width of ten characters. The final message is only assembled when
     * the logger asks the supplier for it.
     *
     * @param level
     *         the level to log at
     * @param matchings
     *         the matchings the message concerns
     * @param msg
     *         the message to log
     */
    private void log(Level level, CMMatchings<T> matchings, Supplier<String> msg) {
        Supplier<String> withId = () -> String.format("%-10s%s", id(matchings), msg.get());

        LOG.log(level, withId);
    }
}