/**
* Copyright (C) 2013-2014 Olaf Lessenich
* Copyright (C) 2014-2015 University of Passau, Germany
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
* MA 02110-1301 USA
*
* Contributors:
* Olaf Lessenich <lessenic@fim.uni-passau.de>
* Georg Seibt <seibt@fim.uni-passau.de>
*/
package de.fosd.jdime.matcher.cost_model;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import de.fosd.jdime.artifact.Artifact;
import de.fosd.jdime.config.merge.MergeContext;
import de.fosd.jdime.util.Tuple;
import org.apache.commons.math3.distribution.IntegerDistribution;
import org.apache.commons.math3.distribution.PascalDistribution;
import org.apache.commons.math3.random.RandomAdaptor;
import org.apache.commons.math3.random.Well19937c;
import static de.fosd.jdime.stats.KeyEnums.Type.CLASS;
import static de.fosd.jdime.stats.KeyEnums.Type.METHOD;
/**
 * A container class for the parameters of the <code>CostModelMatcher</code>. Certain caches for speeding up successive
 * calls to {@link CostModelMatcher#cost(CMMatchings, CMParameters)} are also managed by this class.
 *
 * @param <T>
 *         the type of the <code>Artifact</code>s being matched
 */
public final class CMParameters<T extends Artifact<T>> {

    /**
     * The value of {@link #beta} installed by the constructor.
     */
    private static final float DEFAULT_BETA = 30;

    /**
     * The factor by which the renaming weight is scaled when both artifacts of a matching are methods or both are
     * classes (see {@link #setRenamingWeight(float)}).
     */
    private static final float RENAMING_EASE = 0.1f;

    /**
     * The cost of not matching an artifact.
     */
    float wn;

    /**
     * The function determining the cost of renaming an artifact. This cost is 0 if the artifacts match according to
     * {@link Artifact#matches}.
     */
    CostModelMatcher.SimpleWeightFunction<T> wr;

    /**
     * The function determining the cost of ancestry violations.
     */
    CostModelMatcher.WeightFunction<T> wa;

    /**
     * The function determining the cost of breaking up sibling groups.
     */
    CostModelMatcher.WeightFunction<T> ws;

    /**
     * The function determining the cost of an edge that violates ordering.
     */
    CostModelMatcher.SimpleWeightFunction<T> wo;

    /**
     * The PRNG used during the execution of the {@link CostModelMatcher#match(MergeContext, Artifact, Artifact)}
     * function.
     */
    RandomAdaptor rng;

    /**
     * A {@link PascalDistribution} from which indices into the list of available edges may be sampled. The probability
     * distribution is dictated by {@link #pAssign}; it is rebuilt whenever {@link #setPAssign(float)} is called.
     */
    IntegerDistribution assignDist;

    /**
     * The chance that an edge is chosen when traversing the available edges in
     * {@link CostModelMatcher#complete(CMMatchings, CMParameters)}.
     */
    float pAssign;

    /**
     * Lower bound of the percentage (a number from [0, 1]) indicating how many matchings from the previous iteration
     * should be fixed when proposing a new set of matchings.
     */
    float fixLower;

    /**
     * Upper bound of the percentage (a number from [0, 1]) indicating how many matchings from the previous iteration
     * should be fixed when proposing a new set of matchings.
     */
    float fixUpper;

    /**
     * Scaling factor for the difference in cost of two proposed sets of matchings. A higher value makes it less likely
     * that a set of matchings is accepted despite having a higher cost than the previous reference set.
     */
    float beta;

    /**
     * Whether the cost calculations (both exact and bounded) are executed for all edges in parallel.
     */
    boolean parallel;

    /**
     * Whether to fix a random percentage of matchings instead of the first (randomly many) matchings.
     */
    boolean fixRandomPercentage;

    /*
     * Caches valid for the entirety of the CostModelMatcher#match(MergeContext, Artifact, Artifact) function.
     */

    ConcurrentMap<Tuple<T, T>, Tuple<T, T>> lcaCache;
    ConcurrentMap<T, List<T>> siblingCache;
    ConcurrentMap<T, List<T>> otherSiblingsCache;

    /*
     * Caches valid during one run of the CostModelMatcher#cost(CMMatchings, CMParameters) function.
     */

    /**
     * Caches the <code>CMMatching</code>s containing an artifact.
     */
    ConcurrentMap<T, CMMatching<T>> exactContainsCache;

    /*
     * Caches valid during one run of the CostModelMatcher#boundCost(CMMatchings, CMParameters) function.
     */

    /**
     * Caches lists of <code>CMMatching</code>s containing an artifact.
     */
    ConcurrentMap<T, List<CMMatching<T>>> boundContainsCache;

    /**
     * Constructs a new <code>CMParameters</code> configured from the given <code>MergeContext</code>. All weights,
     * the PRNG seed, the assignment probability, the fixing bounds and the boolean switches are read from the
     * context; {@link #beta} is set to a fixed default. All caches start out empty.
     *
     * @param context
     *         the <code>MergeContext</code> to use
     */
    public CMParameters(MergeContext context) {
        setNoMatchWeight(context.getWn());
        setRenamingWeight(context.getWr());
        setAncestryViolationWeight(context.getWa());
        setSiblingGroupBreakupWeight(context.getWs());
        setOrderingWeight(context.getWo());

        // orElseGet: only construct a self-seeded generator if the context does not supply a seed.
        rng = new RandomAdaptor(context.getSeed().map(Well19937c::new).orElseGet(Well19937c::new));

        // Must run after rng is initialised because it (re)builds assignDist on top of rng.
        setPAssign(context.getpAssign());

        setFixLower(context.getFixLower());
        setFixUpper(context.getFixUpper());
        setBeta(DEFAULT_BETA);
        setParallel(context.isCmMatcherParallel());
        setFixRandomPercentage(context.isCmMatcherFixRandomPercentage());

        lcaCache = new ConcurrentHashMap<>();
        siblingCache = new ConcurrentHashMap<>();
        otherSiblingsCache = new ConcurrentHashMap<>();
        exactContainsCache = new ConcurrentHashMap<>();
        boundContainsCache = new ConcurrentHashMap<>();
    }

    /**
     * Sets the no-match weighting function to return the given <code>wn</code>.
     *
     * @param wn
     *         the no-match weight
     */
    public void setNoMatchWeight(float wn) {
        this.wn = wn;
    }

    /**
     * Sets the renaming weighting function to one based on the given <code>wr</code>. The installed function returns
     * <code>wr</code> scaled by a factor of 0.1 if both artifacts of the matching are methods or both are classes,
     * and the unscaled <code>wr</code> otherwise.
     *
     * @param wr
     *         the renaming weight
     */
    public void setRenamingWeight(float wr) {
        setRenamingWeight(matching -> {
            boolean easedRenaming = (matching.m.getType() == METHOD && matching.n.getType() == METHOD)
                    || (matching.m.getType() == CLASS && matching.n.getType() == CLASS);

            return easedRenaming ? RENAMING_EASE * wr : wr;
        });
    }

    /**
     * Sets the renaming weighting function.
     *
     * @param wr
     *         the new renaming weighting function
     */
    public void setRenamingWeight(CostModelMatcher.SimpleWeightFunction<T> wr) {
        this.wr = wr;
    }

    /**
     * Sets the ancestry violation weighting function to multiply the cost with <code>wa</code>.
     *
     * @param wa
     *         the new ancestry violation weight
     */
    public void setAncestryViolationWeight(float wa) {
        setAncestryViolationWeight((matching, quantity) -> wa * quantity);
    }

    /**
     * Sets the ancestry violation weighting function.
     *
     * @param wa
     *         the new ancestry violation weighting function
     */
    public void setAncestryViolationWeight(CostModelMatcher.WeightFunction<T> wa) {
        this.wa = wa;
    }

    /**
     * Sets the sibling group breakup weighting function to multiply the cost with <code>ws</code>.
     *
     * @param ws
     *         the new sibling group breakup weight
     */
    public void setSiblingGroupBreakupWeight(float ws) {
        setSiblingGroupBreakupWeight((matching, quantity) -> ws * quantity);
    }

    /**
     * Sets the sibling group breakup weighting function.
     *
     * @param ws
     *         the new sibling group breakup weighting function
     */
    public void setSiblingGroupBreakupWeight(CostModelMatcher.WeightFunction<T> ws) {
        this.ws = ws;
    }

    /**
     * Sets the ordering violation weighting function to return <code>wo</code>.
     *
     * @param wo
     *         the new ordering violation weight
     */
    public void setOrderingWeight(float wo) {
        setOrderingWeight(matching -> wo);
    }

    /**
     * Sets the ordering violation weighting function.
     *
     * @param wo
     *         the new ordering violation weighting function
     */
    public void setOrderingWeight(CostModelMatcher.SimpleWeightFunction<T> wo) {
        this.wo = wo;
    }

    /**
     * Sets the fixing probability used in {@link CostModelMatcher#complete(CMMatchings, CMParameters)} and rebuilds
     * {@link #assignDist} on top of the current {@link #rng} so that the sampling distribution always agrees with
     * the stored probability.
     *
     * @param pAssign
     *         the new fixing probability
     * @throws org.apache.commons.math3.exception.OutOfRangeException
     *         if <code>pAssign</code> is not from [0, 1] (raised by the {@link PascalDistribution} constructor); in
     *         that case neither {@link #pAssign} nor {@link #assignDist} is modified
     */
    public void setPAssign(float pAssign) {
        // Build the distribution first so that a rejected probability leaves this object untouched.
        IntegerDistribution dist = new PascalDistribution(rng, 1, pAssign);

        this.pAssign = pAssign;
        this.assignDist = dist;
    }

    /**
     * Sets the lower bound for the number of matchings being fixed for the next iteration.
     *
     * @param fixLower
     *         the lower bound, must be from [0, 1]
     * @throws IllegalArgumentException
     *         if <code>fixLower</code> is not from [0, 1]
     */
    public void setFixLower(float fixLower) {
        checkInRange(0, 1, fixLower);
        this.fixLower = fixLower;
    }

    /**
     * Sets the upper bound for the number of matchings being fixed for the next iteration.
     *
     * @param fixUpper
     *         the upper bound, must be from [0, 1]
     * @throws IllegalArgumentException
     *         if <code>fixUpper</code> is not from [0, 1]
     */
    public void setFixUpper(float fixUpper) {
        checkInRange(0, 1, fixUpper);
        this.fixUpper = fixUpper;
    }

    /**
     * Checks whether <code>val</code> is in the range [<code>lower</code>, <code>upper</code>]. A <code>NaN</code>
     * value fails both comparisons and is therefore rejected as well.
     *
     * @param lower
     *         the lower bound
     * @param upper
     *         the upper bound
     * @param val
     *         the value to check
     * @throws IllegalArgumentException
     *         if <code>val</code> is not from [<code>lower</code>, <code>upper</code>]
     */
    private static void checkInRange(float lower, float upper, float val) {
        if (!(lower <= val && val <= upper)) {
            throw new IllegalArgumentException(String.format("%s is not in the range [%s, %s]", val, lower, upper));
        }
    }

    /**
     * Sets the cost scaling factor beta used in the objective function.
     *
     * @param beta
     *         the new beta
     */
    public void setBeta(float beta) {
        this.beta = beta;
    }

    /**
     * Sets whether cost calculations should be performed in parallel for all matchings.
     *
     * @param parallel
     *         whether cost calculations should be parallel
     */
    public void setParallel(boolean parallel) {
        this.parallel = parallel;
    }

    /**
     * Sets whether to fix a random percentage of matchings instead of the first (randomly many) matchings.
     *
     * @param fixRandomPercentage
     *         whether to fix a random percentage of matchings
     * @see #setFixLower(float)
     * @see #setFixUpper(float)
     */
    public void setFixRandomPercentage(boolean fixRandomPercentage) {
        this.fixRandomPercentage = fixRandomPercentage;
    }

    /**
     * Clears the caches that are only valid for one exact cost calculation.
     */
    public void clearExactCaches() {
        exactContainsCache.clear();
    }

    /**
     * Clears the caches that are only valid for one bounded cost calculation.
     */
    public void clearBoundCaches() {
        boundContainsCache.clear();
    }
}