package uk.ac.rhul.cs.cl1;

import java.io.Serializable;
import java.util.Locale;

import uk.ac.rhul.cs.cl1.quality.CohesivenessFunction;
import uk.ac.rhul.cs.cl1.quality.QualityFunction;
import uk.ac.rhul.cs.cl1.seeding.EveryNodeSeedGenerator;
import uk.ac.rhul.cs.cl1.seeding.SeedGenerator;
import uk.ac.rhul.cs.cl1.similarity.*;

/**
 * Stores the parameters of a ClusterONE algorithm instance.
 *
 * There is a wide variety of parameters for the algorithm, but the default
 * settings are more or less sensible and suitable for biological scenarios.
 *
 * See the documentation of the class variables for more details on the parameters.
 *
 * NOTE(review): the class is {@link Serializable} but declares no explicit
 * {@code serialVersionUID}. One is deliberately not added here because doing so
 * would change the stream version of already-serialized instances.
 *
 * @author tamas
 */
public class ClusterONEAlgorithmParameters implements Serializable {
    /** Minimum size of the clusters that will be returned */
    protected int minSize = 3;

    /**
     * Minimum density of the clusters that will be returned
     *
     * null means that the density limit will be set based on whether the graph
     * is weighted or unweighted, and in case of undirected graphs, whether the
     * transitivity is above or below a certain empirical threshold.
     */
    protected Double minDensity = null;

    /**
     * Overlap threshold value.
     * No pair of complexes will have an overlap larger than this
     * in the result
     */
    protected double overlapThreshold = 0.8;

    /**
     * Haircut threshold value.
     *
     * After generating cohesive subgroups, vertices having an internal weight
     * less than the average internal weight times this threshold will be
     * removed from the subgroups. {@link #isHaircutNeeded()} reports that a
     * haircut is needed only when this value is strictly positive and at most
     * 1, so zero (the default) and negative values disable the haircut.
     */
    protected double haircutThreshold = 0;

    /**
     * k-core threshold value.
     *
     * After generating cohesive subgroups, those which do not contain a
     * k-core may be thrown away. k is specified by this threshold value.
     * If it is zero or negative, the filter will obviously be disabled.
     */
    protected int kCoreThreshold = 0;

    /**
     * Node penalty.
     *
     * When nonzero, each node is assumed to have an extra boundary weight equal
     * to this amount, no matter what the other internal nodes are. This can
     * be used to account for noise in the initial data; see
     * {@link uk.ac.rhul.cs.cl1.quality.CohesivenessFunction} for more details.
     */
    protected double nodePenalty = 2.0;

    /**
     * Whether to fluff the clusters.
     *
     * After generating cohesive subgroups, external boundary vertices
     * connected to more than 2/3 of the internal vertices will be added to
     * the subgroups if this is true.
     */
    protected boolean fluffClusters = false;

    /**
     * Whether to keep the initial seed nodes of a cluster within the cluster even
     * if their removal would increase the value of the goal function.
     */
    protected boolean keepInitialSeeds = false;

    /**
     * Complex merging method.
     *
     * Possible values:
     * <ul>
     * <li>single: single-pass merge</li>
     * <li>multi: multi-pass merge</li>
     * </ul>
     */
    protected String mergingMethod = "single";

    /**
     * Whether to reject seeds from the seed generator if all the nodes in the seed have
     * already been made part of a cluster before.
     */
    protected boolean rejectSeedsWithOnlyUsedNodes = false;

    /**
     * The seed generation method.
     */
    protected SeedGenerator seedGenerator = new EveryNodeSeedGenerator();

    /**
     * Similarity function used by the complex merging methods.
     */
    protected SimilarityFunction<NodeSet> similarityFunction = new MatchingScore<NodeSet>();

    /**
     * The number of threads to use during the cluster growth phase. Zero or negative
     * numbers mean to select it automatically based on the number of CPU cores.
     */
    protected int numThreads = 0;

    /**
     * Returns the k-core threshold used by the algorithm
     *
     * @return the k-core threshold
     */
    public int getKCoreThreshold() {
        return kCoreThreshold;
    }

    /**
     * Returns the haircut threshold used by the algorithm
     *
     * @return the haircut threshold
     */
    public double getHaircutThreshold() {
        return haircutThreshold;
    }

    /**
     * Returns the name of the merging method used by the algorithm
     *
     * @return the name of the merging method
     */
    public String getMergingMethodName() {
        return mergingMethod;
    }

    /**
     * Returns the minimum density of clusters
     *
     * @return the minimum density of clusters, or null if no explicit value
     *         has been set
     */
    public Double getMinDensity() {
        return minDensity;
    }

    /**
     * Returns the minimum size of the clusters that will be returned
     *
     * @return the minimum size
     */
    public int getMinSize() {
        return minSize;
    }

    /**
     * Returns the number of threads to use during the algorithm. May return zero,
     * in which case the algorithm should use the number of CPU cores as a guideline.
     *
     * @return the number of threads to use
     */
    public int getNumThreads() {
        return numThreads;
    }

    /**
     * Returns the penalty value associated with each node.
     *
     * See {@link uk.ac.rhul.cs.cl1.quality.CohesivenessFunction} for more details
     * about what it is.
     *
     * @return the penalty value
     */
    public double getNodePenalty() {
        return nodePenalty;
    }

    /**
     * Returns the overlap threshold of the algorithm.
     *
     * The overlap threshold controls whether two given clusters will be merged in the
     * final result set. The complexes will be merged if their similarity, as measured
     * by the configured similarity function (see {@link #getSimilarityFunction()}),
     * is larger than this ratio.
     *
     * @return the overlap threshold
     */
    public double getOverlapThreshold() {
        return overlapThreshold;
    }

    /**
     * Returns whether the algorithm should reject seeds from the seed generator if the
     * seed contains only nodes that have been seen before in other clusters or seeds.
     *
     * @return true if such seeds should be rejected
     */
    public boolean shouldRejectSeedsWithOnlyUsedNodes() {
        return rejectSeedsWithOnlyUsedNodes;
    }

    /**
     * Returns the quality function that will be used by the algorithm.
     *
     * A new {@link CohesivenessFunction} is constructed on every call from the
     * current node penalty.
     *
     * @return the quality function
     */
    public QualityFunction getQualityFunction() {
        return new CohesivenessFunction(nodePenalty);
    }

    /**
     * Returns the seed generation method of the algorithm.
     *
     * @return the seed generation method
     */
    public SeedGenerator getSeedGenerator() {
        return seedGenerator;
    }

    /**
     * Returns the similarity function used to compare clusters.
     *
     * This is the function most recently configured through one of the
     * {@code setSimilarityFunction} overloads, or the default matching score.
     *
     * @return the similarity function
     */
    public SimilarityFunction<NodeSet> getSimilarityFunction() {
        return similarityFunction;
    }

    /**
     * Returns whether we will fluff the clusters or not.
     *
     * Yes, this is a funny name, but I wanted to keep this class compatible with
     * JavaBean naming conventions, which prefers an "is" prefix for boolean
     * getters.
     *
     * @return whether the clusters will be fluffed
     */
    public boolean isFluffClusters() {
        return this.fluffClusters;
    }

    /**
     * Returns whether the initial seed nodes of a cluster are always kept within the
     * cluster.
     *
     * @return whether the initial seed nodes of a cluster are always kept within the
     *         cluster.
     */
    public boolean isKeepInitialSeeds() {
        return keepInitialSeeds;
    }

    /**
     * Sets the k-core threshold.
     *
     * @param kCoreThreshold the new k-core threshold
     */
    public void setKCoreThreshold(int kCoreThreshold) {
        this.kCoreThreshold = kCoreThreshold;
    }

    /**
     * Sets whether we want to fluff the clusters or not.
     *
     * @param fluffClusters whether we want to fluff the clusters or not.
     */
    public void setFluffClusters(boolean fluffClusters) {
        this.fluffClusters = fluffClusters;
    }

    /**
     * Sets whether the initial seed nodes are always kept within the cluster or not.
     *
     * @param keepInitialSeeds whether the seed nodes are always kept within
     *        the cluster or not.
     */
    public void setKeepInitialSeeds(boolean keepInitialSeeds) {
        this.keepInitialSeeds = keepInitialSeeds;
    }

    /**
     * Sets the haircut threshold of the algorithm.
     *
     * @param haircutThreshold the new haircut threshold
     */
    public void setHaircutThreshold(double haircutThreshold) {
        this.haircutThreshold = haircutThreshold;
    }

    /**
     * Sets the name of the merging method that will be used by the algorithm.
     *
     * The name is stored in lowercase. The conversion uses {@link Locale#ROOT}
     * so that the result does not depend on the default locale of the JVM
     * (e.g. "SINGLE" must not become "s\u0131ngle" under a Turkish locale).
     *
     * @param mergingMethod the merging method to use
     * @throws NullPointerException if {@code mergingMethod} is null
     */
    public void setMergingMethodName(String mergingMethod) {
        this.mergingMethod = mergingMethod.toLowerCase(Locale.ROOT);
    }

    /**
     * Sets the minimum density of clusters that can be considered acceptable.
     *
     * @param minDensity the minimum density. Negative values are replaced by
     *        zero. null clears the explicit limit, in which case the limit is
     *        left to be determined as described for the {@code minDensity} field.
     */
    public void setMinDensity(Double minDensity) {
        if (minDensity == null) {
            this.minDensity = null;
        } else {
            this.minDensity = Math.max(0, minDensity);
        }
    }

    /**
     * Sets the minimum size of the clusters that will be returned.
     *
     * @param minSize the minimum size; values less than 1 are replaced by 1
     */
    public void setMinSize(int minSize) {
        this.minSize = Math.max(1, minSize);
    }

    /**
     * Sets the penalty value associated with each node.
     *
     * See {@link CohesivenessFunction} for more details about what it is.
     *
     * @param penalty the penalty value
     */
    public void setNodePenalty(double penalty) {
        this.nodePenalty = penalty;
    }

    /**
     * Sets the number of threads to use during the algorithm. Zero means to use the
     * number of CPU cores as a guideline. Negative numbers are treated as zero.
     *
     * @param numThreads the number of threads to use
     */
    public void setNumThreads(int numThreads) {
        this.numThreads = Math.max(0, numThreads);
    }

    /**
     * Sets the overlap threshold of the algorithm.
     *
     * @param overlapThreshold the new overlap threshold; negative values are
     *        replaced by zero
     * @see #getOverlapThreshold()
     */
    public void setOverlapThreshold(double overlapThreshold) {
        this.overlapThreshold = Math.max(0, overlapThreshold);
    }

    /**
     * Sets whether the algorithm will reject seeds that contain only nodes that have
     * already been made part of a cluster grown from a different (earlier) seed.
     *
     * @param rejectSeedsWithOnlyUsedNodes whether such seeds should be rejected
     */
    public void setRejectSeedsWithOnlyUsedNodes(boolean rejectSeedsWithOnlyUsedNodes) {
        this.rejectSeedsWithOnlyUsedNodes = rejectSeedsWithOnlyUsedNodes;
    }

    /**
     * Sets the seed generation method of the algorithm from a string specification
     *
     * @param seedMethodSpec the new seed generation method. Must be a specification
     *        that is understood by {@link SeedGenerator#fromString(String)}
     * @throws InstantiationException propagated from
     *         {@link SeedGenerator#fromString(String)}
     */
    public void setSeedGenerator(String seedMethodSpec) throws InstantiationException {
        setSeedGenerator(SeedGenerator.fromString(seedMethodSpec));
    }

    /**
     * Sets the seed generation method of the algorithm
     *
     * @param seedGenerator the new seed generation method.
     */
    public void setSeedGenerator(SeedGenerator seedGenerator) {
        this.seedGenerator = seedGenerator;
    }

    /**
     * Sets the name of the similarity function that will be used by
     * the algorithm in the merging step.
     *
     * Possible values (matched case-sensitively):
     * <ul>
     * <li>dice: Dice similarity</li>
     * <li>jaccard: Jaccard similarity</li>
     * <li>match: match coefficient</li>
     * <li>meet/min or simpson: Simpson coefficient</li>
     * </ul>
     *
     * @param similarityFunctionName the name of the function to use.
     * @throws InstantiationException if the name is null or is not one of the
     *         values listed above; the current similarity function is left
     *         unchanged in that case
     */
    public void setSimilarityFunction(String similarityFunctionName) throws InstantiationException {
        // Constant-first comparisons so that a null name is reported as an
        // unknown function instead of failing with a NullPointerException.
        if ("match".equals(similarityFunctionName)) {
            this.similarityFunction = new MatchingScore<NodeSet>();
        } else if ("meet/min".equals(similarityFunctionName)
                || "simpson".equals(similarityFunctionName)) {
            this.similarityFunction = new SimpsonCoefficient<NodeSet>();
        } else if ("jaccard".equals(similarityFunctionName)) {
            this.similarityFunction = new JaccardSimilarity<NodeSet>();
        } else if ("dice".equals(similarityFunctionName)) {
            this.similarityFunction = new DiceSimilarity<NodeSet>();
        } else {
            throw new InstantiationException("Unknown similarity function: " + similarityFunctionName);
        }
    }

    /**
     * Sets the similarity function that will be used by the algorithm in
     * the merging step.
     *
     * @param func the similarity function
     */
    public void setSimilarityFunction(SimilarityFunction<NodeSet> func) {
        this.similarityFunction = func;
    }

    /**
     * Returns whether a haircut operation will be needed.
     *
     * @return true if the haircut threshold is strictly positive and not
     *         larger than 1
     */
    public boolean isHaircutNeeded() {
        return (haircutThreshold > 0.0 && haircutThreshold <= 1.0);
    }

    /**
     * Returns a nice string summary of the algorithm parameters.
     *
     * @return a multi-line summary, one parameter per line
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append("Minimum size: " + minSize + "\n");
        sb.append("Minimum density: " + minDensity + "\n");
        sb.append("Overlap threshold: " + overlapThreshold + "\n");
        sb.append("Haircut threshold: " + haircutThreshold + "\n");
        sb.append("K-core threshold: " + kCoreThreshold + "\n");
        sb.append("Node penalty: " + nodePenalty + "\n");
        sb.append("Merging method: " + mergingMethod + "\n");
        sb.append("Seed generator: " + seedGenerator + "\n");
        // The similarity function can be set to null through the setter;
        // toString() should describe that state rather than throw.
        sb.append("Similarity function: "
                + (similarityFunction == null ? "null" : similarityFunction.getName()) + "\n");
        sb.append("Initial seeds kept: " + keepInitialSeeds + "\n");
        sb.append("Reject seeds with only used nodes: " + rejectSeedsWithOnlyUsedNodes + "\n");
        return sb.toString();
    }
}