package ch.akuhn.org.ggobi.plugins.ggvis; import static java.lang.Math.abs; import static java.lang.Math.pow; import static java.lang.Math.sqrt; import ch.akuhn.matrix.DenseMatrix; import ch.akuhn.matrix.Function; import ch.akuhn.matrix.SymmetricMatrix; /** Multidimensional scaling. * Initially ported from ggvis and greatly rewritten in Java by Adrian Kuhn. * Original copyright notice: *<PRE>/<span/>* * * mds.c: multidimensional scaling * * code originally written for xgvis by Michael Littman, greatly extended * * and tuned by Andreas Buja. Now being ported to ggvis. * *<span/>/</PRE> * @author Adrian Kuhn (this in Java) * @author Andreas Buja (ggvis in C/C++) * @author Michael Littman (xgvis in C/C++) * */ public class Mds { static final int ANCHOR = 2; private static boolean ANCHOR_FIXED = false; private static boolean ANCHOR_SCALE = false; static final double delta = 1E-10; static final int DRAGGED = 4; private static final boolean TODO_SYMMETRY = false; final DenseMatrix config_dist; final DenseMatrix Dtarget; /*-- D in the documentation; dist in the xgvis code --*/ private Points pos; /* these belong in ggv */ public double stress; private double stress_dx, stress_dd, stress_xx; private final double sig_pow(double x, double p) { return (x >= 0.0 ? pow(x, p) : -pow(-x, p)); } private double stepsize = 0.02; private double dist_power = 1.0; private double Dtarget_power = 1.0; private double lnorm = 2.0; private double weight_power = 0.0; private double dist_power_over_lnorm = 0.5; private double lnorm_over_dist_power = 2.0; private double within_between = 1.0; private double rand_select_val = 1.0; /* selection probability */ private double threshold_high = 0.0; private double threshold_low = 0.0; private MDSGroupInd group_ind = MDSGroupInd.all_distances; /*-- used in mds.c --*/ private DenseMatrix weights = null; final private Points gradient; private double Dtarget_max = Double.MAX_VALUE; private double Dtarget_min = Double.MIN_VALUE; private final int len; /* */ public void init(boolean randomPoints) { /* populate with INF */ this.Dtarget.apply(f_dtarget); this.Dtarget_max = Dtarget.max(); this.Dtarget_min = Dtarget.min(); if (Dtarget_min < 0) throw new Error("negative dissimilarity: D[?][?] = ? -> NA\n"); this.threshold_low = this.Dtarget_min; this.threshold_high = this.Dtarget_max; if (randomPoints) { for (int a = 0; a < pos.x.length; a++) { this.pos.x[a] = (Math.random() - 0.5) * 2; this.pos.y[a] = (Math.random() - 0.5) * 2; } } this.config_dist.fill(Double.NaN); set_weights(); } public Mds(DenseMatrix dissimilarities, Points initial, Function fConfigDist, Function fWeights, Function fDtarget) { len = dissimilarities.rowCount(); Dtarget = dissimilarities; config_dist = new SymmetricMatrix(len); this.pos = initial == null ? new Points(len) : initial; this.gradient = new Points(len); f_config_dist = fConfigDist; f_weights = fWeights; f_dtarget = fDtarget; this.init(initial == null); } private final Function f_config_dist; private final Function f_weights; private final Function f_dtarget; private boolean IS_ANCHOR(int i) { return i % 100 == 0; } boolean IS_DRAGGED(int i) { return false; } private double Lp_distance_pow (int i, int j) { double dsum = 0.0; if (this.lnorm == 2. && this.dist_power == 1.) { dsum += (this.pos.x[i] - this.pos.x[j]) * (this.pos.x[i] - this.pos.x[j]); dsum += (this.pos.y[i] - this.pos.y[j]) * (this.pos.y[i] - this.pos.y[j]); return (sqrt(dsum)); } else { /* non-Euclidean or Dtarget power != 1. */ dsum += pow (abs (this.pos.x[i] - this.pos.x[j]), this.lnorm); dsum += pow (abs (this.pos.y[i] - this.pos.y[j]), this.lnorm); return (pow(dsum, this.dist_power_over_lnorm)); } } /** * Perform one loop of the iterative mds function. *<P> * If doit is False, then we really want to determine the * stress function without doing anything to the gradient */ public void mds_once (boolean doit) { mds_once_part2(); mds_once_part3(doit); /* close: if (doit && num_active_dist > 0) */ /* experiment: normalize point cloud after using simplified gradient */ // FIXME can we avoid this? pos.ggv_center_scale_pos(); } private void mds_once_part2() { // allocate position and compute means pos.get_center(); // i's are moved by j's for (int i = 0; i < this.Dtarget.rowCount(); i++) { // these points are not moved by the gradient if (IS_DRAGGED(i) || (ANCHOR_FIXED && IS_ANCHOR(i))) continue; /* j's are moving i's */ for (int j = 0; j < i; j++) { if (mds_once_part2_continue(i, j)) continue; this.config_dist.put(i,j,f_config_dist.apply(Lp_distance_pow(i, j))); } } } private boolean mds_once_part2_continue(int i, int j) { /* these points do not contribute to the gradient */ if ((ANCHOR_SCALE || ANCHOR_FIXED) && !IS_ANCHOR(j) && !IS_DRAGGED(j)) return true; if ((ANCHOR_SCALE || ANCHOR_FIXED) && !IS_ANCHOR(i) && !IS_DRAGGED(i)) return true; /* if the target distance is missing, skip */ if (Double.isNaN(this.Dtarget.get(i,j))) return true; /* if weight is zero, skip */ if (this.weights != null && this.weights.get(i,j) == 0.) return true; /* using groups */ if (this.group_ind == MDSGroupInd.within && !SAMEGLYPH(i,j)) return true; if (this.group_ind == MDSGroupInd.between && SAMEGLYPH(i,j)) return true; /* * if the target distance is within the thresholds * set using the barplot of distances, keep going. */ if (this.Dtarget.get(i,j) < this.threshold_low || this.Dtarget.get(i,j) > this.threshold_high) return true; /* * random selection: needs to be done symmetrically */ if (this.rand_select_val < 1.0) { //if (i < j && this.rand_sel.vals[i][j] > this.rand_select_val) continue; //if (i > j && this.rand_sel.vals[j][i] > this.rand_select_val) continue; } /* * zero weights: * assume weights exist if test is positive, and * can now assume that weights are >0 for non-NA */ if (!doesNotWeight()) { if (this.weights.get(i,j) == 0.) return true; } return false; } private void mds_once_part3(boolean doit) { power_transform(); update_stress(); /* --- for active dissimilarities, do the gradient push if asked for ----*/ if (doit) { /* Zero out the gradient matrix. */ this.gradient.clear(); /* ------------- gradient accumulation: j's push i's ----------- */ for (int i = 0; i < this.Dtarget.rowCount(); i++) { for (int j = 0; j < i; j++) { double weight; double dist_trans = this.Dtarget.get(i,j); if (Double.isNaN(dist_trans)) continue; double dist_config = this.config_dist.get(i,j); if (abs(dist_config) < delta) dist_config = delta; if (doesNotWeight()) { weight = 1.0; } else { weight = this.weights.get(i,j); } mds_once_part3_gradient(dist_trans, dist_config, weight, i, j); } } /* center the classical gradient */ double gfactor = mds_once_part3_normalizeGradient(); /* add the gradient matrix to the position matrix and drag points */ for (int i=0; i<this.pos.x.length; i++) { if (!IS_DRAGGED(i)) { this.pos.x[i] += (gfactor * this.gradient.x[i]); this.pos.y[i] += (gfactor * this.gradient.y[i]); } else { throw null; // for (int k=0; k < this.dim; k++) // this.pos.vals[i][k] = dpos.tform.vals[i][k] ; } } } } private void mds_once_part3_gradient(double dist_trans, double dist_config, double weight, int i, int j) { double resid; double step_mag; // scale independent version: */ resid = (dist_trans - stress_dx / stress_xx * dist_config); // scale dependent version: // resid = (dist_trans - dist_config); if (this.lnorm != 2) { assert TODO_SYMMETRY; /* non-Euclidean Minkowski/Lebesgue metric */ step_mag = weight * resid * pow (dist_config, 1 - this.lnorm_over_dist_power); for (int k = 0; k < 2; k++) { this.gradient.x[i] += step_mag * sig_pow(this.pos.x[i]-this.pos.x[j], this.lnorm-1.0); this.gradient.y[i] += step_mag * sig_pow(this.pos.y[i]-this.pos.y[j], this.lnorm-1.0); this.gradient.x[j] += step_mag * sig_pow(this.pos.x[j]-this.pos.x[i], this.lnorm-1.0); this.gradient.y[j] += step_mag * sig_pow(this.pos.y[j]-this.pos.y[i], this.lnorm-1.0); } } else { /* Euclidean Minkowski/Lebesgue metric */ /* Note the simplification of the code for the special * cases when dist_power takes on an integer value. */ if (this.dist_power == 1) step_mag = weight * resid / dist_config; else if(this.dist_power == 2) step_mag = weight * resid; else if (this.dist_power == 3) step_mag = weight * resid * dist_config; else if (this.dist_power == 4) step_mag = weight * resid * dist_config * dist_config; else step_mag = weight * resid * pow(dist_config, this.dist_power-2.); this.gradient.x[i] += step_mag * (this.pos.x[i]-this.pos.x[j]); /* Euclidean! */ this.gradient.y[i] += step_mag * (this.pos.y[i]-this.pos.y[j]); /* Euclidean! */ this.gradient.x[j] += step_mag * (this.pos.x[j]-this.pos.x[i]); /* Euclidean! */ this.gradient.y[j] += step_mag * (this.pos.y[j]-this.pos.y[i]); /* Euclidean! */ } } /** gradient normalizing factor to scale gradient to a fraction of the size of the configuration */ private double mds_once_part3_normalizeGradient() { double gsum = 0, psum = 0; for (int i=0; i<this.pos.x.length; i++) { // if (true || (ANCHOR_SCALE && IS_ANCHOR(i))) gsum += pos.L2_norm (this.gradient.x[i], this.gradient.y[i]); psum += pos.L2_norm (this.pos.x[i], this.pos.y[i]); } return (gsum < delta) ? 0.0 : this.stepsize * sqrt(psum/gsum); } public double[][] points() { return pos.points(); } /* we assume in this routine that trans_dist contains dist.data for KruskalShepard and -dist.data*dist.data for CLASSIC MDS */ private void power_transform () { if (this.Dtarget_power == 1.) { return; } else if (this.Dtarget_power == 2.) { throw null; // if (this.KruskalShepard_classic == MDSKSInd.KruskalShepard) { // for (i=0; i<this.ndistances; i++) { // tmp = this.trans_dist.els[i]; // if (tmp != Double.MAX_VALUE) // this.trans_dist.els[i] = tmp*tmp/this.Dtarget_max; // } // } else { // for (i=0; i<this.ndistances; i++) { // tmp = this.trans_dist.els[i]; // if (tmp != Double.MAX_VALUE) // this.trans_dist.els[i] = -tmp*tmp/this.Dtarget_max; // } // } } else { throw null; // fac = pow (this.Dtarget_max, this.Dtarget_power-1); // if (this.KruskalShepard_classic == MDSKSInd.KruskalShepard) { // for(i=0; i<this.ndistances; i++) { // tmp = this.trans_dist.els[i]; // if (tmp != Double.MAX_VALUE) // this.trans_dist.els[i] = pow(tmp, this.Dtarget_power)/fac; // } // } else { // for(i=0; i<this.ndistances; i++) { // tmp = this.trans_dist.els[i]; // if(tmp != Double.MAX_VALUE) // this.trans_dist.els[i] = -pow(-tmp, this.Dtarget_power)/fac; // } // } } } /* end power_transform() */ private boolean SAMEGLYPH(int i, int j) { return (i / 100) == (j / 100); } /* * weights are only set if weightpow != 0; for 0 there's simpler *code throughout, and we save space */ private void set_weights () { //double this_weight; //double local_weight_power = 0.; //double local_within_between = 1.; this.weights = new SymmetricMatrix(Dtarget.rowCount()); for (int i = 0; i < Dtarget.rowCount(); i++) { for (int j = 0; j < i; j++) { this.weights.put(i,j, f_weights.apply(this.Dtarget.get(i,j))); } } this.weights = null; /* the weights will be used in metric and nonmetric scaling * as soon as weightpow != 0. or within_between != 1. * weights vector only if needed */ // if ((this.weight_power != local_weight_power && // this.weight_power != 0.) || // (this.within_between != local_within_between && // this.within_between != 1.)) // { // assert false; // TODO // // for (i=0; i<this.Dtarget.vals.length; i++) { // for (j=0; j<this.Dtarget.vals.length; j++) { // if (Double.isNaN(this.Dtarget.vals[i][j])) { // this.weights.vals[i][j] = Double.NaN; // continue; // } // if (this.weight_power != 0.) { // if(this.Dtarget.vals[i][j] == 0.) { /* cap them */ // if (this.weight_power < 0.) { // this.weights.vals[i][j] = 1E5; // continue; // } // else { // this.weights.vals[i][j] = 1E-5; // } // } // this_weight = pow(this.Dtarget.vals[i][j], this.weight_power); // /* cap them */ // if (this_weight > 1E5) this_weight = 1E5; // else if (this_weight < 1E-5) this_weight = 1E-5; // /* within-between weighting */ // if (SAMEGLYPH(i,j)) // this_weight *= (2. - this.within_between); // else // this_weight *= this.within_between; // this.weights.vals[i][j] = this_weight; // } else { /* weightpow == 0. */ // if (SAMEGLYPH(i,j)) // this_weight = (2. - this.within_between); // else // this_weight = this.within_between; // this.weights.vals[i][j] = this_weight; // } // } // } // } } /* end set_weights() */ private void update_stress () { stress_dx = stress_xx = stress_dd = 0; for (int i=0; i < this.Dtarget.rowCount(); i++) for (int j=0; j < i; j++) { double dist_trans = this.Dtarget.get(i,j) * 2; // symmetry! if (Double.isNaN(dist_trans)) continue; double dist_config = this.config_dist.get(i,j) * 2; // symmetry! if (doesNotWeight()) { stress_dx += dist_trans * dist_config; stress_xx += dist_config * dist_config; stress_dd += dist_trans * dist_trans; } else { double this_weight = this.weights.get(i,j) * 2; // symmetry! stress_dx += dist_trans * dist_config * this_weight; stress_xx += dist_config * dist_config * this_weight; stress_dd += dist_trans * dist_trans * this_weight; } } /* calculate stress and draw it */ if (stress_dd * stress_xx > delta*delta) { stress = pow( 1.0 - stress_dx * stress_dx / stress_xx / stress_dd, 0.5); // FIXME add_stress_value (stress, ggv); // draw_stress (ggv, gg); } else { // FIXME do we need to throw an error? this results in an error for very small projects // throw new Error("didn't draw stress: stress_dx = " + stress_dx + " stress_dd = " + stress_dd + " stress_xx = " + stress_xx); } } /* end update_stress() */ private boolean doesNotWeight() { return this.weight_power == 0. && this.within_between == 1.; } enum MDSAnchorInd {fixed, no_anchor, scaled}; enum MDSGroupInd {all_distances, between, within}; }