package ch.akuhn.org.ggobi.plugins.ggvis;
import static java.lang.Math.abs;
import static java.lang.Math.pow;
import static java.lang.Math.sqrt;
import ch.akuhn.matrix.DenseMatrix;
import ch.akuhn.matrix.Function;
import ch.akuhn.matrix.SymmetricMatrix;
/** Multidimensional scaling.
* Initially ported from ggvis and greatly rewritten in Java by Adrian Kuhn.
* Original copyright notice:
*<PRE>/<span/>*
* * mds.c: multidimensional scaling
* * code originally written for xgvis by Michael Littman, greatly extended
* * and tuned by Andreas Buja. Now being ported to ggvis.
* *<span/>/</PRE>
* @author Adrian Kuhn (this in Java)
* @author Andreas Buja (ggvis in C/C++)
* @author Michael Littman (xgvis in C/C++)
*
*/
public class Mds {
static final int ANCHOR = 2;
private static boolean ANCHOR_FIXED = false;
private static boolean ANCHOR_SCALE = false;
static final double delta = 1E-10;
static final int DRAGGED = 4;
private static final boolean TODO_SYMMETRY = false;
final DenseMatrix config_dist;
final DenseMatrix Dtarget; /*-- D in the documentation; dist in the xgvis code --*/
private Points pos;
/* these belong in ggv */
public double stress;
private double stress_dx, stress_dd, stress_xx;
private final double sig_pow(double x, double p) {
return (x >= 0.0 ? pow(x, p) : -pow(-x, p));
}
private double stepsize = 0.02;
private double dist_power = 1.0;
private double Dtarget_power = 1.0;
private double lnorm = 2.0;
private double weight_power = 0.0;
private double dist_power_over_lnorm = 0.5;
private double lnorm_over_dist_power = 2.0;
private double within_between = 1.0;
private double rand_select_val = 1.0; /* selection probability */
private double threshold_high = 0.0;
private double threshold_low = 0.0;
private MDSGroupInd group_ind = MDSGroupInd.all_distances;
/*-- used in mds.c --*/
private DenseMatrix weights = null;
final private Points gradient;
private double Dtarget_max = Double.MAX_VALUE;
private double Dtarget_min = Double.MIN_VALUE;
private final int len;
/* */
public void init(boolean randomPoints) {
/* populate with INF */
this.Dtarget.apply(f_dtarget);
this.Dtarget_max = Dtarget.max();
this.Dtarget_min = Dtarget.min();
if (Dtarget_min < 0) throw new Error("negative dissimilarity: D[?][?] = ? -> NA\n");
this.threshold_low = this.Dtarget_min;
this.threshold_high = this.Dtarget_max;
if (randomPoints) {
for (int a = 0; a < pos.x.length; a++) {
this.pos.x[a] = (Math.random() - 0.5) * 2;
this.pos.y[a] = (Math.random() - 0.5) * 2;
}
}
this.config_dist.fill(Double.NaN);
set_weights();
}
public Mds(DenseMatrix dissimilarities, Points initial,
Function fConfigDist, Function fWeights, Function fDtarget) {
len = dissimilarities.rowCount();
Dtarget = dissimilarities;
config_dist = new SymmetricMatrix(len);
this.pos = initial == null ? new Points(len) : initial;
this.gradient = new Points(len);
f_config_dist = fConfigDist;
f_weights = fWeights;
f_dtarget = fDtarget;
this.init(initial == null);
}
private final Function f_config_dist;
private final Function f_weights;
private final Function f_dtarget;
private boolean IS_ANCHOR(int i) {
return i % 100 == 0;
}
boolean IS_DRAGGED(int i) {
return false;
}
private double Lp_distance_pow (int i, int j) {
double dsum = 0.0;
if (this.lnorm == 2. && this.dist_power == 1.) {
dsum += (this.pos.x[i] - this.pos.x[j]) * (this.pos.x[i] - this.pos.x[j]);
dsum += (this.pos.y[i] - this.pos.y[j]) * (this.pos.y[i] - this.pos.y[j]);
return (sqrt(dsum));
} else { /* non-Euclidean or Dtarget power != 1. */
dsum += pow (abs (this.pos.x[i] - this.pos.x[j]), this.lnorm);
dsum += pow (abs (this.pos.y[i] - this.pos.y[j]), this.lnorm);
return (pow(dsum, this.dist_power_over_lnorm));
}
}
/**
* Perform one loop of the iterative mds function.
*<P>
* If doit is False, then we really want to determine the
* stress function without doing anything to the gradient
*/
public void mds_once (boolean doit) {
mds_once_part2();
mds_once_part3(doit); /* close: if (doit && num_active_dist > 0) */
/* experiment: normalize point cloud after using simplified gradient */
// FIXME can we avoid this?
pos.ggv_center_scale_pos();
}
private void mds_once_part2() {
// allocate position and compute means
pos.get_center();
// i's are moved by j's
for (int i = 0; i < this.Dtarget.rowCount(); i++) {
// these points are not moved by the gradient
if (IS_DRAGGED(i) || (ANCHOR_FIXED && IS_ANCHOR(i))) continue;
/* j's are moving i's */
for (int j = 0; j < i; j++) {
if (mds_once_part2_continue(i, j)) continue;
this.config_dist.put(i,j,f_config_dist.apply(Lp_distance_pow(i, j)));
}
}
}
private boolean mds_once_part2_continue(int i, int j) {
/* these points do not contribute to the gradient */
if ((ANCHOR_SCALE || ANCHOR_FIXED) && !IS_ANCHOR(j) && !IS_DRAGGED(j)) return true;
if ((ANCHOR_SCALE || ANCHOR_FIXED) && !IS_ANCHOR(i) && !IS_DRAGGED(i)) return true;
/* if the target distance is missing, skip */
if (Double.isNaN(this.Dtarget.get(i,j))) return true;
/* if weight is zero, skip */
if (this.weights != null && this.weights.get(i,j) == 0.) return true;
/* using groups */
if (this.group_ind == MDSGroupInd.within && !SAMEGLYPH(i,j)) return true;
if (this.group_ind == MDSGroupInd.between && SAMEGLYPH(i,j)) return true;
/*
* if the target distance is within the thresholds
* set using the barplot of distances, keep going.
*/
if (this.Dtarget.get(i,j) < this.threshold_low ||
this.Dtarget.get(i,j) > this.threshold_high) return true;
/*
* random selection: needs to be done symmetrically
*/
if (this.rand_select_val < 1.0) {
//if (i < j && this.rand_sel.vals[i][j] > this.rand_select_val) continue;
//if (i > j && this.rand_sel.vals[j][i] > this.rand_select_val) continue;
}
/*
* zero weights:
* assume weights exist if test is positive, and
* can now assume that weights are >0 for non-NA
*/
if (!doesNotWeight()) {
if (this.weights.get(i,j) == 0.) return true;
}
return false;
}
private void mds_once_part3(boolean doit) {
power_transform();
update_stress();
/* --- for active dissimilarities, do the gradient push if asked for ----*/
if (doit) {
/* Zero out the gradient matrix. */
this.gradient.clear();
/* ------------- gradient accumulation: j's push i's ----------- */
for (int i = 0; i < this.Dtarget.rowCount(); i++) {
for (int j = 0; j < i; j++) {
double weight;
double dist_trans = this.Dtarget.get(i,j);
if (Double.isNaN(dist_trans)) continue;
double dist_config = this.config_dist.get(i,j);
if (abs(dist_config) < delta) dist_config = delta;
if (doesNotWeight()) {
weight = 1.0;
} else {
weight = this.weights.get(i,j);
}
mds_once_part3_gradient(dist_trans, dist_config, weight, i, j);
}
}
/* center the classical gradient */
double gfactor = mds_once_part3_normalizeGradient();
/* add the gradient matrix to the position matrix and drag points */
for (int i=0; i<this.pos.x.length; i++) {
if (!IS_DRAGGED(i)) {
this.pos.x[i] += (gfactor * this.gradient.x[i]);
this.pos.y[i] += (gfactor * this.gradient.y[i]);
} else {
throw null;
// for (int k=0; k < this.dim; k++)
// this.pos.vals[i][k] = dpos.tform.vals[i][k] ;
}
}
}
}
private void mds_once_part3_gradient(double dist_trans,
double dist_config, double weight, int i, int j) {
double resid;
double step_mag;
// scale independent version: */
resid = (dist_trans - stress_dx / stress_xx * dist_config);
// scale dependent version:
// resid = (dist_trans - dist_config);
if (this.lnorm != 2) {
assert TODO_SYMMETRY;
/* non-Euclidean Minkowski/Lebesgue metric */
step_mag = weight * resid *
pow (dist_config, 1 - this.lnorm_over_dist_power);
for (int k = 0; k < 2; k++) {
this.gradient.x[i] += step_mag * sig_pow(this.pos.x[i]-this.pos.x[j], this.lnorm-1.0);
this.gradient.y[i] += step_mag * sig_pow(this.pos.y[i]-this.pos.y[j], this.lnorm-1.0);
this.gradient.x[j] += step_mag * sig_pow(this.pos.x[j]-this.pos.x[i], this.lnorm-1.0);
this.gradient.y[j] += step_mag * sig_pow(this.pos.y[j]-this.pos.y[i], this.lnorm-1.0);
}
} else { /* Euclidean Minkowski/Lebesgue metric */
/* Note the simplification of the code for the special
* cases when dist_power takes on an integer value. */
if (this.dist_power == 1)
step_mag = weight * resid / dist_config;
else if(this.dist_power == 2)
step_mag = weight * resid;
else if (this.dist_power == 3)
step_mag = weight * resid * dist_config;
else if (this.dist_power == 4)
step_mag = weight * resid * dist_config * dist_config;
else
step_mag = weight * resid *
pow(dist_config, this.dist_power-2.);
this.gradient.x[i] += step_mag * (this.pos.x[i]-this.pos.x[j]); /* Euclidean! */
this.gradient.y[i] += step_mag * (this.pos.y[i]-this.pos.y[j]); /* Euclidean! */
this.gradient.x[j] += step_mag * (this.pos.x[j]-this.pos.x[i]); /* Euclidean! */
this.gradient.y[j] += step_mag * (this.pos.y[j]-this.pos.y[i]); /* Euclidean! */
}
}
/** gradient normalizing factor to scale gradient to a fraction of
the size of the configuration */
private double mds_once_part3_normalizeGradient() {
double gsum = 0, psum = 0;
for (int i=0; i<this.pos.x.length; i++) {
// if (true || (ANCHOR_SCALE && IS_ANCHOR(i)))
gsum += pos.L2_norm (this.gradient.x[i], this.gradient.y[i]);
psum += pos.L2_norm (this.pos.x[i], this.pos.y[i]);
}
return (gsum < delta) ? 0.0 : this.stepsize * sqrt(psum/gsum);
}
public double[][] points() {
return pos.points();
}
/* we assume in this routine that trans_dist contains
dist.data for KruskalShepard and
-dist.data*dist.data for CLASSIC MDS */
private void power_transform () {
if (this.Dtarget_power == 1.) {
return;
} else if (this.Dtarget_power == 2.) {
throw null;
// if (this.KruskalShepard_classic == MDSKSInd.KruskalShepard) {
// for (i=0; i<this.ndistances; i++) {
// tmp = this.trans_dist.els[i];
// if (tmp != Double.MAX_VALUE)
// this.trans_dist.els[i] = tmp*tmp/this.Dtarget_max;
// }
// } else {
// for (i=0; i<this.ndistances; i++) {
// tmp = this.trans_dist.els[i];
// if (tmp != Double.MAX_VALUE)
// this.trans_dist.els[i] = -tmp*tmp/this.Dtarget_max;
// }
// }
} else {
throw null;
// fac = pow (this.Dtarget_max, this.Dtarget_power-1);
// if (this.KruskalShepard_classic == MDSKSInd.KruskalShepard) {
// for(i=0; i<this.ndistances; i++) {
// tmp = this.trans_dist.els[i];
// if (tmp != Double.MAX_VALUE)
// this.trans_dist.els[i] = pow(tmp, this.Dtarget_power)/fac;
// }
// } else {
// for(i=0; i<this.ndistances; i++) {
// tmp = this.trans_dist.els[i];
// if(tmp != Double.MAX_VALUE)
// this.trans_dist.els[i] = -pow(-tmp, this.Dtarget_power)/fac;
// }
// }
}
} /* end power_transform() */
private boolean SAMEGLYPH(int i, int j) {
return (i / 100) == (j / 100);
}
/*
* weights are only set if weightpow != 0; for 0 there's simpler
*code throughout, and we save space
*/
private void set_weights () {
//double this_weight;
//double local_weight_power = 0.;
//double local_within_between = 1.;
this.weights = new SymmetricMatrix(Dtarget.rowCount());
for (int i = 0; i < Dtarget.rowCount(); i++) {
for (int j = 0; j < i; j++) {
this.weights.put(i,j, f_weights.apply(this.Dtarget.get(i,j)));
}
}
this.weights = null;
/* the weights will be used in metric and nonmetric scaling
* as soon as weightpow != 0. or within_between != 1.
* weights vector only if needed */
// if ((this.weight_power != local_weight_power &&
// this.weight_power != 0.) ||
// (this.within_between != local_within_between &&
// this.within_between != 1.))
// {
// assert false; // TODO
//
// for (i=0; i<this.Dtarget.vals.length; i++) {
// for (j=0; j<this.Dtarget.vals.length; j++) {
// if (Double.isNaN(this.Dtarget.vals[i][j])) {
// this.weights.vals[i][j] = Double.NaN;
// continue;
// }
// if (this.weight_power != 0.) {
// if(this.Dtarget.vals[i][j] == 0.) { /* cap them */
// if (this.weight_power < 0.) {
// this.weights.vals[i][j] = 1E5;
// continue;
// }
// else {
// this.weights.vals[i][j] = 1E-5;
// }
// }
// this_weight = pow(this.Dtarget.vals[i][j], this.weight_power);
// /* cap them */
// if (this_weight > 1E5) this_weight = 1E5;
// else if (this_weight < 1E-5) this_weight = 1E-5;
// /* within-between weighting */
// if (SAMEGLYPH(i,j))
// this_weight *= (2. - this.within_between);
// else
// this_weight *= this.within_between;
// this.weights.vals[i][j] = this_weight;
// } else { /* weightpow == 0. */
// if (SAMEGLYPH(i,j))
// this_weight = (2. - this.within_between);
// else
// this_weight = this.within_between;
// this.weights.vals[i][j] = this_weight;
// }
// }
// }
// }
} /* end set_weights() */
private void update_stress () {
stress_dx = stress_xx = stress_dd = 0;
for (int i=0; i < this.Dtarget.rowCount(); i++)
for (int j=0; j < i; j++) {
double dist_trans = this.Dtarget.get(i,j) * 2; // symmetry!
if (Double.isNaN(dist_trans)) continue;
double dist_config = this.config_dist.get(i,j) * 2; // symmetry!
if (doesNotWeight()) {
stress_dx += dist_trans * dist_config;
stress_xx += dist_config * dist_config;
stress_dd += dist_trans * dist_trans;
} else {
double this_weight = this.weights.get(i,j) * 2; // symmetry!
stress_dx += dist_trans * dist_config * this_weight;
stress_xx += dist_config * dist_config * this_weight;
stress_dd += dist_trans * dist_trans * this_weight;
}
}
/* calculate stress and draw it */
if (stress_dd * stress_xx > delta*delta) {
stress = pow( 1.0 - stress_dx * stress_dx / stress_xx / stress_dd, 0.5);
// FIXME add_stress_value (stress, ggv);
// draw_stress (ggv, gg);
} else {
// FIXME do we need to throw an error? this results in an error for very small projects
// throw new Error("didn't draw stress: stress_dx = " + stress_dx + " stress_dd = " + stress_dd + " stress_xx = " + stress_xx);
}
} /* end update_stress() */
private boolean doesNotWeight() {
return this.weight_power == 0. && this.within_between == 1.;
}
enum MDSAnchorInd {fixed, no_anchor, scaled};
enum MDSGroupInd {all_distances, between, within};
}