package com.spbsu.exp; import com.spbsu.commons.math.MathTools; import com.spbsu.commons.math.vectors.Mx; import com.spbsu.commons.math.vectors.Vec; import com.spbsu.commons.math.vectors.impl.mx.VecBasedMx; import com.spbsu.commons.math.vectors.impl.vectors.ArrayVec; import com.spbsu.commons.random.FastRandom; import com.spbsu.ml.methods.MTA; import junit.framework.TestCase; import java.util.Random; import static com.spbsu.commons.math.MathTools.sqr; import static com.spbsu.commons.math.vectors.VecTools.*; /** * Stein paradox experiments. * User: solar * Date: 16.04.14 * Time: 13:24 */ public class DispersionTest extends TestCase { public void testStein() { Random rng = new FastRandom(); for (int n = 3; n < 1000; n++) { double sumAvgNaive = 0; double sumAvgJS = 0; double sumAvgJS2 = 0; double sumAvgMta = 0; double sumAvgMtaMiniMax = 0; double sumAvgMtaOrcale = 0; for (int k = 0; k < 1000; k++) { final int mdim = 15; Vec sum = new ArrayVec(mdim); Vec m = new ArrayVec(mdim); // Vec m = new ArrayVec(new double[]{2 * rng.nextDouble(), 2 * rng.nextDouble(), 2 * rng.nextDouble()}); for (int i = 0; i < mdim; ++i) { m.set(i, rng.nextDouble() * 2); } Mx A = new VecBasedMx(mdim, mdim); for (int i = 0; i < mdim; ++i) { for (int j = i + 1; j < mdim; ++j) { double val = m.get(i) - m.get(j); val = 2 / (val * val); A.set(i, j, val); A.set(j, i, val); } } double sigma = 1; double[][] samples = new double[mdim][n]; for (int i = 0; i < n; i++) { for (int t = 0; t < sum.dim(); t++) { samples[t][i] = sigma * rng.nextGaussian() + m.get(t); sum.adjust(t, samples[t][i]); } } Vec naive = copy(sum); scale(naive, 1. / n); Vec js = copy(naive); scale(js, (1 - (js.dim() - 2) * sigma * sigma / sqr(norm(sum)))); MTA estimator = new MTA(samples); Vec mtaOrcale = estimator.oracle(A); Vec mta = new ArrayVec(estimator.mtaConst()); sumAvgNaive += distance(naive, m) / Math.sqrt(m.dim()); sumAvgJS += distance(js, m) / Math.sqrt(m.dim()); sumAvgJS2 += distance(new ArrayVec(estimator.stein()), m) / Math.sqrt(m.dim()); sumAvgMta += distance(mta, m) / Math.sqrt(m.dim()); sumAvgMtaMiniMax += distance(new ArrayVec(new MTA(samples).mtaMiniMax()), m) / Math.sqrt(m.dim()); sumAvgMtaOrcale += distance(mtaOrcale, m) / Math.sqrt(m.dim()); } System.out.println(n + "\t" + sumAvgNaive / 1000 + "\t" + sumAvgJS / 1000 + "\t" + sumAvgJS2 / 1000 + "\t" + sumAvgMta / 1000 + "\t" + sumAvgMtaMiniMax / 1000 + "\t" + sumAvgMtaOrcale / 1000); } } public void testMtaBernoulli() { Random rng = new FastRandom(); for (int n = 5; n < 1000; n++) { double sumAvgNaive = 0; double sumAvgJS = 0; double sumAvgJS2 = 0; double sumAvgMta = 0; double sumAvgMtaMiniMax = 0; double sumAvgMtaOracle = 0; int N = 1000; for (int k = 0; k < N; k++) { final int mdim = 10; Vec sum = new ArrayVec(mdim); Vec m = new ArrayVec(mdim); for (int i = 0; i < mdim; ++i) { m.set(i, rng.nextDouble()); } Mx A = new VecBasedMx(mdim, mdim); for (int i = 0; i < mdim; ++i) { for (int j = i + 1; j < mdim; ++j) { double val = m.get(i) - m.get(j); val = 2 / (val * val); A.set(i, j, val); A.set(j, i, val); } } double sigma = 1.0; double[][] samples = new double[mdim][n]; for (int i = 0; i < n; i++) { for (int t = 0; t < sum.dim(); t++) { samples[t][i] = rng.nextDouble() > m.get(t) ? 0 : 1; sum.adjust(t, samples[t][i]); } } Vec naive = copy(sum); scale(naive, 1. / n); Vec js = copy(naive); scale(js, (1 - (js.dim() - 2) * sigma / sqr(norm(sum)))); MTA estimator = new MTA(samples); Vec mta = estimator.oracle(estimator.bernoulliSimilarity()); Vec mtaOracle = estimator.oracle(A); sumAvgNaive += distance(naive, m) / Math.sqrt(m.dim()); sumAvgJS += distance(js, m) / Math.sqrt(m.dim()); sumAvgJS2 += distance(new ArrayVec(estimator.steinBernoulli()), m) / Math.sqrt(m.dim()); // sumAvgJS2 += distance(MTA.bernoulliStein(sum, n), m) / Math.sqrt(m.dim()); sumAvgMta += distance(mta, m) / Math.sqrt(m.dim()); sumAvgMtaMiniMax += distance(new ArrayVec(new MTA(samples).mtaMiniMaxBernoulli()), m) / Math.sqrt(m.dim()); sumAvgMtaOracle += distance(mtaOracle, m) / Math.sqrt(m.dim()); } System.out.println(n + "\t" + sumAvgNaive / N + "\t" + sumAvgJS / N + "\t" + sumAvgJS2 / N + "\t" + sumAvgMta / N + "\t" + sumAvgMtaMiniMax / N + "\t" + sumAvgMtaOracle / N); } } public void testSteinBernoulli() { Random rng = new FastRandom(); for (int n = 1; n < 100; n++) { double sumAvgNaive = 0; double sumAvgJS = 0; for (int k = 0; k < 1000; k++) { Vec sum = new ArrayVec(3); Vec m = new ArrayVec(rng.nextDouble(), rng.nextDouble(), rng.nextDouble()); double sigma = 1; for (int i = 0; i < n; i++) { for (int t = 0; t < sum.dim(); t++) { sum.adjust(t, rng.nextDouble() > m.get(t) ? 0 : 1); } } Vec naive = copy(sum); scale(naive, 1. / n); Vec js = copy(sum); scale(js, (1 - (js.dim() - 2) * sigma * sigma / sqr(norm(sum))) / n); sumAvgNaive += distance(naive, m) / Math.sqrt(m.dim()); sumAvgJS += distance(js, m) / Math.sqrt(m.dim()); } System.out.println(n + "\t" + sumAvgNaive / 1000 + "\t" + sumAvgJS / 1000); } } public void test1FoldClear() { Random rng = new FastRandom(); double sumAvgNaive = 0; double sumAvgJS = 0; double sumDGP = 0; for (int k = 0; k < 1000; k++) { Vec set = new ArrayVec(3); final double m_0 = rng.nextDouble(); Vec m = new ArrayVec(m_0, m_0, m_0); double sigma = 1; for (int t = 0; t < set.dim(); t++) { set.set(t, sigma * rng.nextGaussian() + m.get(t)); } double naive; { Vec estimate = new ArrayVec(set.dim()); naive = MathTools.meanNaive(set); fill(estimate, naive); sumAvgNaive += distance(estimate, m) / Math.sqrt(m.dim()); } double js; { Vec estimate = new ArrayVec(set.dim()); js = MathTools.meanJS1(set, sigma); fill(estimate, js); sumAvgJS += distance(estimate, m) / Math.sqrt(m.dim()); } { Vec estimate = new ArrayVec(set.dim()); double dgp = MathTools.meanDropFluctuations(set); fill(estimate, dgp); sumDGP += distance(estimate, m) / Math.sqrt(m.dim()); } } System.out.println("\t" + sumAvgNaive / 1000 + "\t" + sumAvgJS / 1000 + "\t" + sumDGP / 1000); } public void test1FoldErr10() { Random rng = new FastRandom(); for (int m = 3; m < 100; m++) { double sumAvgNaive = 0; double sumAvgJS = 0; double sumDGP = 0; for (int k = 0; k < 1000; k++) { Vec set = new ArrayVec(m); final double m_0 = rng.nextDouble() * 2; Vec e = new ArrayVec(set.dim()); for (int i = 0; i < e.dim(); i++) { e.set(i, rng.nextDouble() > 0.3 ? m_0 : rng.nextDouble() * 2); } double sigma = 1; for (int t = 0; t < set.dim(); t++) { set.set(t, sigma * rng.nextGaussian() + e.get(t)); } double naive; { Vec estimate = new ArrayVec(set.dim()); naive = MathTools.meanNaive(set); fill(estimate, naive); sumAvgNaive += distance(estimate, fill(new ArrayVec(estimate.dim()), m_0)) / Math.sqrt(e.dim()); } double js; { Vec estimate = new ArrayVec(set.dim()); js = MathTools.meanJS1(set, sigma); fill(estimate, js); sumAvgJS += distance(estimate, fill(new ArrayVec(estimate.dim()), m_0)) / Math.sqrt(e.dim()); } { Vec estimate = new ArrayVec(set.dim()); double dgp = MathTools.meanDropFluctuations(set); fill(estimate, dgp); sumDGP += distance(estimate, fill(new ArrayVec(estimate.dim()), m_0)) / Math.sqrt(e.dim()); } } System.out.println(m + "\t" + sumAvgNaive / 1000 + "\t" + sumAvgJS / 1000 + "\t" + sumDGP / 1000); } } public void test1FoldErrBig10() { Random rng = new FastRandom(); for (int m = 3; m < 100; m++) { double sumAvgNaive = 0; double sumAvgJS = 0; double sumDGP = 0; for (int k = 0; k < 1000; k++) { Vec set = new ArrayVec(m); final double m_0 = rng.nextDouble() * 2 - 1; Vec e = new ArrayVec(set.dim()); for (int t = 0; t < e.dim(); t++) { e.set(t, rng.nextDouble() > 0.01 ? m_0 : (rng.nextBoolean() ? 1 : -1) / (1 - rng.nextDouble()) * 2); } double sigma = 1; for (int t = 0; t < set.dim(); t++) { set.set(t, sigma * rng.nextGaussian() + e.get(t)); } double naive; { Vec estimate = new ArrayVec(set.dim()); naive = MathTools.meanNaive(set); fill(estimate, naive); sumAvgNaive += distance(estimate, fill(new ArrayVec(estimate.dim()), m_0)) / Math.sqrt(e.dim()); } double js; { Vec estimate = new ArrayVec(set.dim()); js = MathTools.meanJS1(set, sigma); fill(estimate, js); sumAvgJS += distance(estimate, fill(new ArrayVec(estimate.dim()), m_0)) / Math.sqrt(e.dim()); } { Vec estimate = new ArrayVec(set.dim()); double dgp = MathTools.meanDropFluctuations(set); fill(estimate, dgp); sumDGP += distance(estimate, fill(new ArrayVec(estimate.dim()), m_0)) / Math.sqrt(e.dim()); } } System.out.println(m + "\t" + sumAvgNaive / 1000 + "\t" + sumAvgJS / 1000 + "\t" + sumDGP / 1000); } } }