package hex;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import water.TestUtil;
import water.fvec.Vec;
import water.util.Log;
import java.util.Random;
/**
 * Tests for {@link GainsLift} driven by synthetic predictions and binary labels.
 *
 * <p>Every test seeds {@link Random} with the same constant, so the generated
 * data (and therefore the asserted response rates) are reproducible. The order
 * of {@code rng.nextDouble()} calls inside each generation loop is significant:
 * reordering them changes the data set.
 *
 * <p>Each test removes the Vecs it created in a {@code finally} block, so a
 * failing assertion (or an exception thrown while computing the table) does not
 * skip the cleanup.
 */
public class GainsLiftTest extends TestUtil {
  /** Seed shared by all tests so the synthetic data is deterministic. */
  private static final long SEED = 0xDECAF;

  @BeforeClass public static void stall() { stall_till_cloudsize(1); }

  /** Wraps 0/1 labels in a categorical Vec with domain {"N","Y"}. */
  private static Vec labelVec(long[] labels) {
    return Vec.makeVec(labels, new String[]{"N","Y"}, Vec.newKey());
  }

  /** Same as {@link #labelVec(long[])} but for labels that may contain NaN (missing) entries. */
  private static Vec labelVec(double[] labels) {
    return Vec.makeVec(labels, new String[]{"N","Y"}, Vec.newKey());
  }

  /** Builds a GainsLift over the given Vecs with the requested number of groups, executes it and logs it. */
  private static GainsLift compute(Vec predict, Vec actual, int groups) {
    GainsLift gl = new GainsLift(predict, actual);
    gl._groups = groups;
    gl.exec();
    Log.info(gl);
    return gl;
  }

  /** Removes both Vecs (skipping nulls); the second is removed even if removing the first throws. */
  private static void removeAll(Vec first, Vec second) {
    try {
      if (first != null) first.remove();
    } finally {
      if (second != null) second.remove();
    }
  }

  /** Asserts that every entry of {@code gl.response_rates} lies strictly between 0.19 and 0.21. */
  private static void assertAllRatesNearTwentyPercent(GainsLift gl) {
    for (int i = 0; i < gl.response_rates.length; ++i)
      Assert.assertTrue("response_rates[" + i + "] = " + gl.response_rates[i] + " is not in (0.19, 0.21)",
              gl.response_rates[i] > 0.19 && gl.response_rates[i] < 0.21);
  }

  /**
   * Constant prediction for every row: the first response rate must equal the
   * average response rate exactly.
   */
  @Test public void constant() {
    final int len = 100000;
    double[] p = new double[len];
    long[] a = new long[len];
    Random rng = new Random(SEED);
    for (int i = 0; i < len; ++i) {
      a[i] = rng.nextDouble() > 0.8 ? 1 : 0;
      p[i] = 0.343424;
    }
    Vec actual = null;
    Vec predict = null;
    try {
      actual = labelVec(a);
      predict = Vec.makeVec(p, Vec.newKey());
      GainsLift gl = compute(predict, actual, 10);
      Assert.assertTrue("response_rates[0] = " + gl.response_rates[0]
                      + " differs from avg_response_rate = " + gl.avg_response_rate,
              gl.response_rates[0] == gl.avg_response_rate);
    } finally {
      removeAll(actual, predict);
    }
  }

  /**
   * Predictions separate the classes perfectly (negatives below 0.5, positives
   * at or above 0.5): the first two response rates must exceed 0.9 and all
   * remaining ones must be below 0.1.
   */
  @Test public void good() {
    final int len = 100000;
    double[] p = new double[len];
    long[] a = new long[len];
    Random rng = new Random(SEED);
    for (int i = 0; i < len; ++i) {
      a[i] = rng.nextDouble() > 0.8 ? 1 : 0;
      p[i] = a[i] == 0 ? 0.5*rng.nextDouble() : 0.5 + rng.nextDouble() * 0.5;
    }
    Vec actual = null;
    Vec predict = null;
    try {
      actual = labelVec(a);
      predict = Vec.makeVec(p, Vec.newKey());
      GainsLift gl = compute(predict, actual, 10);
      for (int i = 0; i < 2; ++i)
        Assert.assertTrue("response_rates[" + i + "] = " + gl.response_rates[i] + " should be > 0.9",
                gl.response_rates[i] > 0.9);
      for (int i = 2; i < gl.response_rates.length; ++i)
        Assert.assertTrue("response_rates[" + i + "] = " + gl.response_rates[i] + " should be < 0.1",
                gl.response_rates[i] < 0.1);
    } finally {
      removeAll(actual, predict);
    }
  }

  /**
   * Predictions are inverted relative to the labels (negatives at or above 0.5,
   * positives below 0.5): the last two response rates must exceed 0.9 and all
   * earlier ones must be below 0.1.
   */
  @Test public void bad() {
    final int len = 100000;
    double[] p = new double[len];
    long[] a = new long[len];
    Random rng = new Random(SEED);
    for (int i = 0; i < len; ++i) {
      a[i] = rng.nextDouble() > 0.8 ? 1 : 0;
      p[i] = a[i] == 0 ? 0.5 + 0.5*rng.nextDouble() : 0.5*rng.nextDouble();
    }
    Vec actual = null;
    Vec predict = null;
    try {
      actual = labelVec(a);
      predict = Vec.makeVec(p, Vec.newKey());
      GainsLift gl = compute(predict, actual, 10);
      final int split = gl.response_rates.length - 2;
      for (int i = split; i < gl.response_rates.length; ++i)
        Assert.assertTrue("response_rates[" + i + "] = " + gl.response_rates[i] + " should be > 0.9",
                gl.response_rates[i] > 0.9);
      for (int i = 0; i < split; ++i)
        Assert.assertTrue("response_rates[" + i + "] = " + gl.response_rates[i] + " should be < 0.1",
                gl.response_rates[i] < 0.1);
    } finally {
      removeAll(actual, predict);
    }
  }

  /**
   * Predictions are drawn independently of the labels: every response rate
   * must stay close to the ~20% rate at which positive labels are generated.
   */
  @Test public void random() {
    final int len = 100000;
    double[] p = new double[len];
    long[] a = new long[len];
    Random rng = new Random(SEED);
    for (int i = 0; i < len; ++i) {
      a[i] = rng.nextDouble() > 0.8 ? 1 : 0;
      p[i] = rng.nextDouble();
    }
    Vec actual = null;
    Vec predict = null;
    try {
      actual = labelVec(a);
      predict = Vec.makeVec(p, Vec.newKey());
      GainsLift gl = compute(predict, actual, 10);
      assertAllRatesNearTwentyPercent(gl);
    } finally {
      removeAll(actual, predict);
    }
  }

  /**
   * Predictions take only two distinct values (heavy ties) and some are NaN;
   * they are generated independently of the labels, so every response rate
   * must stay close to 20%.
   */
  @Test public void tiesNApreds() {
    final int len = 100000;
    double[] p = new double[len];
    long[] a = new long[len];
    Random rng = new Random(SEED);
    for (int i = 0; i < len; ++i) {
      a[i] = rng.nextDouble() > 0.8 ? 1 : 0;
      p[i] = rng.nextDouble() > 0.5 ? 0.7 : 0.4;
      if (rng.nextDouble() > 0.85) p[i] = Double.NaN;
    }
    Vec actual = null;
    Vec predict = null;
    try {
      actual = labelVec(a);
      predict = Vec.makeVec(p, Vec.newKey());
      GainsLift gl = compute(predict, actual, 10);
      assertAllRatesNearTwentyPercent(gl);
    } finally {
      removeAll(actual, predict);
    }
  }

  /**
   * Two-valued (tied) predictions with some labels set to NaN; predictions are
   * generated independently of the labels, so every response rate must stay
   * close to 20%.
   */
  @Test public void tiesNAlabels() {
    final int len = 100000;
    double[] p = new double[len];
    double[] a = new double[len];
    Random rng = new Random(SEED);
    for (int i = 0; i < len; ++i) {
      a[i] = rng.nextDouble() > 0.8 ? 1 : 0;
      p[i] = rng.nextDouble() > 0.5 ? 0.7 : 0.4;
      if (rng.nextDouble() > 0.85) a[i] = Double.NaN;
    }
    Vec actual = null;
    Vec predict = null;
    try {
      actual = labelVec(a);
      predict = Vec.makeVec(p, Vec.newKey());
      GainsLift gl = compute(predict, actual, 10);
      assertAllRatesNearTwentyPercent(gl);
    } finally {
      removeAll(actual, predict);
    }
  }

  /**
   * Two-valued (tied) predictions where both labels and predictions
   * independently contain NaN entries; every response rate must stay close
   * to 20%.
   */
  @Test public void tiesNAlabels_preds() {
    final int len = 100000;
    double[] p = new double[len];
    double[] a = new double[len];
    Random rng = new Random(SEED);
    for (int i = 0; i < len; ++i) {
      a[i] = rng.nextDouble() > 0.8 ? 1 : 0;
      p[i] = rng.nextDouble() > 0.5 ? 0.7 : 0.4;
      if (rng.nextDouble() > 0.85) a[i] = Double.NaN;
      if (rng.nextDouble() > 0.85) p[i] = Double.NaN;
    }
    Vec actual = null;
    Vec predict = null;
    try {
      actual = labelVec(a);
      predict = Vec.makeVec(p, Vec.newKey());
      GainsLift gl = compute(predict, actual, 10);
      assertAllRatesNearTwentyPercent(gl);
    } finally {
      removeAll(actual, predict);
    }
  }

  /**
   * Predictions are squeezed into two tiny intervals: the first half of the
   * rows within {@code thresh} of 0, the second half within {@code thresh}
   * of 1. Labels are independent of the predictions, so every response rate
   * must stay close to 20%.
   */
  @Test public void imbalanced() {
    final int len = 50000;
    final double thresh = 1e-7;
    double[] p = new double[2*len];
    long[] a = new long[2*len];
    Random rng = new Random(SEED);
    for (int i = 0; i < len; ++i) {
      a[i] = rng.nextDouble() > 0.8 ? 1 : 0;
      p[i] = rng.nextDouble()*thresh;
    }
    for (int i = len; i < 2*len; ++i) {
      a[i] = rng.nextDouble() > 0.8 ? 1 : 0;
      p[i] = (1-thresh)+thresh*rng.nextDouble();
    }
    Vec actual = null;
    Vec predict = null;
    try {
      actual = labelVec(a);
      predict = Vec.makeVec(p, Vec.newKey());
      GainsLift gl = compute(predict, actual, 10);
      assertAllRatesNearTwentyPercent(gl);
    } finally {
      removeAll(actual, predict);
    }
  }

  /** Rare positives (~0.1%), perfectly separated, 10 groups: first rate in [0.009, 0.011], the rest exactly 0. */
  @Test public void rareEvents() {
    checkRareEvents(10, 0.009, 0.011);
  }

  /** Rare positives (~0.1%), perfectly separated, 20 groups: first rate in [0.018, 0.022], the rest exactly 0. */
  @Test public void rareEvents20() {
    checkRareEvents(20, 0.018, 0.022);
  }

  /**
   * Shared body of the rare-event tests. Generates labels that are positive
   * only when {@code rng.nextDouble() > 0.999}, with predictions that separate
   * the classes perfectly, then checks that the first response rate lies in
   * the inclusive range {@code [lo, hi]} and that every other rate is exactly 0.
   *
   * @param groups value assigned to {@code GainsLift._groups}
   * @param lo     inclusive lower bound for {@code response_rates[0]}
   * @param hi     inclusive upper bound for {@code response_rates[0]}
   */
  private static void checkRareEvents(int groups, double lo, double hi) {
    final int len = 100000;
    double[] p = new double[len];
    long[] a = new long[len];
    Random rng = new Random(SEED);
    for (int i = 0; i < len; ++i) {
      a[i] = rng.nextDouble() > 0.999 ? 1 : 0;
      p[i] = a[i] == 0 ? 0.5*rng.nextDouble() : 0.5 + rng.nextDouble() * 0.5;
    }
    Vec actual = null;
    Vec predict = null;
    try {
      actual = labelVec(a);
      predict = Vec.makeVec(p, Vec.newKey());
      GainsLift gl = compute(predict, actual, groups);
      Assert.assertTrue("response_rates[0] = " + gl.response_rates[0] + " is not in [" + lo + ", " + hi + "]",
              gl.response_rates[0] <= hi && gl.response_rates[0] >= lo);
      for (int i = 1; i < gl.response_rates.length; ++i)
        Assert.assertTrue("response_rates[" + i + "] = " + gl.response_rates[i] + " should be exactly 0",
                gl.response_rates[i] == 0);
    } finally {
      removeAll(actual, predict);
    }
  }
}