package hex.drf;
import hex.drf.DRF.DRFModel;
import org.junit.*;
import water.*;
import water.fvec.Frame;
import water.fvec.Vec;
import water.util.Utils;
public class DRFModelAdaptTest extends TestUtil {
private abstract class PrepData { abstract Vec prep(Frame fr); int needAdaptation(Frame fr) { return fr.numCols(); };}
@BeforeClass public static void stall() { stall_till_cloudsize(3); }
/**
* The scenario:
* - test data contains an input column which contains less enum values than the same column in train data.
* In this case we should provide correct values mapping:
* A - 0
* B - 1 B - 0 B - 1
* C - 2 D - 1 mapping should remap it into: D - 3
* D - 3
*/
//@Ignore
@Test public void testModelAdapt1() {
testModelAdaptation(
"./smalldata/test/classifier/coldom_train_1.csv",
"./smalldata/test/classifier/coldom_test_1.csv",
new PrepData() { @Override Vec prep(Frame fr) { return fr.vecs()[fr.numCols()-1]; } },
true);
}
/**
* The scenario:
* - test data contains an input column which contains more enum values than the same column in train data.
* A - 0
* B - 1 B - 0 B - 1
* C - 2 X - 1 mapping should remap it into: X - NA
* D - 3
*/
//@Ignore
@Test public void testModelAdapt1_2() {
testModelAdaptation(
"./smalldata/test/classifier/coldom_train_1.csv",
"./smalldata/test/classifier/coldom_test_1_2.csv",
new PrepData() { @Override Vec prep(Frame fr) { return fr.vecs()[fr.numCols()-1]; } },
true);
}
//@Ignore
@Test public void testModelAdapt2() {
testModelAdaptation(
"./smalldata/test/classifier/coldom_train_2.csv",
"./smalldata/test/classifier/coldom_test_2.csv",
new PrepData() { @Override Vec prep(Frame fr) { return fr.vecs()[fr.find("R")]; }; @Override int needAdaptation(Frame fr) { return 0;} },
true);
}
/** Test adaptation of numeric values in response column. */
//@Ignore
@Test public void testModelAdapt3() {
testModelAdaptation(
"./smalldata/test/classifier/coldom_train_3.csv",
"./smalldata/test/classifier/coldom_test_3.csv",
new PrepData() { @Override Vec prep(Frame fr) { return fr.vecs()[fr.numCols()-1]; } },
false);
}
static final int[] a(int ...arr) { return arr; }
@Test public void testBasics_1() {
// Simple domain mapping
Assert.assertArrayEquals( a(0, 1, 2, 3), Utils.mapping(a( 0, 1, 2, 3)));
Assert.assertArrayEquals( a(0, 1, 2, -1, 3), Utils.mapping(a( 0, 1, 2, 4)));
Assert.assertArrayEquals( a(0, -1, 1), Utils.mapping(a(-1, 1)));
Assert.assertArrayEquals( a(0, -1, 1, -1, 2), Utils.mapping(a(-1, 1, 3)));
}
@Test public void testBasics_2() {
Assert.assertArrayEquals( a(2, 30, 400, 5000), Utils.compose(Utils.mapping(a( 0, 1, 2, 3)), a(2,30,400,5000) ));
Assert.assertArrayEquals( a(2, 30, 400, -1, 5000), Utils.compose(Utils.mapping(a( 0, 1, 2, 4)), a(2,30,400,5000) ));
Assert.assertArrayEquals( a(2, -1, 30), Utils.compose(Utils.mapping(a(-1, 1)), a(2,30,400,5000) ));
Assert.assertArrayEquals( a(2, -1, 30, -1, 400), Utils.compose(Utils.mapping(a(-1, 1, 3)), a(2,30,400,5000) ));
}
void testModelAdaptation(String train, String test, PrepData dprep, boolean exactAdaptation) {
DRFModel model = null;
Frame frTest = null;
Frame frTrain = null;
Key trainKey = Key.make("train.hex");
Key testKey = Key.make("test.hex");
Frame[] frAdapted = null;
try {
// Prepare a simple model
frTrain = parseFrame(trainKey, train);
model = runDRF(frTrain,dprep);
// Load test dataset - test data contains input columns matching train data,
// BUT each input requires adaptation. Moreover, test data contains additional columns
// containing correct value mapping.
frTest = parseFrame(testKey, test);
Assert.assertEquals("TEST CONF ERROR: The test dataset should contain 2*<number of input columns>+1!", 2*(frTrain.numCols()-1)+1, frTest.numCols());
// Adapt test dataset
frAdapted = model.adapt(frTest, exactAdaptation); // do/do not perform translation to enums
Assert.assertEquals("Adapt method should return two frames", 2, frAdapted.length);
Assert.assertEquals("Test expects that all columns in test dataset has to be adapted", dprep.needAdaptation(frTrain), frAdapted[1].numCols());
// Compare vectors
Frame adaptedFrame = frAdapted[0];
//System.err.println(frTest.toStringAll());
//System.err.println(adaptedFrame.toStringAll());
for (int av=0; av<frTrain.numCols()-1; av++) {
int ev = av + frTrain.numCols();
Vec actV = adaptedFrame.vecs()[av];
Vec expV = frTest.vecs()[ev];
Assert.assertEquals("Different number of rows in test vectors", expV.length(), actV.length());
for (long r=0; r<expV.length(); r++) {
if (expV.isNA(r)) Assert.assertTrue("Badly adapted vector - expected NA! Col: " + av + ", row: " + r, actV.isNA(r));
else {
Assert.assertTrue("Badly adapted vector - expected value but get NA! Col: " + av + ", row: " + r, !actV.isNA(r));
Assert.assertEquals("Badly adapted vector - wrong values! Col: " + av + ", row: " + r, expV.at8(r), actV.at8(r));
}
}
}
} finally {
// Test cleanup
if( model !=null ) model .delete();
if( frTrain!=null ) frTrain.delete();
if( frTest !=null ) frTest .delete();
// Remove adapted vectors which were saved into KV-store, rest of vectors are remove by frTest.remove()
if (frAdapted!=null) frAdapted[1].delete();
}
}
private DRFModel runDRF(Frame data, PrepData dprep) {
DRF drf = new DRF();
drf.source = data;
drf.response = dprep.prep(data);
drf.ntrees = 1;
drf.invoke();
return UKV.get(drf.dest());
}
}