/*
* Copyright [2012-2014] PayPal Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ml.shifu.shifu.core;
import ml.shifu.shifu.container.obj.EvalConfig;
import ml.shifu.shifu.container.obj.ModelConfig;
import ml.shifu.shifu.container.obj.RawSourceData.SourceType;
import ml.shifu.shifu.util.CommonUtils;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import java.io.IOException;
/**
 * Unit tests for {@link DataPurifier}: each case installs a filter expression on a
 * {@link ModelConfig} or {@link EvalConfig}, builds a purifier from it and checks the
 * boolean returned by {@code isFilterOut} for sample records.
 *
 * <p>As the assertions below expect it, {@code isFilterOut} answers {@code true} when a
 * record satisfies the filter expression and {@code false} when it does not.
 *
 * <p>The configuration fixture is reloaded before every test method because the tests
 * overwrite its filter expressions; sharing one instance would make results depend on
 * the order in which the test methods happen to run.
 */
public class DataPurifierTest {

    /** Location of the model configuration fixture. */
    private static final String MODEL_CONFIG_PATH =
            "src/test/resources/example/cancer-judgement/ModelStore/ModelSet1/ModelConfig.json";

    /** Pipe-delimited sample record without its first field (the one the filters call {@code diagnosis}). */
    private static final String FEATURES =
            "17.99|10.38|122.8|1001|0.1184|0.2776|0.3001|0.1471|0.2419|0.07871|1.095|0.9053|8.589|153.4|0.006399|0.04904|0.05373|0.01587|0.03003|0.006193|25.38|17.33|184.6|2019|0.1622|0.6656|0.7119|0.2654|0.4601|0.1189";

    /** Sample record whose first field is {@code M}. */
    private static final String RECORD_M = "M|" + FEATURES;

    /** Sample record whose first field is {@code B}. */
    private static final String RECORD_B = "B|" + FEATURES;

    /** Fresh copy of the fixture, reloaded by {@link #setUp()} before each test. */
    private ModelConfig modelConfig;

    /**
     * Loads a pristine model configuration so that filter expressions set by one test
     * never leak into another.
     *
     * @throws Exception if the fixture cannot be loaded
     */
    @BeforeMethod
    public void setUp() throws Exception {
        modelConfig = CommonUtils.loadModelConfig(MODEL_CONFIG_PATH, SourceType.LOCAL);
    }

    /**
     * Walks through a series of filter expressions on the training data set and on the
     * {@code EvalA} evaluation data set, checking the result for string records.
     */
    @Test
    public void testIsFilterOutA() throws IOException {
        // Filter expression exactly as it is defined in the fixture file.
        DataPurifier dataPurifier = new DataPurifier(modelConfig);
        Assert.assertTrue(dataPurifier.isFilterOut(RECORD_M));

        modelConfig.getDataSet().setFilterExpressions("aaa == aaa");
        dataPurifier = new DataPurifier(modelConfig);
        Assert.assertTrue(dataPurifier.isFilterOut(RECORD_M));

        // An expression that can never hold rejects the record.
        modelConfig.getDataSet().setFilterExpressions("1 == 2");
        dataPurifier = new DataPurifier(modelConfig);
        Assert.assertFalse(dataPurifier.isFilterOut(RECORD_M));

        modelConfig.getDataSet().setFilterExpressions("*");
        dataPurifier = new DataPurifier(modelConfig);
        Assert.assertTrue(dataPurifier.isFilterOut(RECORD_M));

        // Same checks against an evaluation set instead of the training set.
        EvalConfig evalConfig = modelConfig.getEvalConfigByName("EvalA");

        evalConfig.getDataSet().setFilterExpressions("diagnosis == \"M\"");
        dataPurifier = new DataPurifier(evalConfig);
        Assert.assertTrue(dataPurifier.isFilterOut(RECORD_M));
        Assert.assertFalse(dataPurifier.isFilterOut(RECORD_B));

        // A blank expression is expected to accept every record.
        evalConfig.getDataSet().setFilterExpressions(" ");
        dataPurifier = new DataPurifier(evalConfig);
        Assert.assertTrue(dataPurifier.isFilterOut(RECORD_M));
        Assert.assertTrue(dataPurifier.isFilterOut(RECORD_B));

        // A malformed expression is likewise expected to accept every record.
        evalConfig.getDataSet().setFilterExpressions(" ASDF *** SDFKSADFJKS > ");
        dataPurifier = new DataPurifier(evalConfig);
        Assert.assertTrue(dataPurifier.isFilterOut(RECORD_M));
        Assert.assertTrue(dataPurifier.isFilterOut(RECORD_B));
    }

    /** Compares the first field against the lower-case string literal {@code "null"}. */
    @Test
    public void testFilterNull() throws IOException {
        modelConfig.getDataSet().setFilterExpressions("diagnosis != \"null\"");
        DataPurifier dataPurifier = new DataPurifier(modelConfig);

        Assert.assertFalse(dataPurifier.isFilterOut("null|" + FEATURES));
        Assert.assertTrue(dataPurifier.isFilterOut(RECORD_M));
    }

    /** Compares the first field against the upper-case string literal {@code "NULL"}. */
    @Test
    public void testFilterEqualNull() throws IOException {
        modelConfig.getDataSet().setFilterExpressions("diagnosis != \"NULL\" ");
        DataPurifier dataPurifier = new DataPurifier(modelConfig);

        Assert.assertFalse(dataPurifier.isFilterOut("NULL|" + FEATURES));
        Assert.assertTrue(dataPurifier.isFilterOut(RECORD_M));
    }

    /**
     * Compares the first field against a real {@code null} reference, using Pig tuples
     * so that the field can actually be {@code null} rather than a string.
     */
    @Test
    public void testFilterIsNull() throws IOException {
        modelConfig.getDataSet().setFilterExpressions("diagnosis != null ");
        DataPurifier dataPurifier = new DataPurifier(modelConfig);
        Assert.assertFalse(dataPurifier.isFilterOut(tupleStartingWith(null)));
        Assert.assertTrue(dataPurifier.isFilterOut(tupleStartingWith(new Object())));

        // Inverted expression, inverted expectations.
        modelConfig.getDataSet().setFilterExpressions("diagnosis == null ");
        dataPurifier = new DataPurifier(modelConfig);
        Assert.assertTrue(dataPurifier.isFilterOut(tupleStartingWith(null)));
        Assert.assertFalse(dataPurifier.isFilterOut(tupleStartingWith(new Object())));
    }

    /**
     * Builds a tuple holding {@code first} followed by every value of {@link #FEATURES}
     * as a separate string field.
     *
     * @param first value of the leading field; may be {@code null}
     * @return a newly created tuple
     */
    private static Tuple tupleStartingWith(Object first) {
        Tuple tuple = TupleFactory.getInstance().newTuple();
        tuple.append(first);
        for (String feature : FEATURES.split("\\|")) {
            tuple.append(feature);
        }
        return tuple;
    }
}