package ml.shifu.shifu.udf; import java.io.IOException; import java.io.InputStream; import java.util.List; import java.util.Random; import java.util.zip.GZIPInputStream; import ml.shifu.shifu.core.binning.DynamicBinningTest; import ml.shifu.shifu.core.binning.EqualIntervalBinning; import ml.shifu.shifu.core.binning.obj.NumBinInfo; import org.apache.commons.io.IOUtils; import org.apache.commons.lang.StringUtils; import org.apache.pig.data.DataBag; import org.apache.pig.data.DefaultDataBag; import org.apache.pig.data.Tuple; import org.apache.pig.data.TupleFactory; import org.testng.Assert; import org.testng.annotations.Test; /** * Created by zhanhu on 7/6/16. */ public class DynamicBinningUDFTest { @Test public void testDynamicBinningUDFTest() throws IOException { DynamicBinningUDF inst = new DynamicBinningUDF("LOCAL", "src/test/resources/example/cancer-judgement/ModelStore/ModelSet1/ModelConfig.json", "src/test/resources/example/cancer-judgement/ModelStore/ModelSet1/ColumnConfig.json", "src/test/resources/example/partfile"); List<NumBinInfo> binInfoList = DynamicBinningTest.createNumBinInfos(10000); Random rd = new Random(System.currentTimeMillis()); long startTs = System.currentTimeMillis(); for (int i = 0; i < 10000; i++) { double val = rd.nextDouble() * 200; NumBinInfo numBinInfo = inst.binaryLocate(binInfoList, val); Assert.assertNotNull(numBinInfo); } System.out.println("Spend " + (System.currentTimeMillis() - startTs) + "-ms to query binning number according value."); } @Test public void testDynamicBinningUDF2Test() throws IOException { DynamicBinningUDF inst = new DynamicBinningUDF("LOCAL", "src/test/resources/example/inner_seg1_v15/ModelConfig.json", "src/test/resources/example/inner_seg1_v15/ColumnConfig.json", "src/test/resources/example/inner_seg1_v15/smallbins"); Tuple input = TupleFactory.getInstance().newTuple(1); input.set(0, createDataBag()); String binsText = (String)inst.exec(input).get(1); Assert.assertEquals(StringUtils.split(binsText, CalculateStatsUDF.CATEGORY_VAL_SEPARATOR).length, 10); } private DataBag createDataBag() throws IOException { InputStream is = DynamicBinningUDFTest.class.getResourceAsStream("/example/inner_seg1_v15/dib_sample_fields.gz"); GZIPInputStream gzis = new GZIPInputStream(is); DataBag databag = new DefaultDataBag(); EqualIntervalBinning inst = new EqualIntervalBinning(1000); List<String> lines = IOUtils.readLines(gzis); for (String record : lines) { String[] fields = record.split("\\|"); inst.addData(fields[0]); Tuple tuple = TupleFactory.getInstance().newTuple(4); tuple.set(0, 1); tuple.set(1, fields[0]); tuple.set(2, (fields[1].equals("1") ? Boolean.TRUE : Boolean.FALSE)); tuple.set(3, 0); databag.add(tuple); } IOUtils.closeQuietly(gzis); IOUtils.closeQuietly(is); return databag; } }