/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package ml.shifu.shifu.core.binning; import java.io.FileInputStream; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Random; import junit.framework.Assert; import ml.shifu.shifu.container.obj.ColumnConfig; import ml.shifu.shifu.container.obj.ColumnConfig.ColumnType; import ml.shifu.shifu.container.obj.ModelConfig; import ml.shifu.shifu.container.obj.ModelStatsConf.BinningMethod; import org.apache.commons.io.IOUtils; import org.testng.annotations.Test; /** * EqualPopulationBinningTest class * * @Oct 22, 2014 * */ public class EqualPopulationBinningTest { @Test public void testBinning() { Random rd = new Random(System.currentTimeMillis()); EqualPopulationBinning binning = new EqualPopulationBinning(10); long start = System.currentTimeMillis(); for ( int i = 0; i < 100000; i ++ ) { binning.addData(Integer.toString(rd.nextInt() % 1000)); } long end = System.currentTimeMillis(); System.out.println("Spend " + (end - start) + " milli-seconds to create data."); System.out.println(binning.getDataBin()); String binStr = binning.objToString(); String[] fieldArr = binStr.split(Character.toString(AbstractBinning.FIELD_SEPARATOR)); Assert.assertTrue(fieldArr.length == 6); } @Test public void tesGussiantBinning() { long startTs = System.currentTimeMillis(); Random rd = new Random(System.currentTimeMillis()); EqualPopulationBinning binning = new EqualPopulationBinning(10); for ( int i = 0; i < 1000; i ++ ) { binning.addData(rd.nextGaussian() % 1000, rd.nextDouble() * 5.0); } System.out.println(binning.getDataBin()); System.out.println("spend " + (System.currentTimeMillis() - startTs) + " milliseconds to generate binnig."); } @Test public void testObjectSeri() { Random rd = new Random(System.currentTimeMillis()); EqualPopulationBinning binning = new EqualPopulationBinning(10); for ( int i = 0; i < 10000; i ++ ) { binning.addData(Double.toString(rd.nextGaussian() % 1000)); } String binningStr = binning.objToString(); String originalBinningData = binning.getDataBin().toString(); ModelConfig modelConfig = new ModelConfig(); modelConfig.getStats().setBinningMethod(BinningMethod.EqualPositive); ColumnConfig columnConfig = new ColumnConfig(); columnConfig.setColumnType(ColumnType.N); AbstractBinning<?> otherBinning = AbstractBinning.constructBinningFromStr(modelConfig, columnConfig, binningStr); String newBinningData = otherBinning.getDataBin().toString(); Assert.assertEquals(originalBinningData, newBinningData); } @Test public void testBingMerge() { List<EqualPopulationBinning> binningList = new ArrayList<EqualPopulationBinning>(); long start, end; for ( int i = 0; i < 10; i ++ ) { start = System.currentTimeMillis(); binningList.add(createBinning()); end = System.currentTimeMillis(); System.out.println("Spend " + (end - start) + " milli-seconds to create " + i + "-th binning."); } EqualPopulationBinning binning = createBinning(); for ( int i = 0; i < 10; i ++ ) { EqualPopulationBinning another = binningList.get(i); start = System.currentTimeMillis(); binning.mergeBin(another); end = System.currentTimeMillis(); Assert.assertTrue((end - start) < 1000); } System.out.println(binning.getDataBin().toString()); } /** * @return */ private EqualPopulationBinning createBinning() { Random rd = new Random(System.currentTimeMillis()); EqualPopulationBinning binning = new EqualPopulationBinning(20); for ( int i = 0; i < 18000; i ++ ) { binning.addData(Double.toString(rd.nextDouble() % 1000)); } return binning; } @Test public void testSerialObject() { EqualPopulationBinning binning = new EqualPopulationBinning(10); String binStr = binning.objToString(); String[] fieldArr = binStr.split(Character.toString(AbstractBinning.FIELD_SEPARATOR)); Assert.assertTrue(fieldArr.length == 5); } @Test public void testUsageAge() throws IOException { EqualPopulationBinning binning = new EqualPopulationBinning(10); List<String> usageList = IOUtils.readLines(new FileInputStream("src/test/resources/example/binning-data/usage_age.txt")); for ( String data : usageList ) { binning.addData(data); } List<Double> binBoundary = binning.getDataBin(); Assert.assertTrue(binBoundary.size() > 1); } @Test public void testReturn180d() throws IOException { EqualPopulationBinning binning = new EqualPopulationBinning(10); List<String> usageList = IOUtils.readLines(new FileInputStream("src/test/resources/example/binning-data/return_lt_180d_amt.txt")); for ( String data : usageList ) { binning.addData(data); } List<Double> binBoundary = binning.getDataBin(); Assert.assertTrue(binBoundary.size() > 1); } }