/* * Copyright [2013-2015] PayPal Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package ml.shifu.shifu.core.binning; import ml.shifu.shifu.core.binning.obj.NumBinInfo; import org.apache.commons.lang.StringUtils; import org.testng.Assert; import org.testng.annotations.Test; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.Random; /** * Created by zhanhu on 7/6/16. */ public class DynamicBinningTest { @Test public void testDIB() { List<NumBinInfo> binInfoList = createNumBinInfos(30); DynamicBinning dynamicBinning = new DynamicBinning(binInfoList, 10); Assert.assertTrue(dynamicBinning.getDataBin().size() <= 10); } public static List<NumBinInfo> createNumBinInfos(int binCnt) { Random rd = new Random(System.currentTimeMillis()); List<Double> thresholds = new ArrayList<Double>(binCnt - 1); for ( int i = 0; i < binCnt - 1; i ++ ) { thresholds.add(rd.nextGaussian() * 200); } Collections.sort(thresholds); List<NumBinInfo> binInfoList = NumBinInfo.constructNumBinfo(StringUtils.join(thresholds, ':'), ':'); for ( NumBinInfo binInfo : binInfoList ) { if ( rd.nextDouble() > 0.45 ) { int total = rd.nextInt() % 1000; int positive = (int) (total * rd.nextDouble()); binInfo.setTotalInstCnt(total); binInfo.setPositiveInstCnt(positive); } } return binInfoList; } @Test public void testArsData() { String binsData = "-7919.295, -7836.25146, -7753.207920000001, -7670.164380000001, -7587.120840000001, -7504.077300000002, -7421.033760000002, -7337.990220000002, -7254.946680000003, -7171.903140000003, -7088.859600000003, -7005.816060000004, -6922.772520000004, -6839.728980000004, -6756.685440000005, -6673.641900000005, -6590.598360000005, -6507.554820000006, -6424.511280000006, -6341.467740000006, -6258.424200000007, -6175.380660000007, -6092.337120000007, -6009.293580000008, -5926.250040000008, -5843.206500000008, -5760.162960000009, -5677.119420000009, -5594.075880000009, -5511.03234000001, -5427.98880000001, -5344.94526000001, -5261.901720000011, -5178.858180000011, -5095.814640000011, -5012.771100000012, -4929.727560000012, -4846.684020000012, -4763.640480000013, -4680.596940000013, -4597.553400000013, -4514.509860000014, -4431.466320000014, -4348.422780000014, -4265.379240000015, -4182.335700000015, -4099.292160000015, -4016.2486200000153, -3933.205080000015, -3850.161540000015, -3767.118000000015, -3684.074460000015, -3601.0309200000147, -3517.9873800000146, -3434.9438400000145, -3351.9003000000143, -3268.856760000014, -3185.813220000014, -3102.769680000014, -3019.726140000014, -2936.6826000000137, -2853.6390600000136, -2770.5955200000135, -2687.5519800000134, -2604.5084400000133, -2521.464900000013, -2438.421360000013, -2355.377820000013, -2272.3342800000128, -2189.2907400000126, -2106.2472000000125, -2023.2036600000126, -1940.1601200000127, -1857.1165800000128, -1774.073040000013, -1691.029500000013, -1607.9859600000132, -1524.9424200000133, -1441.8988800000134, -1358.8553400000135, -1275.8118000000136, -1192.7682600000137, -1109.7247200000138, -1026.681180000014, -943.6376400000139, -860.5941000000139, -777.5505600000139, -694.5070200000139, -611.4634800000139, -528.4199400000139, -445.37640000001386, -362.33286000001385, -279.28932000001384, -196.24578000001384, -113.20224000001384, -30.15870000001385, 52.88483999998614, 135.92837999998613, 218.97191999998614, 302.01545999998615, 385.05899999998616, 468.10253999998616, 551.1460799999861, 634.1896199999861, 717.2331599999861, 800.2766999999861, 883.3202399999861, 966.3637799999861, 1049.4073199999862, 1132.450859999986, 1215.494399999986, 1298.5379399999858, 1381.5814799999857, 1464.6250199999856, 1547.6685599999855, 1630.7120999999854, 1713.7556399999853, 1796.7991799999852, 1879.842719999985, 1962.886259999985, 2045.9297999999849, 2128.973339999985, 2212.016879999985, 2295.0604199999852, 2378.1039599999854, 2461.1474999999855, 2544.1910399999856, 2627.2345799999857, 2710.278119999986, 2793.321659999986, 2876.365199999986, 2959.408739999986, 3042.4522799999863, 3125.4958199999865, 3208.5393599999866, 3291.5828999999867, 3374.626439999987, 3457.669979999987, 3540.713519999987, 3623.757059999987, 3706.8005999999873, 3789.8441399999874, 3872.8876799999875, 3955.9312199999877, 4038.974759999988, 4122.018299999988, 4205.061839999988, 4288.105379999987, 4371.148919999987, 4454.192459999987, 4537.235999999986, 4620.279539999986, 4703.323079999986, 4786.366619999985, 4869.410159999985, 4952.453699999985, 5035.497239999984, 5118.540779999984, 5201.584319999984, 5284.627859999983, 5367.671399999983, 5450.714939999983, 5533.758479999982, 5616.802019999982, 5699.845559999982, 5782.889099999981, 5865.932639999981, 5948.976179999981, 6032.01971999998, 6115.06325999998, 6198.10679999998, 6281.150339999979, 6364.193879999979, 6447.237419999979, 6530.280959999978, 6613.324499999978, 6696.368039999978, 6779.411579999977, 6862.455119999977, 6945.498659999977, 7028.542199999976, 7111.585739999976, 7194.629279999976, 7277.672819999975, 7360.716359999975, 7443.759899999975, 7526.803439999974, 7609.846979999974, 7692.890519999974, 7775.934059999973, 7858.977599999973, 7942.021139999973, 8025.064679999972, 8108.108219999972, 8191.151759999972, 8274.195299999972, 8357.238839999973, 8440.282379999973, 8523.325919999974, 8606.369459999974, 8689.412999999975, 8772.456539999976, 8855.500079999976, 8938.543619999977, 9021.587159999977, 9104.630699999978, 9187.674239999978, 9270.717779999979, 9353.76131999998, 9436.80485999998, 9519.84839999998, 9602.891939999981, 9685.935479999982, 9768.979019999982, 9852.022559999983, 9935.066099999984, 10018.109639999984, 10101.153179999985, 10184.196719999985, 10267.240259999986, 10350.283799999987, 10433.327339999987, 10516.370879999988, 10599.414419999988, 10682.457959999989, 10765.50149999999, 10848.54503999999, 10931.58857999999, 11014.632119999991, 11097.675659999992, 11180.719199999992, 11263.762739999993, 11346.806279999993, 11429.849819999994, 11512.893359999995, 11595.936899999995, 11678.980439999996, 11762.023979999996, 11845.067519999997, 11928.111059999997, 12011.154599999998, 12094.198139999999, 12177.24168, 12260.28522, 12343.32876, 12426.3723, 12509.415840000001, 12592.459380000002, 12675.502920000003, 12758.546460000003, 12841.590000000004, 12924.633540000004, 13007.677080000005, 13090.720620000006, 13173.764160000006, 13256.807700000007, 13339.851240000007, 13422.894780000008, 13505.938320000008, 13588.981860000009, 13672.02540000001, 13755.06894000001, 13838.11248000001, 13921.156020000011, 14004.199560000012, 14087.243100000012, 14170.286640000013, 14253.330180000014, 14336.373720000014, 14419.417260000015, 14502.460800000015, 14585.504340000016, 14668.547880000016, 14751.591420000017, 14834.634960000018, 14917.678500000018, 15000.722040000019, 15083.76558000002, 15166.80912000002, 15249.85266000002, 15332.896200000021, 15415.939740000022, 15498.983280000022, 15582.026820000023, 15665.070360000023, 15748.113900000024, 15831.157440000025, 15914.200980000025, 15997.244520000026, 16080.288060000026, 16163.331600000027, 16246.375140000027, 16329.418680000028, 16412.462220000027, 16495.505760000025, 16578.549300000024, 16661.592840000023, 16744.63638000002, 16827.67992000002, 16910.72346000002, 16993.767000000018, 17076.810540000017, 17159.854080000016, 17242.897620000014, 17325.941160000013, 17408.98470000001, 17492.02824000001, 17575.07178000001, 17658.115320000008, 17741.158860000007, 17824.202400000006, 17907.245940000004, 17990.289480000003, 18073.333020000002, 18156.37656, 18239.4201, 18322.463639999998, 18405.507179999997, 18488.550719999996, 18571.594259999994, 18654.637799999993, 18737.681339999992, 18820.72487999999, 18903.76841999999, 18986.811959999988, 19069.855499999987, 19152.899039999986, 19235.942579999984, 19318.986119999983, 19402.029659999982, 19485.07319999998, 19568.11673999998, 19651.16027999998, 19734.203819999977, 19817.247359999976, 19900.290899999974, 19983.334439999973, 20066.377979999972, 20149.42151999997, 20232.46505999997, 20315.50859999997, 20398.552139999967, 20481.595679999966, 20564.639219999965, 20647.682759999963, 20730.726299999962, 20813.76983999996, 20896.81337999996, 20979.85691999996, 21062.900459999957, 21145.943999999956, 21228.987539999955, 21312.031079999953, 21395.074619999952, 21478.11815999995, 21561.16169999995, 21644.20523999995, 21727.248779999947, 21810.292319999946, 21893.335859999945, 21976.379399999943, 22059.422939999942, 22142.46647999994, 22225.51001999994, 22308.55355999994, 22391.597099999937, 22474.640639999936, 22557.684179999935, 22640.727719999933, 22723.771259999932, 22806.81479999993, 22889.85833999993, 22972.90187999993, 23055.945419999927, 23138.988959999926, 23222.032499999925, 23305.076039999924, 23388.119579999922, 23471.16311999992, 23554.20665999992, 23637.25019999992, 23720.293739999917, 23803.337279999916, 23886.380819999915, 23969.424359999914, 24052.467899999912, 24135.51143999991, 24218.55497999991, 24301.59851999991, 24384.642059999907, 24467.685599999906, 24550.729139999905, 24633.772679999904, 24716.816219999902, 24799.8597599999, 24882.9032999999, 24965.9468399999, 25048.990379999897, 25132.033919999896, 25215.077459999895, 25298.120999999894, 25381.164539999892, 25464.20807999989, 25547.25161999989, 25630.29515999989, 25713.338699999887, 25796.382239999886, 25879.425779999885, 25962.469319999884, 26045.512859999882, 26128.55639999988, 26211.59993999988, 26294.64347999988, 26377.687019999878, 26460.730559999876, 26543.774099999875, 26626.817639999874, 26709.861179999873, 26792.90471999987, 26875.94825999987, 26958.99179999987, 27042.035339999868, 27125.078879999866, 27208.122419999865, 27291.165959999864, 27374.209499999863, 27457.25303999986, 27540.29657999986, 27623.34011999986, 27706.383659999858, 27789.427199999856, 27872.470739999855, 27955.514279999854, 28038.557819999853, 28121.60135999985, 28204.64489999985, 28287.68843999985, 28370.731979999848, 28453.775519999846, 28536.819059999845, 28619.862599999844, 28702.906139999843, 28785.94967999984, 28868.99321999984, 28952.03675999984, 29035.080299999838, 29118.123839999836, 29201.167379999835, 29284.210919999834, 29367.254459999833, 29450.29799999983, 29533.34153999983, 29616.38507999983, 29699.428619999828, 29782.472159999827, 29865.515699999825, 29948.559239999824, 30031.602779999823, 30114.64631999982, 30197.68985999982, 30280.73339999982, 30363.776939999818, 30446.820479999817, 30529.864019999815, 30612.907559999814, 30695.951099999813, 30778.99463999981, 30862.03817999981, 30945.08171999981, 31028.125259999808, 31111.168799999807, 31194.212339999805, 31277.255879999804, 31360.299419999803, 31443.3429599998, 31526.3864999998, 31609.4300399998, 31692.473579999798, 31775.517119999797, 31858.560659999795, 31941.604199999794, 32024.647739999793, 32107.69127999979, 32190.73481999979, 32273.77835999979, 32356.821899999788, 32439.865439999787, 32522.908979999786, 32605.952519999784, 32688.996059999783, 32772.039599999785, 32855.083139999784, 32938.12667999978, 33021.17021999978, 33104.21375999978, 33187.25729999978, 33270.30083999978, 33353.34437999978, 33436.387919999775, 33519.431459999774, 33602.47499999977, 33685.51853999977, 33768.56207999977, 33851.60561999977, 33934.64915999977, 34017.69269999977, 34100.736239999766, 34183.779779999764, 34266.82331999976, 34349.86685999976, 34432.91039999976, 34515.95393999976, 34598.99747999976, 34682.04101999976, 34765.084559999756, 34848.128099999754, 34931.17163999975, 35014.21517999975, 35097.25871999975, 35180.30225999975, 35263.34579999975, 35346.38933999975, 35429.432879999746, 35512.476419999744, 35595.51995999974, 35678.56349999974, 35761.60703999974, 35844.65057999974, 35927.69411999974, 36010.73765999974, 36093.781199999736, 36176.824739999734, 36259.86827999973, 36342.91181999973, 36425.95535999973, 36508.99889999973, 36592.04243999973, 36675.08597999973, 36758.129519999726, 36841.173059999725, 36924.21659999972, 37007.26013999972, 37090.30367999972, 37173.34721999972, 37256.39075999972, 37339.43429999972, 37422.477839999716, 37505.521379999715, 37588.56491999971, 37671.60845999971, 37754.65199999971, 37837.69553999971, 37920.73907999971, 38003.78261999971, 38086.826159999706, 38169.869699999705, 38252.9132399997, 38335.9567799997, 38419.0003199997, 38502.0438599997, 38585.0873999997, 38668.1309399997, 38751.174479999696, 38834.218019999695, 38917.26155999969, 39000.30509999969, 39083.34863999969, 39166.39217999969, 39249.43571999969, 39332.47925999969, 39415.522799999686, 39498.566339999685, 39581.60987999968, 39664.65341999968, 39747.69695999968, 39830.74049999968, 39913.78403999968, 39996.82757999968, 40079.871119999676, 40162.914659999675, 40245.95819999967, 40329.00173999967, 40412.04527999967, 40495.08881999967, 40578.13235999967, 40661.17589999967, 40744.219439999666, 40827.262979999665, 40910.30651999966, 40993.35005999966, 41076.39359999966, 41159.43713999966, 41242.48067999966, 41325.52421999966, 41408.567759999656, 41491.611299999655, 41574.65483999965, 41657.69837999965, 41740.74191999965, 41823.78545999965, 41906.82899999965, 41989.87253999965, 42072.916079999646, 42155.959619999645, 42239.003159999644, 42322.04669999964, 42405.09023999964, 42488.13377999964, 42571.17731999964, 42654.22085999964, 42737.264399999636, 42820.307939999635, 42903.351479999634, 42986.39501999963, 43069.43855999963, 43152.48209999963, 43235.52563999963, 43318.56917999963, 43401.612719999626, 43484.656259999625, 43567.699799999624, 43650.74333999962, 43733.78687999962, 43816.83041999962, 43899.87395999962, 43982.91749999962, 44065.961039999616, 44149.004579999615, 44232.048119999614, 44315.09165999961, 44398.13519999961, 44481.17873999961, 44564.22227999961, 44647.26581999961, 44730.30935999961, 44813.352899999605, 44896.396439999604, 44979.4399799996, 45062.4835199996, 45145.5270599996, 45228.5705999996, 45311.6141399996, 45394.6576799996, 45477.701219999595, 45560.744759999594, 45643.78829999959, 45726.83183999959, 45809.87537999959, 45892.91891999959, 45975.96245999959, 46059.00599999959, 46142.049539999585, 46225.093079999584, 46308.13661999958, 46391.18015999958, 46474.22369999958, 46557.26723999958, 46640.31077999958, 46723.35431999958, 46806.397859999575, 46889.441399999574, 46972.48493999957, 47055.52847999957, 47138.57201999957, 47221.61555999957, 47304.65909999957, 47387.70263999957, 47470.746179999565, 47553.789719999564, 47636.83325999956, 47719.87679999956, 47802.92033999956, 47885.96387999956, 47969.00741999956, 48052.05095999956, 48135.094499999555, 48218.138039999554, 48301.18157999955, 48384.22511999955, 48467.26865999955, 48550.31219999955, 48633.35573999955, 48716.39927999955, 48799.442819999545, 48882.486359999544, 48965.52989999954, 49048.57343999954, 49131.61697999954, 49214.66051999954, 49297.70405999954, 49380.74759999954, 49463.791139999536, 49546.834679999534, 49629.87821999953, 49712.92175999953, 49795.96529999953, 49879.00883999953, 49962.05237999953, 50045.09591999953, 50128.139459999526, 50211.182999999524, 50294.22653999952, 50377.27007999952, 50460.31361999952, 50543.35715999952, 50626.40069999952, 50709.44423999952, 50792.487779999516, 50875.531319999514, 50958.57485999951, 51041.61839999951, 51124.66193999951, 51207.70547999951, 51290.74901999951, 51373.79255999951, 51456.836099999506, 51539.879639999504, 51622.9231799995, 51705.9667199995, 51789.0102599995, 51872.0537999995, 51955.0973399995, 52038.1408799995, 52121.184419999496, 52204.227959999494, 52287.27149999949, 52370.31503999949, 52453.35857999949, 52536.40211999949, 52619.44565999949, 52702.48919999949, 52785.532739999486, 52868.576279999485, 52951.61981999948, 53034.66335999948, 53117.70689999948, 53200.75043999948, 53283.79397999948, 53366.83751999948, 53449.881059999476, 53532.924599999475, 53615.96813999947, 53699.01167999947, 53782.05521999947, 53865.09875999947, 53948.14229999947, 54031.18583999947, 54114.229379999466, 54197.272919999465, 54280.31645999946, 54363.35999999946, 54446.40353999946, 54529.44707999946, 54612.49061999946, 54695.53415999946, 54778.577699999456, 54861.621239999455, 54944.66477999945, 55027.70831999945, 55110.75185999945, 55193.79539999945, 55276.83893999945, 55359.88247999945, 55442.926019999446, 55525.969559999445, 55609.01309999944, 55692.05663999944, 55775.10017999944, 55858.14371999944, 55941.18725999944, 56024.23079999944, 56107.274339999436, 56190.317879999435, 56273.36141999943, 56356.40495999943, 56439.44849999943, 56522.49203999943, 56605.53557999943, 56688.57911999943, 56771.622659999426, 56854.666199999425, 56937.70973999942, 57020.75327999942, 57103.79681999942, 57186.84035999942, 57269.88389999942, 57352.92743999942, 57435.970979999416, 57519.014519999415, 57602.058059999414, 57685.10159999941, 57768.14513999941, 57851.18867999941, 57934.23221999941, 58017.27575999941, 58100.319299999406, 58183.362839999405, 58266.406379999404, 58349.4499199994, 58432.4934599994, 58515.5369999994, 58598.5805399994, 58681.6240799994, 58764.667619999396, 58847.711159999395, 58930.754699999394, 59013.79823999939, 59096.84177999939, 59179.88531999939, 59262.92885999939, 59345.97239999939, 59429.015939999386, 59512.059479999385, 59595.103019999384, 59678.14655999938, 59761.19009999938, 59844.23363999938, 59927.27717999938, 60010.32071999938, 60093.36425999938, 60176.407799999375, 60259.451339999374, 60342.49487999937, 60425.53841999937, 60508.58195999937, 60591.62549999937, 60674.66903999937, 60757.71257999937, 60840.756119999365, 60923.799659999364, 61006.84319999936, 61089.88673999936, 61172.93027999936, 61255.97381999936, 61339.01735999936, 61422.06089999936, 61505.104439999355, 61588.147979999354, 61671.19151999935, 61754.23505999935, 61837.27859999935, 61920.32213999935, 62003.36567999935, 62086.40921999935, 62169.452759999345, 62252.496299999344, 62335.53983999934, 62418.58337999934, 62501.62691999934, 62584.67045999934, 62667.71399999934, 62750.75753999934, 62833.801079999335, 62916.844619999334, 62999.88815999933, 63082.93169999933, 63165.97523999933, 63249.01877999933, 63332.06231999933, 63415.10585999933, 63498.149399999325, 63581.192939999324, 63664.23647999932, 63747.28001999932, 63830.32355999932, 63913.36709999932, 63996.41063999932, 64079.45417999932, 64162.497719999315, 64245.541259999314, 64328.58479999931, 64411.62833999931, 64494.67187999931, 64577.71541999931, 64660.75895999931, 64743.80249999931, 64826.846039999306, 64909.889579999304, 64992.9331199993, 65075.9766599993, 65159.0201999993, 65242.0637399993, 65325.1072799993, 65408.1508199993, 65491.194359999296, 65574.2378999993, 65657.2814399993, 65740.32497999929, 65823.36851999929, 65906.41205999929, 65989.45559999929, 66072.49913999929, 66155.54267999929, 66238.58621999928, 66321.62975999928, 66404.67329999928, 66487.71683999928, 66570.76037999928, 66653.80391999928, 66736.84745999928, 66819.89099999928, 66902.93453999927, 66985.97807999927, 67069.02161999927, 67152.06515999927, 67235.10869999927, 67318.15223999927, 67401.19577999927, 67484.23931999927, 67567.28285999926, 67650.32639999926, 67733.36993999926, 67816.41347999926, 67899.45701999926, 67982.50055999926, 68065.54409999926, 68148.58763999926, 68231.63117999925, 68314.67471999925, 68397.71825999925, 68480.76179999925, 68563.80533999925, 68646.84887999925, 68729.89241999925, 68812.93595999925, 68895.97949999924, 68979.02303999924, 69062.06657999924, 69145.11011999924, 69228.15365999924, 69311.19719999924, 69394.24073999924, 69477.28427999924, 69560.32781999923, 69643.37135999923, 69726.41489999923, 69809.45843999923, 69892.50197999923, 69975.54551999923, 70058.58905999923, 70141.63259999923, 70224.67613999922, 70307.71967999922, 70390.76321999922, 70473.80675999922, 70556.85029999922, 70639.89383999922, 70722.93737999922, 70805.98091999922, 70889.02445999921, 70972.06799999921, 71055.11153999921, 71138.15507999921, 71221.19861999921, 71304.24215999921, 71387.28569999921, 71470.3292399992, 71553.3727799992, 71636.4163199992, 71719.4598599992, 71802.5033999992, 71885.5469399992, 71968.5904799992, 72051.6340199992, 72134.6775599992, 72217.7210999992, 72300.7646399992, 72383.80817999919, 72466.85171999919, 72549.89525999919, 72632.93879999919, 72715.98233999919, 72799.02587999919, 72882.06941999918, 72965.11295999918, 73048.15649999918, 73131.20003999918, 73214.24357999918, 73297.28711999918, 73380.33065999918, 73463.37419999918, 73546.41773999917, 73629.46127999917, 73712.50481999917, 73795.54835999917, 73878.59189999917, 73961.63543999917, 74044.67897999917, 74127.72251999917, 74210.76605999917, 74293.80959999916, 74376.85313999916, 74459.89667999916, 74542.94021999916, 74625.98375999916, 74709.02729999916, 74792.07083999916, 74875.11437999916, 74958.15791999915, 75041.20145999915, 75124.245"; List<NumBinInfo> binInfoList = NumBinInfo.constructNumBinfo(binsData, ','); System.out.println(binInfoList.size()); } }