/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ml.shifu.shifu.core.binning;
import java.util.ArrayList;
import java.util.List;
import ml.shifu.shifu.util.QuickSort;
import org.apache.commons.lang.StringUtils;
public class NativeBinning extends AbstractBinning<Double> {
private List<Double> array;
private boolean mergeEnabled;
private final static double EPS = 1e-5;
public NativeBinning(int binningNum, boolean mergeEnabled) {
super(binningNum);
this.mergeEnabled = mergeEnabled;
this.array = new ArrayList<Double>();
}
@Override
public void addData(String val) {
String fval = StringUtils.trimToEmpty(val);
if(!isMissingVal(fval)) {
double dval = 0;
try {
dval = Double.parseDouble(fval);
} catch (NumberFormatException e) {
super.incInvalidValCnt();
return;
}
array.add(dval);
} else {
super.incMissingValCnt();
}
}
@Override
public List<Double> getDataBin() {
QuickSort.sort(array);
int actualBinSize = (int) Math.ceil((double) array.size() / (double) expectedBinningNum);
int actualBiningNum = this.expectedBinningNum;
List<Double> binBoundary = new ArrayList<Double>();
binBoundary.add(Double.NEGATIVE_INFINITY);
double prevData = array.get(0);
int currBinSize = 0;
int currBinIndex = 0;
for(int i = 0; i < array.size(); i++) {
double currData = array.get(i);
currBinSize++;
if(currBinSize >= actualBinSize) {
if(currBinIndex == actualBiningNum - 1 && i != array.size() - 1) {
continue;
}
if(i == 0 || (mergeEnabled == true && Math.abs(currData - prevData) > EPS) || mergeEnabled == false) {
if(i == array.size() - 1) {
break;
}
currBinIndex++;
currBinSize = 0;
binBoundary.add(currData);
}
}
prevData = currData;
}
// binBoundary.set(binBoundary.size() - 1, Double.POSITIVE_INFINITY);
return binBoundary;
}
}