/* * Copyright [2013-2015] PayPal Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package ml.shifu.shifu.udf.stats; import java.util.Arrays; import java.util.HashSet; import java.util.List; import java.util.Set; import ml.shifu.shifu.util.CommonUtils; /** * Created by Mark on 5/27/2016. */ public class NumericCounter extends Counter { private Set<String> missingValSet = new HashSet<String>(); private List<Double> binBoundary; @SuppressWarnings("unused") private String name; private Long[] counter; private double unitSum = 0.0; public NumericCounter(List<String> missingInvalidValues, String name, List<Double> binBoundary) { this.missingValSet.addAll(missingInvalidValues); this.name = name; this.binBoundary = binBoundary; this.counter = new Long[binBoundary.size() + 1]; Arrays.fill(counter, 0L); } @Override public void addData(String val) { if ( val == null || missingValSet.contains(val) ) { counter[binBoundary.size()] = counter[binBoundary.size()] + 1; } else { try { Double dVal = Double.parseDouble(val.toString()); int index = CommonUtils.getBinIndex(binBoundary, dVal); counter[index] = counter[index] + 1; unitSum += dVal; } catch (Exception e) { // logger.warn("Unable to count this column {} with {}, using default value", name, val); counter[binBoundary.size()] = counter[binBoundary.size()] + 1; } } } @Override public List<Long> getCounter() { return Arrays.asList(counter); } @Override public double getUnitMean() { long total = getTotalInstCnt(); double unitMean; if ( total == 0 || total == counter[binBoundary.size()] ){ // no instance or all missing unitMean = Double.NaN; } else { unitMean = this.unitSum / total; } return unitMean; } @Override public double getMissingRate() { long total = getTotalInstCnt(); double missingInstCnt = counter[binBoundary.size()]; return ((total != 0) ? missingInstCnt/total : 0.0); } @Override public long getTotalInstCnt() { long total = 0; for ( Long val: counter ) { total += val; } return total; } }