/* * chombo: Hadoop Map Reduce utility * Author: Pranab Ghosh * * Licensed under the Apache License, Version 2.0 (the "License"); you * may not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. */ package org.chombo.util; import java.io.IOException; import java.util.HashMap; import java.util.Map; import org.apache.hadoop.conf.Configuration; /** * Zscore based outlier detector * @author pranab * */ public class AttributeZscoreFilter { private Map<Integer, ZscoreFilter> filters = new HashMap<Integer, ZscoreFilter>(); /** * @param attrZscores * @param config * @param statsFilePathParam * @param fieldDelim * @throws IOException */ public AttributeZscoreFilter(Map<Integer, Double> attrZscores, Configuration config, String statsFilePathParam, String fieldDelim) throws IOException { NumericalAttrStatsManager statsManager = new NumericalAttrStatsManager(config, statsFilePathParam, fieldDelim); for (int attr : attrZscores.keySet()) { double zscore = attrZscores.get(attr); double mean = statsManager.getMean(attr); double stdDev = statsManager.getStdDev(attr); filters.put(attr, new ZscoreFilter(mean, stdDev, zscore)); } } /** * @param attrOrd * @param value * @return */ public boolean isWithinBound(int attrOrd, double value) { return filters.get(attrOrd).isWithinBound(value); } /** * @author pranab * */ private static class ZscoreFilter { private double lowerBound; private double upperBound; public ZscoreFilter(double mean, double stdDev, double zscore) { lowerBound = mean - zscore * stdDev; upperBound = mean + zscore * stdDev; } public boolean isWithinBound(double value) { return value >= lowerBound && value <= upperBound; } } }