/*
* beymani: Outlier and anomaly detection
* Author: Pranab Ghosh
*
* Licensed under the Apache License, Version 2.0 (the "License"); you
* may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package org.beymani.predictor;
import java.io.IOException;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.chombo.storm.Cache;
import org.chombo.storm.MessageQueue;
import org.chombo.util.ConfigUtility;
import org.chombo.util.MedianStatsManager;
import org.chombo.util.Utility;
/**
 * Outlier predictor that scores a record by the weighted average, over the
 * configured attributes, of {@code |value - median| / medianAbsoluteDivergence}.
 * Median and divergence statistics are looked up through a
 * {@link MedianStatsManager}, either globally or keyed by the record's ID fields.
 *
 * @author pranab
 *
 */
public class RobustZscorePredictor extends ModelBasedPredictor {
    private final int[] idOrdinals;
    private final int[] attrOrdinals;
    private final MedianStatsManager medStatManager;
    private final String fieldDelim;
    private final double[] attrWeights;
    // Only initialised by the Storm constructor; left null for Hadoop MR usage.
    protected MessageQueue outQueue;
    protected Cache cache;

    /**
     * Storm usage. Median and MAD model contents are fetched from the cache and
     * real time detection is switched on, so scores above the threshold are
     * written to the output queue.
     *
     * @param config configuration map
     * @param idOrdinalsParam config key for the ordinals of the ID fields
     * @param attrListParam config key for the ordinals of the attributes to score
     * @param fieldDelimParam config key for the record field delimiter (default ",")
     * @param attrWeightParam config key for the per attribute weights
     * @param medModelKeyParam config key holding the cache key of the median model
     * @param madModelKeyParam config key holding the cache key of the MAD model
     * @param scoreThresholdParam config key for the score threshold (default 3.0)
     * @throws IOException
     */
    public RobustZscorePredictor(Map config, String idOrdinalsParam, String attrListParam, String fieldDelimParam,
            String attrWeightParam, String medModelKeyParam, String madModelKeyParam, String scoreThresholdParam)
            throws IOException {
        idOrdinals = ConfigUtility.getIntArray(config, idOrdinalsParam);
        attrOrdinals = ConfigUtility.getIntArray(config, attrListParam);
        fieldDelim = ConfigUtility.getString(config, fieldDelimParam, ",");

        outQueue = MessageQueue.createMessageQueue(config, config.get("output.queue").toString());
        cache = Cache.createCache(config);

        // model contents live in the cache under keys named in the configuration
        String medKey = config.get(medModelKeyParam).toString();
        String medContent = cache.get(medKey);
        String madKey = config.get(madModelKeyParam).toString();
        String madContent = cache.get(madKey);
        medStatManager = new MedianStatsManager(medContent, madContent, ",", idOrdinals);

        attrWeights = ConfigUtility.getDoubleArray(config, attrWeightParam);
        scoreThreshold = ConfigUtility.getDouble(config, scoreThresholdParam, 3.0);
        realTimeDetection = true;
    }

    /**
     * Hadoop MR usage for robust zscore. Median and MAD statistics are loaded
     * through the {@link MedianStatsManager} from the configured file paths.
     *
     * @param config Hadoop configuration
     * @param idOrdinalsParam config key for the ordinals of the ID fields
     * @param attrListParam config key for the ordinals of the attributes to score
     * @param medFilePathParam config key for the median file path
     * @param madFilePathParam config key for the MAD file path
     * @param fieldDelimParam config key for the record field delimiter (default ",")
     * @param attrWeightParam config key for the per attribute weights, delimited
     *        with the field delimiter
     * @param scoreThresholdParam config key for the score threshold; the value is
     *        parsed as a double with no default
     * @throws IOException
     */
    public RobustZscorePredictor(Configuration config, String idOrdinalsParam, String attrListParam,
            String medFilePathParam, String madFilePathParam, String fieldDelimParam, String attrWeightParam,
            String scoreThresholdParam) throws IOException {
        idOrdinals = Utility.intArrayFromString(config.get(idOrdinalsParam));
        attrOrdinals = Utility.intArrayFromString(config.get(attrListParam));
        medStatManager = new MedianStatsManager(config, medFilePathParam, madFilePathParam,
                ",", idOrdinals);
        fieldDelim = config.get(fieldDelimParam, ",");

        //attribute weights
        attrWeights = Utility.doubleArrayFromString(config.get(attrWeightParam), fieldDelim);
        scoreThreshold = Double.parseDouble(config.get(scoreThresholdParam));
    }

    /**
     * Computes the weighted robust zscore of a record. Each configured attribute
     * contributes {@code |value - median| / MAD} multiplied by its weight; the
     * sum is divided by the total weight. When ID ordinals are configured the
     * statistics are looked up by the composite key built from the ID fields,
     * otherwise the global statistics are used. Sets {@code scoreAboveThreshold}
     * and, with real time detection on, sends {@code entityID + " " + score} to
     * the output queue when the score exceeds the threshold.
     *
     * @param entityID ID of the entity, used only in the queued message
     * @param record delimited record; split with the field delimiter (treated as
     *        a regular expression by {@link String#split(String)})
     * @return weighted average robust zscore
     */
    @Override
    public double execute(String entityID, String record) {
        String[] items = record.split(fieldDelim);

        // the composite key depends only on the record, so build it once
        String compKey = null != idOrdinals ? Utility.join(items, idOrdinals, fieldDelim) : null;

        double score = 0;
        double totalWt = 0;
        int i = 0;
        for (int ord : attrOrdinals) {
            double val = Double.parseDouble(items[ord]);
            double weight = attrWeights[i];
            double deviation;
            if (null != compKey) {
                deviation = Math.abs(val - medStatManager.getKeyedMedian(compKey, ord)) /
                        medStatManager.getKeyedMedAbsDivergence(compKey, ord);
            } else {
                deviation = Math.abs(val - medStatManager.getMedian(ord)) /
                        medStatManager.getMedAbsDivergence(ord);
            }
            score += deviation * weight;

            // accumulate the normaliser and advance to the next attribute's weight
            totalWt += weight;
            ++i;
        }
        score /= totalWt;

        scoreAboveThreshold = score > scoreThreshold;
        if (realTimeDetection && scoreAboveThreshold) {
            //write if above threshold
            outQueue.send(entityID + " " + score);
        }
        return score;
    }
}