package ch.epfl.gsn.monitoring;
import java.util.Map;
import java.util.TreeMap;
import java.util.HashMap;
import java.util.Hashtable;
import java.util.ArrayList;
import java.util.Iterator;
import java.sql.*;
import org.slf4j.LoggerFactory;
import ch.epfl.gsn.Main;
import ch.epfl.gsn.beans.DataField;
import ch.epfl.gsn.beans.DataTypes;
import ch.epfl.gsn.beans.VSensorConfig;
import ch.epfl.gsn.monitoring.Anomaly;
import ch.epfl.gsn.storage.StorageManager;
import ch.epfl.gsn.vsensor.AbstractVirtualSensor;
import org.slf4j.Logger;
/*
* Each AbstractVirtualSensor will have a reference to its instance of AnomalyDetector
*/
public class AnomalyDetector implements Monitorable {
private static final transient Logger logger = LoggerFactory.getLogger (AnomalyDetector.class);
/* Reference to the VirtualSensor this instance belongs to */
private AbstractVirtualSensor sensor;
/* OutputStructure of the VirtualSensor */
private DataField [] fields;
/* HashMap functions
* Key : Function
* Value : ArrayList of Anomaly on which the function has to be applied
*/
private HashMap <String, ArrayList<Anomaly>> functions = new HashMap<String, ArrayList<Anomaly>>();
/* A list of supported functions*/
private ArrayList <String> supportedFunctions = new ArrayList <String> () {{
add("positive_outlier");
add("negative_outlier");
add("iqr");
add("unique");
}};
// Constant flag for specifiying driection of outlier | Helps in code readability
private static final boolean POSITIVE = true;
private static final boolean NEGATIVE = false;
public AnomalyDetector (AbstractVirtualSensor sensor) {
this.sensor = sensor;
this.fields = sensor.getVirtualSensorConfiguration().getOutputStructure();
initAnomalyParams();
}
/* Parsing anomaly parameters from the VSP file configuration provided
*
* Valid Key Format : anomaly.function.field[.groupByField]
* WHERE groupByField is optional
* Function could be iqr, positive_outlier etc.
* Valid Value format : interval[,value]
* WHERE value is optional (needed for outliers)
* interval = -1 if all the historical data is to be used
*
* Value is the threshold and interval is the recent time window over which
* we look for anomalies. Interval should be specified as for e.g. 5h (last 5 hours)
* or 3m (last 3 minutes)
* */
/* TODO: ALL input should be parsed and validated here
* Parsing should not be left for runtime
*/
private void initAnomalyParams() {
TreeMap <String, String> params = sensor.getVirtualSensorConfiguration().
getMainClassInitialParams();
if (params == null)
return;
for (Map.Entry<String,String> entry : params.entrySet()) {
String key = entry.getKey().trim(); // key should be of the form anomaly.function.field[.groupByField]
String value = entry.getValue().trim(); // Value should be of the form interval[,value]
if (!key.startsWith("anomaly.")) // For a parameter defined for anamaly, key (name) has to start with string "anomaly."
continue;
String parts [] = key.split("\\."); // Tokenizing String with '.'
String function = null, fieldName=null, groupBy = null;
if (parts.length < 3 || parts.length > 4) {
logger.info("ANOMALY-PARSING-ERROR ["+key+"="+value+"] "
+ "Key: " + key + " should be of the form anomaly.function.field[.groupByField]");
continue;
}
try {
function = parts[1];
if (!supportedFunctions.contains(function)) {
logger.info ("ANOMALY-PARSING-ERROR ["+key+"="+value+"] "
+ "Function: " + function + " not supported");
continue;
}
fieldName = parts[2];
DataField field = getDataField(fieldName);
if (field == null) {
logger.info ("ANOMALY-PARSING-ERROR ["+ key+"="+value+"] "
+ "Field: " + fieldName + " is invalid");
continue; // Move to next anomaly
}
DataField groupByField = null;
if (parts.length == 4) {
groupBy = parts[3];
groupByField = getDataField(groupBy);
if (groupByField == null) {
logger.info ("ANOMALY-PARSING-ERROR ["+ key+"="+value+"] "
+ "GroupBy: " + groupByField + " is invalid");
continue;
}
}
String valParts [] = value.split(",");
Anomaly anomaly;
if (valParts.length == 1) // Threshold is not provided
anomaly = new Anomaly (function,field,valParts[0],groupByField); //valParts[0] is the interval
else if (valParts.length == 2) // valParts[0] is interval and valParts[1] is threshold value
anomaly = new Anomaly (function,field,valParts[0], groupByField,valParts[1]);
else {
logger.info ("ANOMALY-PARSING-ERROR ["+ key+"="+value+"] "
+ "Value: " + value + " should be of the form value,interval");
break;
}
if (!functions.containsKey(function)) {
functions.put(function, new ArrayList <Anomaly>());
}
functions.get(function).add(anomaly);
logger.info (anomaly.toString());
} catch (Exception e) {
logger.info ("ANOMALY-PARSING-ERROR ["+key+" : " +value+"]");
logger.info (e.getMessage());
continue;
}
}
}
private DataField getDataField (String fieldName) {
for (int i = 0; i < fields.length; i++) {
if (fields[i].getName().equals(fieldName))
return fields[i];
}
return null;
}
/*
* Executes query for the positive or negative outlier and retrieves the no of anomalies
*/
private void countOutliers (Hashtable <String, Object> stat, boolean direction) throws SQLException {
if (direction == POSITIVE && !functions.containsKey("positive_outlier"))
return;
if (direction == NEGATIVE && !functions.containsKey("negative_outlier"))
return;
ArrayList <Anomaly> anomalies = null;
if (direction == POSITIVE)
anomalies = functions.get("positive_outlier");
else
anomalies = functions.get("negative_outlier");
if (anomalies == null || anomalies.size() == 0)
return;
StorageManager storageMan = Main.getStorage (sensor.getVirtualSensorConfiguration().getName());
Connection con = null;
PreparedStatement ps = null;
ResultSet rs = null;
// Get connection, process anomalies of this function, close connection
// Any SQLException thrown is propogated up the stack
con = storageMan.getConnection();
Iterator <Anomaly> iterator = anomalies.iterator();
while ( iterator.hasNext() ) {
Anomaly anomaly = iterator.next();
DataField field = anomaly.getField();
String query;
if (anomaly.isGroupBy())
query = getOutlierQueryByGroup (sensor.getVirtualSensorConfiguration().getName().toLowerCase(), field,
anomaly.getGroupByField(), direction);
else
query = getOutlierQuery (sensor.getVirtualSensorConfiguration().getName().toLowerCase(), field,
direction);
try {
ps = con.prepareStatement(query);
switch (field.getDataTypeID()) {
case DataTypes.DOUBLE:
ps.setDouble (1, Double.parseDouble(anomaly.getValue()));
break;
case DataTypes.SMALLINT:
ps.setShort(1, Short.parseShort (anomaly.getValue()));
break;
case DataTypes.INTEGER:
ps.setInt(1, Integer.parseInt (anomaly.getValue()));
break;
case DataTypes.FLOAT:
ps.setFloat(1, Float.parseFloat (anomaly.getValue()));
break;
default:
logger.info ("ANOMALY-EXECUTION-ERROR [anomaly."+ anomaly.getFunction()+"."+
field.getName()+"="+anomaly.getValue()+","+anomaly.getTimeStr()+"] "+ "Field: " +
field.getName() + " datatype " + field.getDataTypeID() + " not stuppored");
iterator.remove();
continue;
}
long timeStamp = this.getTimeStamp(anomaly.getTime());
ps.setDouble(2, timeStamp);
rs = ps.executeQuery();
if (anomaly.isGroupBy()) {
while (rs.next()) {
stat.put(this.getMetricName (anomaly) +"."+ rs.getString(1)+".gauge", rs.getString(2));
}
}
else {
if (rs.next()) { // Putting monitoring result for this anomaly in the HashMap stat
stat.put( this.getMetricName(anomaly)+".gauge", rs.getString(1));
}
}
} catch (NumberFormatException e) { // In case of number format exception, anomaly specification is erroneous, hence removing it
//TODO: For other datatpyes, there could be different exception for formatting
logger.info ("ANOMALY-EXECUTION-ERROR [anomaly."+ anomaly.getFunction()+"."+field.getName()+
"="+anomaly.getValue()+","+anomaly.getTimeStr()+"] "+ "Value: " + anomaly.getValue() + " Invalid number format");
iterator.remove();
continue;
}
finally {
if (rs!= null)
rs.close();
if (ps!=null)
ps.close();
}
}
con.close();
}
// Returns SQL query for outliers
private String getOutlierQuery ( String tableName, DataField field, boolean direction ) {
StringBuilder toReturn = new StringBuilder ();
toReturn.append("SELECT COUNT(*) FROM ").append(tableName).append( " WHERE " + field.getName());
// Outier could be positive or negative
if (direction == POSITIVE)
toReturn.append(" > ");
else
toReturn.append(" < ");
toReturn.append("? AND timed > ?"); // ) as maxTable"
return toReturn.toString();
}
private String getOutlierQueryByGroup (String tableName, DataField field, DataField groupBy, boolean direction) {
StringBuilder toReturn = new StringBuilder ();
toReturn.append("SELECT " + groupBy.getName() + ", COUNT(*) FROM ").append(tableName).append(" WHERE " + field.getName());
if (direction == POSITIVE)
toReturn.append(" > ");
else
toReturn.append(" < ");
toReturn.append("? AND timed > ?");
toReturn.append(" GROUP BY " + groupBy.getName());
return toReturn.toString();
}
private void interQuartileRange (Hashtable <String, Object> stat) throws SQLException {
if (!functions.containsKey("iqr"))
return;
ArrayList <Anomaly> anomalies = functions.get("iqr");
// TODO: remove key iqr
if (anomalies == null || anomalies.size() == 0)
return;
StorageManager storageMan = Main.getStorage(sensor.getVirtualSensorConfiguration().getName());
Connection con = null;
PreparedStatement ps = null;
ResultSet rs = null;
con = storageMan.getConnection();
Iterator <Anomaly> iterator = anomalies.iterator();
while (iterator.hasNext()) {
Anomaly anomaly = iterator.next();
DataField field = anomaly.getField();
String query;
if (anomaly.isGroupBy())
query = getIQRQueryByGroup (sensor.getVirtualSensorConfiguration().getName().toLowerCase(),field,anomaly.getGroupByField());
else
query = getIQRQuery(sensor.getVirtualSensorConfiguration().getName().toLowerCase(), field);
ps = con.prepareStatement(query);
long timestamp = 0;
//TODO: Improve IQR SQL Query
timestamp = this.getTimeStamp(anomaly.getTime());
ps.setDouble(1, timestamp);
ps.setDouble(2, timestamp);
ps.setDouble(3, timestamp);
ps.setDouble(4, timestamp);
rs = ps.executeQuery();
if (anomaly.isGroupBy()) {
while (rs.next()) {
stat.put (this.getMetricName (anomaly) +"."+ rs.getString(1)+".gauge", rs.getString (2));
}
}
else {
if (rs.next()) {
stat.put(this.getMetricName(anomaly)+".gauge", rs.getString(1));
}
}
if (rs!=null)
rs.close();
if (ps!=null)
ps.close();
}
con.close();
}
//TODO: A complex IQR Query. It should be improved.
//TODO : Comment the query
private String getIQRQuery (String tableName, DataField field) {
StringBuilder toReturn = new StringBuilder ("SELECT ");
toReturn.append("(SELECT " + field.getName() + " FROM ( ");
toReturn.append("SELECT ROW_NUMBER() OVER (ORDER BY " + field.getName() + " ASC) AS ROWNUMBER, " + field.getName());
toReturn.append(" FROM " + tableName + " WHERE timed > ?");
toReturn.append(" ) AS q3");
toReturn.append(" WHERE ROWNUMBER = (SELECT COUNT(*) FROM " + tableName +
" WHERE timed > ?)*75/100)");
toReturn.append(" - ");
toReturn.append("(SELECT " + field.getName() + " FROM ( ");
toReturn.append("SELECT ROW_NUMBER() OVER (ORDER BY " + field.getName() + " ASC) AS ROWNUMBER, " + field.getName());
toReturn.append(" FROM " + tableName + " WHERE timed > ?");
toReturn.append(" ) AS q1");
toReturn.append(" WHERE ROWNUMBER = (SELECT COUNT(*) FROM " + tableName +
" WHERE timed > ?) *25/100) AS IQR");
return toReturn.toString();
}
private String getIQRQueryByGroup (String tableName, DataField field, DataField groupBy) {
String fieldName = field.getName();
String groupByName = groupBy.getName();
StringBuilder toReturn = new StringBuilder ("SELECT q1." + groupByName + ", (q3." + fieldName +" - q1." + fieldName +") IQR ");
toReturn.append ("FROM ");
toReturn.append ("(SELECT doo." + fieldName + ", doo." + groupByName + ", doo.row ");
toReturn.append ("FROM ");
toReturn.append ("(SELECT ROW_NUMBER() OVER (PARTITION BY " + groupByName + " ORDER BY " + groupByName + ","
+ fieldName + ") as row, " + groupByName + ", " + fieldName);
toReturn.append (" FROM " + tableName + " WHERE timed > ?) doo, ");
toReturn.append ("(SELECT max(foo.row)*25/100 AS max, " + groupByName);
toReturn.append (" FROM (SELECT ROW_NUMBER() OVER (PARTITION BY " + groupByName + " ORDER BY " + groupByName + ","
+ fieldName + ") as row, " + groupByName + ", " + fieldName);
toReturn.append (" FROM " + tableName + " WHERE timed > ?) foo ");
toReturn.append (" GROUP BY " + groupByName + ") boo ");
toReturn.append ("WHERE doo." + groupByName + "=" + "boo." + groupByName);
toReturn.append (" AND doo.row = boo.max) q1, ");
toReturn.append ("(SELECT doo." + fieldName + ", doo." + groupByName + ", doo.row ");
toReturn.append ("FROM ");
toReturn.append ("(SELECT ROW_NUMBER() OVER (PARTITION BY " + groupByName + " ORDER BY " + groupByName + ","
+ fieldName + ") as row, " + groupByName + ", " + fieldName);
toReturn.append (" FROM " + tableName + " WHERE timed > ?) doo, ");
toReturn.append ("(SELECT max(foo.row)*75/100 AS max, " + groupByName);
toReturn.append (" FROM (SELECT ROW_NUMBER() OVER (PARTITION BY " + groupByName + " ORDER BY " + groupByName + ","
+ fieldName + ") as row, " + groupByName + ", " + fieldName);
toReturn.append (" FROM " + tableName + " WHERE timed > ?) foo ");
toReturn.append (" GROUP BY " + groupByName + ") boo ");
toReturn.append ("WHERE doo." + groupByName + "=" + "boo." + groupByName);
toReturn.append (" AND doo.row = boo.max) q3 ");
toReturn.append("WHERE q1." + groupByName + " = q3." +groupByName +";");
return toReturn.toString();
}
// Count unique values of a field over a specified timed-interval
private void countUnique (Hashtable <String, Object> stat) throws SQLException {
if (!functions.containsKey("unique"))
return;
ArrayList <Anomaly> anomalies = functions.get("unique");
// TODO: remove key iqr
if (anomalies == null || anomalies.size() == 0) {
functions.remove("unique");
return;
}
StorageManager storageMan = Main.getStorage(sensor.getVirtualSensorConfiguration().getName());
Connection con = null;
PreparedStatement ps = null;
ResultSet rs = null;
con = storageMan.getConnection();
Iterator <Anomaly> iterator = anomalies.iterator();
while (iterator.hasNext()) {
Anomaly anomaly = iterator.next();
DataField field = anomaly.getField();
String query;
if (anomaly.isGroupBy())
query = getUniqueQueryByGroup(sensor.getVirtualSensorConfiguration().getName().toLowerCase(), field, anomaly.getGroupByField());
else
query = getUniqueQuery(sensor.getVirtualSensorConfiguration().getName().toLowerCase(), field);
ps = con.prepareStatement(query);
long timeStamp = this.getTimeStamp(anomaly.getTime());
//logger.info ("COUNT UNIQUE timestamp : " + timeStamp);
ps.setLong(1,timeStamp);
rs = ps.executeQuery();
if (anomaly.isGroupBy()) {
while (rs.next()) {
stat.put (this.getMetricName(anomaly) +"."+ rs.getString(1)+".gauge",rs.getString(2));
}
}
else {
if(rs.next()) {
stat.put(this.getMetricName(anomaly)+".gauge", rs.getString(1));
}
}
if (rs!=null)
rs.close();
if (ps!=null)
ps.close();
}
con.close();
}
// Generates SQL query for the number of unique values of a field
private String getUniqueQuery ( String tableName, DataField field) {
StringBuilder toReturn = new StringBuilder ("SELECT COUNT (*) FROM (");
toReturn.append("SELECT DISTINCT " + field.getName() + " FROM ").append(tableName);
toReturn.append(" WHERE timed > ?) AS maxTabel");
return toReturn.toString();
}
private String getUniqueQueryByGroup ( String tableName, DataField field, DataField groupBy) {
StringBuilder toReturn = new StringBuilder ("SELECT " + groupBy.getName()+", COUNT(*) ");
toReturn.append("FROM ( SELECT DISTINCT " + field.getName() + ", station FROM ").append(tableName);
toReturn.append(" WHERE timed > ?) AS temp ");
toReturn.append("GROUP BY " + groupBy.getName());
return toReturn.toString();
}
// Returns the time stamp by subtractive the windowSize from the current time
private long getTimeStamp (long windowSize) {
// Counter intuitively, zero represents we will look into all the stored data for anomalies
if (windowSize ==-1)
return -1;
return System.currentTimeMillis() - windowSize;
}
public String toString () {
StringBuilder sb = new StringBuilder();
sb.append("\n");
for (Map.Entry <String, ArrayList <Anomaly>> entry : functions.entrySet ()) {
String function = entry.getKey();
ArrayList <Anomaly> anomalies = entry.getValue();
sb.append(function);
sb.append("\n");
for (Anomaly anomaly : anomalies ) {
sb.append("\t");
sb.append(anomaly.toString());
sb.append("\n");
}
}
return sb.toString();
}
public String getMetricName (Anomaly anomaly) {
StringBuilder sb = new StringBuilder ("vs."+sensor.getVirtualSensorConfiguration().getName().replaceAll("\\,","_" )+".output");
sb.append(".anomaly." + anomaly.getFunction() + "." + anomaly.getField().getName());
if (anomaly.getFunction().equals ("positive_outlier") || anomaly.getFunction().equals ("negative_outlier"))
sb.append("." + anomaly.getValue().trim());
if (anomaly.isGroupBy())
sb.append("." + anomaly.getGroupByField().getName());
return sb.toString();
}
public Hashtable <String, Object> getStatistics () {
Hashtable <String, Object> stat = new Hashtable <String, Object>();
try {
countOutliers (stat, POSITIVE);
countOutliers (stat, NEGATIVE);
interQuartileRange(stat);
countUnique(stat);
} catch (SQLException e) { logger.error(e.getMessage(), e); }
return stat;
}
}