package cn.edu.sjtu.omnilab.syslogcleanser.ppefilter;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
 * Detects the Ping-Pong effect in a sequence of AP (access point) records and
 * merges the affected records.
 *
 * <p>The only public entry point is {@link #RemovePP}. It scores every window of
 * the AP sequence (all start positions, all window lengths &gt;= 3), keeps the
 * windows whose score is a local maximum in the row, column and diagonal
 * directions of the score matrix, and collapses each kept window into a single
 * {@code APRecord}.
 *
 * <p>Throughout this class a score of {@code -1} is treated as "no value here"
 * and is never selected as a maximum (presumably this is what
 * {@code DataPoint.measure()} yields for windows that run past the end of the
 * sequence -- confirm against {@code DataPoint}).
 */
public class PPDetector {

    /**
     * Builds the statistics of one window of the AP sequence.
     *
     * <p>The returned point is created as {@code new DataPoint(mask_len - 3, start)}.
     * When the window {@code [start, start + mask_len)} does not fit inside
     * {@code aps}, the point is returned untouched. Otherwise the following
     * fields are filled in:
     * <ul>
     *   <li>{@code item_total}: size of the whole list {@code aps};</li>
     *   <li>{@code pair_count}: window length minus the number of distinct AP names;</li>
     *   <li>{@code item_undup}: incremented once per AP name that occurs exactly once;</li>
     *   <li>{@code distance}: incremented by the gap between every two consecutive
     *       occurrences of the same AP name;</li>
     *   <li>{@code max_distance}: raised to the largest such gap.</li>
     * </ul>
     *
     * @param aps      full sequence of AP names
     * @param start    index of the first record of the window
     * @param mask_len number of records in the window
     * @return the data point describing the window
     */
    private static DataPoint calculate_ppe_measure(List<String> aps, int start, int mask_len) {
        DataPoint dp = new DataPoint(mask_len - 3, start);
        if (start + mask_len - 1 >= aps.size()) {
            // The window runs past the end of the sequence: leave the point as constructed.
            return dp;
        }

        dp.item_total = aps.size();

        // One pass over the window: count each AP name and, whenever a name
        // re-appears, record the gap to its previous occurrence.
        Map<String, Integer> occurrences = new HashMap<String, Integer>();
        Map<String, Integer> last_seen = new HashMap<String, Integer>();
        for (int k = start; k < start + mask_len; k++) {
            String apname = aps.get(k);

            Integer count = occurrences.get(apname);
            occurrences.put(apname, count == null ? 1 : count.intValue() + 1);

            Integer previous = last_seen.get(apname);
            if (previous != null) {
                int pair_dist = k - previous.intValue();
                if (dp.max_distance < pair_dist) {
                    dp.max_distance = pair_dist;
                }
                dp.distance += pair_dist;
            }
            last_seen.put(apname, k);
        }

        dp.pair_count = mask_len - occurrences.size();
        for (Integer count : occurrences.values()) {
            if (count.intValue() == 1) {
                dp.item_undup++;
            }
        }
        return dp;
    }

    /**
     * Tells whether {@code seq[j]} is a strict local maximum of {@code seq}.
     *
     * <p>Neighbours equal to {@code -1} are treated as absent. At either end of
     * the array the element must itself differ from {@code -1} and only has to
     * beat its single neighbour (or have none / an absent one). In the interior
     * the element must be strictly greater than every present neighbour, and at
     * least one neighbour must be present. An index outside
     * {@code [0, seq.length)} other than the ones handled by the end cases
     * yields {@code false}.
     *
     * @param seq scores, with {@code -1} marking absent entries
     * @param j   index to test
     * @return {@code true} if {@code seq[j]} is a local maximum
     */
    private static boolean is_local_maxima(double[] seq, int j) {
        boolean found = false;
        if (j == 0 && seq[j] != -1) {
            // First element: only a right neighbour can exist.
            if (j + 1 >= seq.length || seq[j + 1] == -1 || seq[j + 1] < seq[j]) {
                found = true;
            }
        } else if (j == seq.length - 1 && seq[j] != -1) {
            // Last element: only a left neighbour can exist.
            if (j - 1 < 0 || seq[j - 1] == -1 || seq[j - 1] < seq[j]) {
                found = true;
            }
        } else if (j > 0 && j < seq.length - 1) {
            boolean left_present = seq[j - 1] != -1;
            boolean right_present = seq[j + 1] != -1;
            boolean beats_left = left_present && seq[j - 1] < seq[j];
            boolean beats_right = right_present && seq[j + 1] < seq[j];
            if ((!left_present && beats_right)
                    || (!right_present && beats_left)
                    || (beats_left && beats_right)) {
                found = true;
            }
        }
        return found;
    }

    /**
     * Collects every index of {@code seq} that {@link #is_local_maxima} accepts.
     *
     * @param seq scores, with {@code -1} marking absent entries
     * @return indices of the local maxima, in ascending order
     */
    private static ArrayList<Integer> find_local_maxima(double[] seq) {
        ArrayList<Integer> max_js = new ArrayList<Integer>();
        for (int j = 0; j < seq.length; j++) {
            if (is_local_maxima(seq, j)) {
                max_js.add(j);
            }
        }
        return max_js;
    }

    /**
     * Selects the data points of the measure matrix that are local maxima in the
     * row, column and anti-diagonal directions, then thins out overlapping ones.
     *
     * <p>Thinning rules, applied pairwise:
     * <ul>
     *   <li>a point whose {@code max_distance} exceeds 3 is dropped;</li>
     *   <li>when two points share more than one index of their
     *       {@code [start_index, stop_index]} ranges, the one with the longer
     *       mask is dropped; on equal mask lengths the one with the larger start
     *       index is dropped (the second of the pair on a tie).</li>
     * </ul>
     *
     * @param ppm measure matrix; every row is expected to be at least as long as
     *            the column indices found in any other row (the caller passes a
     *            square matrix)
     * @return the surviving data points
     */
    private static List<DataPoint> find_pp_points(DataPoint[][] ppm) {
        ArrayList<DataPoint> points = new ArrayList<DataPoint>();

        // Find the local maxima points
        for (int i = 0; i < ppm.length; i++) {
            // row direction
            double[] measures_row = new double[ppm[i].length];
            for (int j = 0; j < ppm[i].length; j++) {
                measures_row[j] = ppm[i][j].measure();
            }

            for (int j : find_local_maxima(measures_row)) {
                // col direction
                double[] measures_col = new double[ppm.length];
                // diag direction: cells (k, i + j - k) that fall inside the matrix
                List<Double> diag_values = new ArrayList<Double>();
                for (int k = 0; k < ppm.length; k++) {
                    measures_col[k] = ppm[k][j].measure();
                    int diag_col = i + j - k;
                    if (diag_col >= 0 && diag_col < ppm[k].length) {
                        diag_values.add(ppm[k][diag_col].measure());
                    }
                }
                double[] measures_diag = new double[diag_values.size()];
                for (int k = 0; k < measures_diag.length; k++) {
                    measures_diag[k] = diag_values.get(k).doubleValue();
                }

                // Final decision.
                // NOTE(review): measures_diag only holds the in-range cells, so
                // index i lines up with row i only while every row k <= i
                // contributed a cell. Otherwise the index is shifted, or out of
                // range (in which case is_local_maxima returns false). Kept as in
                // the original; confirm this is intended.
                if (is_local_maxima(measures_col, i) && is_local_maxima(measures_diag, i)) {
                    points.add(ppm[i][j]);
                }
            }
        }

        // Reduce the local maxima set
        Set<DataPoint> removed = new HashSet<DataPoint>();
        for (DataPoint dp1 : points) {
            for (DataPoint dp2 : points) {
                if (removed.contains(dp1) || removed.contains(dp2)) {
                    continue;
                }
                if (dp2.max_distance > 3) {
                    removed.add(dp2);
                    continue;
                }
                if (dp1.equals(dp2)) {
                    continue;
                }

                // Count the indices shared by the two ranges.
                int cover_num = 0;
                for (int i = dp1.start_index(); i <= dp1.stop_index(); i++) {
                    if (i >= dp2.start_index() && i <= dp2.stop_index()) {
                        cover_num++;
                    }
                }
                if (cover_num <= 1) {
                    // Touching on a single record is tolerated.
                    continue;
                }

                if (dp1.mask_length() < dp2.mask_length()) {
                    removed.add(dp2);
                } else if (dp1.mask_length() > dp2.mask_length()) {
                    removed.add(dp1);
                } else if (dp1.start_index() <= dp2.start_index()) {
                    removed.add(dp2);
                } else {
                    removed.add(dp1);
                }
            }
        }

        points.removeAll(removed);
        return points;
    }

    /**
     * The core algorithm of filtering the Ping-Pong-Effect data points.
     *
     * <p>The four lists are parallel: entry {@code k} of each describes the same
     * AP record. Their lengths are not validated here; the record count is taken
     * from {@code tmp_durations}. With fewer than three records no Ping-Pong
     * detection is attempted.
     *
     * <p>For every detected Ping-Pong segment, the records of the segment whose
     * flag is still {@code true} are merged into one {@code APRecord}: its name
     * is the comma-separated set of distinct AP names of the segment (in
     * {@code HashSet} iteration order), its start time is that of the first
     * still-flagged record (or {@code -1} if there is none) and its duration is
     * the sum of the still-flagged durations. Records that belong to no segment
     * are copied through unchanged. The result is sorted by start time.
     *
     * <p><b>Side effect:</b> the flags of all merged records in
     * {@code tmp_flags} are set to {@code false}.
     *
     * @param tmp_ap_list     AP name of each record
     * @param tmp_start_times start time of each record
     * @param tmp_durations   duration of each record
     * @param tmp_flags       {@code true} for records that have not been consumed yet
     * @return the filtered records ordered by start time
     */
    public static ArrayList<APRecord> RemovePP(ArrayList<String> tmp_ap_list,
            ArrayList<Long> tmp_start_times, ArrayList<Long> tmp_durations, ArrayList<Boolean> tmp_flags) {
        // Diagnostic output only; guarded like every other debug print of this method.
        if (DebugFlag.debug) {
            System.out.println("len: " + tmp_ap_list.size());
        }

        ArrayList<APRecord> filtered_records = new ArrayList<APRecord>();
        int ap_count = tmp_durations.size();
        if (ap_count >= 3) {
            // Calculating the Ping-pong-effect measure matrix:
            // row i <-> window length i + 3, column j <-> window start j.
            int m = ap_count - 2;
            int n = ap_count - 2;
            DataPoint[][] ppe_measure_matrix = new DataPoint[m][n];
            for (int i = 0; i < m; i++) {
                for (int j = 0; j < n; j++) {
                    ppe_measure_matrix[i][j] = calculate_ppe_measure(tmp_ap_list, j, i + 3);
                }
            }

            if (DebugFlag.debug) {
                // Print out the measure matrix
                System.out.println("\nPPE measure matrix:");
                DecimalFormat format = new DecimalFormat("0.000");
                for (int i = 0; i < m; i++) {
                    for (int j = 0; j < n; j++) {
                        System.out.print(format.format(ppe_measure_matrix[i][j].measure()) + "\t");
                    }
                    System.out.println();
                }
            }

            // Find the Ping-pong segments
            List<DataPoint> ppps = find_pp_points(ppe_measure_matrix);
            if (DebugFlag.debug) {
                System.out.println("\nindex(0,0)\tstart_index(0:)\tmask_length(1:)\tscore");
            }
            for (DataPoint dp : ppps) {
                if (DebugFlag.debug) {
                    System.out.println("(" + dp.i + "," + dp.j + ")" + "\t" + dp.start_index() + "\t"
                            + dp.mask_length() + "\t" + dp.measure());
                }

                Set<String> fap_name_set = new HashSet<String>();
                long fap_start_time = -1;
                long fap_duration = 0;
                for (int i = dp.start_index(); i <= dp.stop_index(); i++) {
                    fap_name_set.add(tmp_ap_list.get(i));
                    // Only records that are still flagged contribute time; clearing
                    // the flag afterwards keeps a record shared by two segments
                    // (they may overlap on a single record) from being counted twice.
                    if (tmp_flags.get(i).booleanValue()) {
                        if (fap_start_time == -1) {
                            fap_start_time = tmp_start_times.get(i);
                        }
                        fap_duration += tmp_durations.get(i);
                        tmp_flags.set(i, false);
                    }
                }

                // Comma-join the distinct names; a separator is added only once
                // something has already been written.
                StringBuilder fap_name = new StringBuilder();
                for (String s : fap_name_set) {
                    if (fap_name.length() != 0) {
                        fap_name.append(",");
                    }
                    fap_name.append(s);
                }
                filtered_records.add(new APRecord(fap_name.toString(), fap_start_time, fap_duration));
            }
        }

        // Add the left records which are not contained by the ping-pong segments
        for (int j = 0; j < tmp_flags.size(); j++) {
            if (tmp_flags.get(j)) {
                filtered_records.add(
                        new APRecord(tmp_ap_list.get(j), tmp_start_times.get(j), tmp_durations.get(j)));
            }
        }

        // Order the records by start_time
        Collections.sort(filtered_records, new Comparator<APRecord>() {
            public int compare(APRecord arg0, APRecord arg1) {
                return Long.valueOf(arg0.start_time).compareTo(Long.valueOf(arg1.start_time));
            }
        });
        return filtered_records;
    }
}