/* * Copyright (C) 2015 Adrien Guille <adrien.guille@univ-lyon2.fr> * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package main.java.fr.ericlab.sondy.algo.eventdetection.mabed; import main.java.fr.ericlab.sondy.core.app.AppParameters; import main.java.fr.ericlab.sondy.core.utils.ArrayUtils; import java.util.ArrayList; import java.util.LinkedList; /** * * @author Adrien GUILLE, ERIC Lab, University of Lyon 2 * @email adrien.guille@univ-lyon2.fr */ public class MABEDComponent1 extends Thread { public MABEDEventList events = new MABEDEventList(); int from; int to; int minTermOccur; int maxTermOccur; int threadId; public MABEDComponent1(int id, int a, int b, int min, int max){ from = a; to = b; minTermOccur = min; maxTermOccur = max; threadId = id; } float expectation(int timeSlice, float tmf){ return AppParameters.dataset.corpus.messageDistribution[timeSlice]*(tmf/AppParameters.dataset.corpus.messageCount); } float anomaly(float expectation, float real){ return real - expectation; } @Override public void run() { int m = AppParameters.timeSliceB; for(int t = from; t <= to; t++){ String term = AppParameters.dataset.corpus.vocabulary.get(t); if(term.length() > 2 && !AppParameters.stopwords.contains(term)){ float[] gf, mf; gf = ArrayUtils.toFloatArray(AppParameters.dataset.corpus.getTermFrequency(term)); mf = ArrayUtils.toFloatArray(AppParameters.dataset.corpus.getTermFrequency(term)); int tmf = (int)ArrayUtils.sum(mf,0,m-1); int tgf = (int)ArrayUtils.sum(gf,0,m-1); if(tgf>minTermOccur && tgf<maxTermOccur){ float expectation; float scoreSequence[] = new float[m]; for(int i = AppParameters.timeSliceA; i < m; i++){ expectation = expectation(i,tmf); scoreSequence[i] = anomaly(expectation, mf[i]); } LinkedList<MABEDTimeInterval> I = new LinkedList<>(); LinkedList<Float> L = new LinkedList<>(); LinkedList<Float> R = new LinkedList<>(); ArrayList<Float> anomaly = new ArrayList<>(); for(int i = AppParameters.timeSliceA; i < m; i++){ anomaly.add(scoreSequence[i]>0?scoreSequence[i]:0); if(scoreSequence[i]>0){ int k = I.size(); float Lk = 0, Rk = ArrayUtils.sum(scoreSequence,0,i); if(i>0){ Lk = ArrayUtils.sum(scoreSequence,0,i-1); } int j = 0; boolean foundJ = false; for(int l=k-1; l>=0 && !foundJ; l--){ if(L.get(l)<Lk){ foundJ = true; j = l; } } if(foundJ && R.get(j)<Rk){ MABEDTimeInterval Ik = new MABEDTimeInterval(I.get(j).timeSliceA,i); for(int p = j; p<k; p++){ I.removeLast(); L.removeLast(); R.removeLast(); } k = j; I.add(Ik); L.add(ArrayUtils.sum(scoreSequence,0,Ik.timeSliceA-1)); R.add(ArrayUtils.sum(scoreSequence,0,Ik.timeSliceB)); }else{ I.add(new MABEDTimeInterval(i,i)); L.add(Lk); R.add(Rk); } } } if(I.size()>0){ MABEDTimeInterval maxI = I.get(0); for(MABEDTimeInterval Ii : I){ if(ArrayUtils.sum(scoreSequence,Ii.timeSliceA,Ii.timeSliceB)>ArrayUtils.sum(scoreSequence,maxI.timeSliceA,maxI.timeSliceB)){ maxI.timeSliceA = Ii.timeSliceA; maxI.timeSliceB = Ii.timeSliceB; } } double score = ArrayUtils.sum(scoreSequence,I.get(0).timeSliceA,I.get(0).timeSliceB); events.add(new MABEDEvent(term,maxI,score,anomaly)); } } } } } }