/* * Copyright (C) 2015 Adrien Guille <adrien.guille@univ-lyon2.fr> * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package main.java.fr.ericlab.sondy.algo.eventdetection.mabed; import main.java.fr.ericlab.sondy.core.app.AppParameters; import main.java.fr.ericlab.sondy.core.text.index.Indexer; import java.util.ArrayList; /** * * @author Adrien GUILLE, ERIC Lab, University of Lyon 2 * @email adrien.guille@univ-lyon2.fr */ public class MABEDComponent2 extends Thread { MABEDEvent basicEvent; int candidateWordSetSize; double theta; public MABEDEvent refinedEvent; public int threadId; public MABEDComponent2(int id, MABEDEvent be, int p, double t){ basicEvent = be; candidateWordSetSize = p; theta = t; threadId = id; } double getErdemCoefficient(short[] ref, short[] comp, int a, int b){ double scores1[] = new double[b-a+1], scores2[] = new double[b-a+1]; for(int i = a; i <= b; i++){ scores1[i-a] = ref[i]; scores2[i-a] = comp[i]; } double result; double A12 = 0, A1 = 0, A2 = 0; for(int i=2;i<scores1.length;i++){ A12 += (scores1[i]-scores1[i-1])*(scores2[i]-scores2[i-1]); A1 += (scores1[i]-scores1[i-1])*(scores1[i]-scores1[i-1]); A2 += (scores2[i]-scores2[i-1])*(scores2[i]-scores2[i-1]); } A1 = Math.sqrt(A1/(scores1.length-1)); A2 = Math.sqrt(A2/(scores1.length-1)); result = A12/((scores1.length-1)*A1*A2); return (double) (result+1)/2; } @Override public void run(){ refinedEvent = new MABEDEvent(); Indexer indexer = new Indexer(); ArrayList<String> candidateWords = indexer.getMostFrequentWords(AppParameters.dataset.corpus.getMessages(basicEvent.mainTerm,basicEvent.I.timeSliceA,basicEvent.I.timeSliceB),basicEvent.mainTerm,candidateWordSetSize); short ref[] = AppParameters.dataset.corpus.getTermFrequency(basicEvent.mainTerm); short comp[]; refinedEvent = new MABEDEvent(basicEvent.mainTerm, basicEvent.I, basicEvent.score, basicEvent.anomaly); for(String word : candidateWords){ comp = AppParameters.dataset.corpus.getTermFrequency(word); double w = getErdemCoefficient(ref, comp, basicEvent.I.timeSliceA, basicEvent.I.timeSliceB); if(w >= theta){ refinedEvent.relatedTerms.add(new MABEDWeightedTerm(word,w)); } } } }