/* * Copyright (C) 2015 Adrien Guille <adrien.guille@univ-lyon2.fr> * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package main.java.fr.ericlab.sondy.algo.eventdetection; import ch.epfl.lis.jmod.modularity.community.Community; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import main.java.fr.ericlab.sondy.core.app.AppParameters; import main.java.fr.ericlab.sondy.core.structures.Event; import main.java.fr.ericlab.sondy.algo.Parameter; import java.util.HashMap; import java.util.LinkedList; import java.util.Map; import java.util.logging.Level; import java.util.logging.Logger; import main.java.fr.ericlab.sondy.algo.eventdetection.edcow.EDCoWEvent; import main.java.fr.ericlab.sondy.algo.eventdetection.edcow.EDCoWKeyword; import main.java.fr.ericlab.sondy.algo.eventdetection.edcow.EDCoWModularityDetection; import main.java.fr.ericlab.sondy.algo.eventdetection.edcow.EDCoWThreshold; import main.java.fr.ericlab.sondy.core.structures.Events; /** * * @author Adrien GUILLE, ERIC Lab, University of Lyon 2 * @email adrien.guille@univ-lyon2.fr */ public class EDCoW extends EventDetectionMethod { int delta = 8; int delta2 = 48; int gamma = 5; double minTermSupport = 0.0001; double maxTermSupport = 0.01; HashMap<String,short[]> termDocMap; LinkedList<EDCoWEvent> eventList; public EDCoW(){ super(); parameters.add(new Parameter("delta",delta+"")); parameters.add(new Parameter("delta2",delta2+"")); parameters.add(new Parameter("gamma",gamma+"")); parameters.add(new Parameter("minTermSupport",minTermSupport+"")); parameters.add(new Parameter("maxTermSupport",maxTermSupport+"")); } @Override public String getName() { return "EDCoW"; } @Override public String getCitation() { return "<li><b>EDCoW:</b> J. Weng and B. Lee (2011) Event Detection in Twitter, In Proceedings of the 2011 AAAI Conference on Weblogs and Social Media (ICWSM), pp. 401-408</li>"; } @Override public String getDescription() { return "Event detection with clustering of wavelet-based signals"; } @Override public void apply() { double minTermOccur = parameters.getParameterValue("minTermSupport") * AppParameters.dataset.corpus.messageCount; double maxTermOccur = parameters.getParameterValue("maxTermSupport") * AppParameters.dataset.corpus.messageCount; delta = (int) parameters.getParameterValue("delta"); delta2 = (int) parameters.getParameterValue("delta2"); gamma = (int) parameters.getParameterValue("gamma"); int windows = (AppParameters.timeSliceB-AppParameters.timeSliceA)/delta2; termDocMap = new HashMap<>(); eventList = new LinkedList<>(); for(int i = AppParameters.timeSliceA; i < AppParameters.timeSliceB; i++){ String term = AppParameters.dataset.corpus.vocabulary.get(i); if(term.length()>1 && !AppParameters.stopwords.contains(term)){ short[] frequency = AppParameters.dataset.corpus.termFrequencies[i]; int cf = 0; for(short freq : frequency){ cf += freq; } if(cf > minTermOccur && cf < maxTermOccur){ termDocMap.put(term, frequency); } } } for(int i = 0; i < windows ;i++){ processWindow(i); } Collections.sort(eventList); events = new Events(); for(EDCoWEvent event : eventList){ events.list.add(new Event(event.getKeywordsAsString(),AppParameters.dataset.corpus.convertTimeSliceToDay((int)event.endSlice)+","+AppParameters.dataset.corpus.convertTimeSliceToDay((int)event.startSlice))); } events.setFullList(); } public void processWindow(int window){ try{ LinkedList<EDCoWKeyword> keyWords = new LinkedList<>(); int[] distributioni = AppParameters.dataset.corpus.messageDistribution; double[] distributiond = new double[delta2]; int startSlice = window*delta2; int endSlice = startSlice+delta2-1; for(int i = startSlice; i < endSlice; i++){ distributiond[i-startSlice] = (double) distributioni[i]; } for(Map.Entry<String, short[]> entry : termDocMap.entrySet()){ short frequencyf[] = entry.getValue(); double frequencyd[] = new double[delta2]; for(int i = startSlice; i < endSlice; i++){ frequencyd[i-startSlice] = (double) frequencyf[i]; } keyWords.add(new EDCoWKeyword(entry.getKey(),frequencyd,delta,distributiond)); } double[] autoCorrelationValues = new double[keyWords.size()]; for(int i = 0; i < keyWords.size(); i++){ autoCorrelationValues[i] = keyWords.get(i).getAutoCorrelation(); } EDCoWThreshold th1 = new EDCoWThreshold(); double theta1 = th1.theta1(autoCorrelationValues, gamma); // Removing trivial keywords based on theta1 LinkedList<EDCoWKeyword> keyWordsList1 = new LinkedList<>(); for(EDCoWKeyword k : keyWords){ if(k.getAutoCorrelation() > theta1){ keyWordsList1.add(k); } } for(EDCoWKeyword kw1 : keyWordsList1){ kw1.computeCrossCorrelation(keyWordsList1); } double[][] bigMatrix = new double[keyWordsList1.size()][keyWordsList1.size()]; for(int i=0; i<keyWordsList1.size(); i++){ bigMatrix[i] = keyWordsList1.get(i).getCrossCorrelation(); } //compute theta2 using the bigmatrix double theta2 = th1.theta2(bigMatrix, gamma); for(int i = 0; i < keyWordsList1.size(); i++){ for(int j = i+1; j < keyWordsList1.size(); j++){ bigMatrix[i][j] = (bigMatrix[i][j] < theta2)?0:bigMatrix[i][j]; } } EDCoWModularityDetection modularity = new EDCoWModularityDetection(keyWordsList1,bigMatrix,startSlice,endSlice); double thresholdE = 0.1; ArrayList<Community> finalArrCom= modularity.getCommunitiesFiltered(thresholdE); for(Community c : finalArrCom){ System.out.println(c.getCommunitySize()); modularity.saveEventFromCommunity(c); } eventList.addAll(modularity.getEvents()); } catch (IOException ex) { Logger.getLogger(EDCoW.class.getName()).log(Level.SEVERE, null, ex); } catch (Exception ex) { Logger.getLogger(EDCoW.class.getName()).log(Level.SEVERE, null, ex); } } }