package project.core.mbeans.analysis;
import java.io.IOException;
import java.net.URL;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.regex.Pattern;
import javax.annotation.PostConstruct;
import javax.annotation.PreDestroy;
import javax.ejb.Stateful;
import javax.naming.InitialContext;
import org.jboss.annotation.ejb.RemoteBinding;
import org.jboss.annotation.ejb.cache.simple.CacheConfig;
import edu.mit.jwi.Dictionary;
import edu.mit.jwi.IDictionary;
import edu.mit.jwi.item.IIndexWord;
import edu.mit.jwi.item.ISynset;
import edu.mit.jwi.item.IWord;
import edu.mit.jwi.item.IWordID;
import edu.mit.jwi.item.POS;
import project.client.persistence.Message;
import project.core.mbeans.database.ConnectionManagerMysqlImpl;
import project.core.mbeans.processing.MessageProcessingMBean;
import project.core.persistence.PersistenceLoaderMBean;
import project.persistence.properties.MessageWithProperties;
import project.utils.statistics.impl.Stemmer;
/**
 * Stateful bean, bound in JNDI as {@code MessageBaseProcessor}, that offers
 * WordNet synonym lookup and simple word statistics over stored messages.
 *
 * <p>The WordNet dictionary is a single static instance shared by every bean
 * instance; the database connection and the persistence loader are obtained
 * per instance in {@link #start()}.
 */
@Stateful
@RemoteBinding(jndiBinding="MessageBaseProcessor")
@CacheConfig(removalTimeoutSeconds=18000L)
public class MessageBaseProcessor extends ConnectionManagerMysqlImpl implements MessageBaseProcessorMBean {

    /** Character class used to split message content into tokens. */
    private static final String WORD_INPUT_DELIMITERS = "[ \t\r\n:*)(,%^&*$#/~!;.?`'\"-]";

    // NOTE(review): absolute, developer-specific path; should come from configuration.
    private static final String STR_DICTIONARY_LOCATION = "/home/alexd/workspace/proiect-diploma/input/dict";

    /** Precompiled form of {@link #WORD_INPUT_DELIMITERS}; avoids recompiling per message. */
    private static final Pattern DELIMITER_PATTERN = Pattern.compile(WORD_INPUT_DELIMITERS);

    /** A token is considered valid when it consists only of the letters a-z. */
    private static final Pattern VALID_WORD_PATTERN = Pattern.compile("[a-z]*");

    /**
     * Shared WordNet dictionary. It is {@code null} when the dictionary could not be
     * created or opened during class initialisation.
     */
    private static final IDictionary dictionary;

    private Connection connection = null;
    private PersistenceLoaderMBean loader = null;
    // Never assigned inside this class.
    private MessageProcessingMBean processor = null;

    static {
        IDictionary opened = null;
        try {
            // construct the URL to the Wordnet dictionary directory
            URL url = new URL ("file", null, STR_DICTIONARY_LOCATION);
            IDictionary candidate = new Dictionary (url);
            candidate.open();
            // Publish only after open() succeeded, so the field is never an unopened dictionary.
            opened = candidate;
        } catch (Exception e) {
            e.printStackTrace();
        }
        dictionary = opened;
    }

    /**
     * Life-cycle callback: configures and opens the database connection and looks
     * up the {@code PersistenceLoader} in JNDI. Failures are printed and leave the
     * corresponding fields {@code null}.
     */
    @PostConstruct
    public void start() {
        System.out.println ("MessageBaseProcessor started");
        try {
            // NOTE(review): hard-coded credentials; should be externalised.
            this.setConnectionParams("ebas", "gwtebas", "bachelor_project");
            connection = this.getConnection();
            InitialContext context = new InitialContext ();
            loader = (PersistenceLoaderMBean) context.lookup("PersistenceLoader");

            // Leftover range diagnostics; kept so the start-up log output is unchanged.
            int n1 = 75, n2 = 120;
            System.out.println ("n1 = " + n1 + ", n2 = " + n2);
            n1 = 1234; n2 = 2854;
            System.out.println ("n1 = " + n1 + ", n2 = " + n2);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Life-cycle callback: closes this instance's database connection, if any.
     *
     * <p>The static dictionary is deliberately left open: it is shared by all
     * instances of this bean, so closing it when one instance is destroyed would
     * break synonym lookup for the remaining ones.
     */
    @PreDestroy
    public void stop() {
        if (connection != null) {
            try {
                connection.close();
            } catch (SQLException e) {
                e.printStackTrace(); // TODO move to log
            } finally {
                connection = null;
            }
        }
        System.out.println ("MessageBaseProcessor stopped");
    }

    /**
     * Returns the noun synonyms of a word according to the dictionary.
     *
     * <p>The word is looked up as {@link POS#NOUN}. For each word ID of the
     * resulting index word, the lemmas of all words in the corresponding synset
     * are collected in lower case, skipping lemmas equal (ignoring case) to
     * {@code w}. Each synonym appears once, in the order first encountered.
     *
     * @param w the word to look up; {@code null} or empty yields an empty array
     * @return the distinct lower-cased synonyms, never {@code null}
     * @throws IOException if the dictionary is not available, or if the lookup fails
     */
    public String[] getSynonyms(String w) throws IOException {
        if (dictionary == null) {
            throw new IOException("WordNet dictionary is not available at " + STR_DICTIONARY_LOCATION);
        }
        if (w == null || w.length() == 0) {
            return new String[0];
        }
        IIndexWord idxWord = dictionary.getIndexWord(w, POS.NOUN);
        if (idxWord == null) {
            return new String[0];
        }
        String needle = w.toLowerCase(Locale.ENGLISH);
        // Insertion-ordered set: the same lemma can occur in several synsets.
        Set<String> synonyms = new LinkedHashSet<String> ();
        for (IWordID wordID : idxWord.getWordIDs()) {
            IWord word = dictionary.getWord(wordID);
            if (word == null) {
                continue;
            }
            ISynset synset = word.getSynset();
            for (IWord candidate : synset.getWords ()) {
                String lemma = candidate.getLemma().toLowerCase(Locale.ENGLISH);
                if (!lemma.equals(needle)) {
                    synonyms.add(lemma);
                }
            }
        }
        return synonyms.toArray(new String [synonyms.size()]);
    }

    /**
     * Computes word statistics over the messages obtained from
     * {@code loader.loadMessages(minMsgId, maxMsgId)} and prints them to standard output.
     *
     * <p>Each message's content is split on {@link #WORD_INPUT_DELIMITERS}; empty
     * tokens are ignored. Every remaining token is lower-cased and classified as
     * invalid (contains anything other than a-z), a stop word (per
     * {@code loader.isStopWord}), or a regular word, which is stemmed and
     * collected into a set of distinct stems. Invalid tokens are additionally
     * printed grouped by occurrence count, highest count first.
     *
     * @param minMsgId first bound of the message id range, passed to the loader
     * @param maxMsgId second bound of the message id range, passed to the loader
     * @return a map with the keys {@code "words"} (non-empty tokens),
     *         {@code "invalid"} (invalid token occurrences), {@code "stop"}
     *         (stop word occurrences) and {@code "stemmed"} (distinct stems), each
     *         as a decimal string; an empty map if a database error occurred
     * @throws IllegalStateException if the persistence loader was not obtained in {@link #start()}
     */
    public Map<String, String> getWordStatistics (int minMsgId, int maxMsgId) {
        if (loader == null) {
            throw new IllegalStateException("PersistenceLoader is not available; start() did not complete");
        }
        int numWords = 0;
        int numInvalidWords = 0;
        int numStopWords = 0;
        Set<String> stems = new HashSet<String> ();
        Map<String, Integer> invalid = new HashMap<String, Integer> ();
        Map<String, String> stats = new HashMap<String, String> ();
        try {
            List<Message> messages = loader.loadMessages(minMsgId, maxMsgId);
            if (messages != null) {
                for (Message msg : messages) {
                    String content = (msg == null) ? null : msg.getContent();
                    if (content == null) {
                        continue;
                    }
                    for (String raw : DELIMITER_PATTERN.split(content)) {
                        // Consecutive delimiters produce empty strings; they are not words.
                        if (raw.length() == 0) {
                            continue;
                        }
                        numWords ++;
                        String token = raw.toLowerCase(Locale.ENGLISH);
                        if (!VALID_WORD_PATTERN.matcher(token).matches()) {
                            numInvalidWords ++;
                            Integer seen = invalid.get(token);
                            invalid.put(token, Integer.valueOf(seen == null ? 1 : seen.intValue() + 1));
                        } else if (loader.isStopWord(token)) {
                            numStopWords ++;
                        } else {
                            stems.add(Stemmer.stem(token));
                        }
                    }
                }
            }
            System.out.println("# words = " + numWords);
            System.out.println("# invalid = " + numInvalidWords);
            System.out.println("# stop = " + numStopWords);
            System.out.println ("# after stemming = " + stems.size());
            printInvalidTokensByFrequency(invalid);

            stats.put("words", String.valueOf(numWords));
            stats.put("invalid", String.valueOf(numInvalidWords));
            stats.put("stop", String.valueOf(numStopWords));
            stats.put("stemmed", String.valueOf(stems.size()));
        } catch (SQLException e) {
            e.printStackTrace();
            // Partial counts would be misleading; report "no statistics" instead.
            stats.clear();
        }
        return stats;
    }

    /** Prints the invalid tokens grouped by occurrence count, highest count first. */
    private static void printInvalidTokensByFrequency(Map<String, Integer> invalid) {
        SortedMap<Integer, List<String>> byCount =
                new TreeMap<Integer, List<String>> (Collections.<Integer>reverseOrder());
        for (Map.Entry<String, Integer> entry : invalid.entrySet()) {
            List<String> tokens = byCount.get(entry.getValue());
            if (tokens == null) {
                tokens = new ArrayList<String> ();
                byCount.put(entry.getValue(), tokens);
            }
            tokens.add(entry.getKey());
        }
        for (Map.Entry<Integer, List<String>> group : byCount.entrySet()) {
            System.out.println ("For count " + group.getKey() + " :");
            System.out.println (group.getValue().toString());
        }
    }
}