package project.core.mbeans.search;
import java.io.IOError;
import java.io.IOException;
import java.sql.SQLException;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.PostConstruct;
import javax.ejb.Stateful;
import javax.naming.InitialContext;
import org.jboss.annotation.ejb.RemoteBinding;
import org.jboss.annotation.ejb.cache.simple.CacheConfig;
import project.client.persistence.Message;
import project.client.persistence.MessageThread;
import project.client.persistence.User;
import project.core.mbeans.analysis.MessageBaseProcessorMBean;
import project.core.persistence.PersistenceLoaderMBean;
import project.persistence.properties.MessageWithProperties;
import project.utils.statistics.MessageSimilarity;
import project.utils.statistics.impl.MessageSimilarity_Impl;
/**
 * Stateful bean that ranks the loaded message base against a keyword query.
 *
 * <p>Messages are fetched through the {@code PersistenceLoader} bean and kept in
 * memory; synonyms for the query keywords come from the {@code MessageBaseProcessor}
 * bean. Each message is scored with {@link MessageSimilarity#sim3} against the
 * keywords and against their synonyms, the best matches are copied into plain
 * {@link Message} objects with the matching terms highlighted, and the relevance of
 * the returned messages is rescaled with a min-max normalisation.
 */
@Stateful
@RemoteBinding(jndiBinding="MessageSearch")
@CacheConfig(removalTimeoutSeconds=18000L)
public class MessageSearch implements MessageSeachMBean {

    /** Messages with fewer words than this always get a relevance of zero. */
    private static final int MIN_NUM_WORDS = 10;

    /** Markup wrapped around highlighted terms. */
    private static final String QUERY_SPAN_OPEN = "<span style=\"background-color:yellow\">";
    private static final String SYNONYM_SPAN_OPEN = "<span style=\"background-color:lightblue\">";
    private static final String SPAN_CLOSE = "</span>";

    /** Orders relevance keys from highest to lowest. Keys are never null, zero or NaN. */
    private static final Comparator<Float> DESCENDING = new Comparator<Float> () {
        public int compare (Float f1, Float f2) {
            return Float.compare(f2.floatValue(), f1.floatValue());
        }
    };

    /** When true the message cache is reloaded at the start of every search. */
    private boolean bReloadMessageEveryQuery;
    /** In-memory copy of the message base that searches run against. */
    private List<MessageWithProperties> messages;
    private PersistenceLoaderMBean loader;
    private MessageBaseProcessorMBean base;

    /**
     * Looks up the collaborating beans through JNDI and loads the message cache.
     * Failures are printed and otherwise ignored, as before; the cache is created
     * before the lookups so that a failed lookup leaves an empty list, not null.
     */
    @PostConstruct
    public void start() {
        bReloadMessageEveryQuery = false;
        messages = new LinkedList<MessageWithProperties> ();
        try {
            InitialContext context = new InitialContext ();
            loader = (PersistenceLoaderMBean) context.lookup("PersistenceLoader");
            base = (MessageBaseProcessorMBean) context.lookup("MessageBaseProcessor");
            this.loadMessages();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Searches the cached messages for the given keywords.
     *
     * @param keywords  the query terms; must be non-null and non-empty
     * @param limit     maximum number of messages to return, or -1 for no limit
     *                  (any other negative value yields an empty result)
     * @param lookUser  when true, a message's score is 0.3 times its own relevance plus
     *                  0.7 times the summed relevance of all messages by the same user
     * @return the matching messages, most relevant first, with highlighted content and
     *         relevance rescaled to the range spanned by the returned messages
     * @throws IllegalArgumentException if {@code keywords} is null or empty
     */
    public List<Message> search(String[] keywords, int limit, boolean lookUser)
    throws IllegalArgumentException {
        if (keywords == null)
            throw new IllegalArgumentException ("query is null!");
        if (keywords.length == 0)
            throw new IllegalArgumentException ("query is empty!");

        System.out.println ("Query: ");
        StringBuilder queryBuffer = new StringBuilder ();
        for (int i = 0; i < keywords.length; i++) {
            System.out.println (" > " + keywords [i]);
            queryBuffer.append(keywords [i]).append(';');
        }

        if (bReloadMessageEveryQuery)
            this.loadMessages();

        System.out.println ("Synonim query: ");
        StringBuilder synonymBuffer = new StringBuilder ();
        for (String synonym : collectSynonyms(keywords)) {
            System.out.println (" > " + synonym);
            synonymBuffer.append(synonym).append(';');
        }

        // The similarity calculator compares messages, so the query and its synonyms
        // are wrapped into messages whose formatted content is the ';'-joined terms.
        String queryContent = queryBuffer.toString();
        String synonymContent = synonymBuffer.toString();
        Message queryMsg = new Message ();
        queryMsg.setFormattedContent(queryContent);
        Message synonimMsg = new Message ();
        synonimMsg.setFormattedContent(synonymContent);

        // Split once instead of once per returned message. Note that splitting an
        // empty string yields a single empty term; the highlighter skips those.
        String[] queryTerms = queryContent.split(";");
        String[] synonymTerms = synonymContent.split(";");

        // Work on one reference so a reload cannot swap the list between the passes.
        List<MessageWithProperties> candidates = messages;
        MessageSimilarity calculator = new MessageSimilarity_Impl ();
        SortedMap<Float, List<MessageWithProperties>> ranked =
            new TreeMap<Float, List<MessageWithProperties>> (DESCENDING);

        if (lookUser) {
            rankWithUserRelevance(candidates, calculator, queryMsg, synonimMsg, ranked);
        } else {
            for (MessageWithProperties msg : candidates) {
                float rel = relevance(msg, calculator, queryMsg, synonimMsg);
                msg.setRelevance(rel);
                addRanked(ranked, rel, msg);
            }
        }

        List<Message> results = collectTopMessages(ranked, limit, queryTerms, synonymTerms);
        normalizeRelevance(results);
        return results;
    }

    /**
     * Gathers the distinct synonyms of all keywords, excluding the keywords themselves.
     * A failed lookup for one keyword is reported and skipped so the search still runs.
     */
    private List<String> collectSynonyms (String[] keywords) {
        List<String> synonyms = new LinkedList<String> ();
        if (base == null)
            return synonyms;
        for (int i = 0; i < keywords.length; i++) {
            try {
                String[] found = base.getSynonyms(keywords [i]);
                if (found == null)
                    continue;
                for (int j = 0; j < found.length; j++) {
                    if (found [j] != null && !synonyms.contains(found [j]))
                        synonyms.add(found [j]);
                }
            } catch (IOException e) {
                // Synonyms only widen the search; carry on without them.
                System.err.println ("Could not look up synonyms for '" + keywords [i] + "': " + e);
            }
        }
        for (int i = 0; i < keywords.length; i++) {
            synonyms.remove(keywords [i]);
        }
        return synonyms;
    }

    /**
     * Scores every candidate as 0.3 * own relevance + 0.7 * total relevance of its user
     * and files it into {@code ranked}. The own relevance and the owning user are
     * computed once per message and reused by the second pass.
     */
    private void rankWithUserRelevance (List<MessageWithProperties> candidates, MessageSimilarity calculator,
            Message queryMsg, Message synonimMsg, SortedMap<Float, List<MessageWithProperties>> ranked) {
        int count = candidates.size();
        float[] ownRelevance = new float [count];
        User[] owners = new User [count];
        Map<User, Float> userRelevance = new HashMap<User, Float> ();

        // First pass: relevance of each message, summed up per user.
        int index = 0;
        for (MessageWithProperties msg : candidates) {
            float rel = relevance(msg, calculator, queryMsg, synonimMsg);
            User owner = loader.getMessageUser(msg.getId());
            ownRelevance [index] = rel;
            owners [index] = owner;
            index++;
            Float total = userRelevance.get(owner);
            userRelevance.put(owner, total == null ? rel : total + rel);
        }

        // Second pass: blend each message's relevance with its user's total.
        index = 0;
        for (MessageWithProperties msg : candidates) {
            Float total = userRelevance.get(owners [index]);
            float userRel = total == null ? 0.0f : total.floatValue();
            float rel = 0.3f * ownRelevance [index] + 0.7f * userRel;
            index++;
            msg.setRelevance(rel);
            addRanked(ranked, rel, msg);
        }
    }

    /**
     * Files a message under its relevance. Messages scoring zero are irrelevant and
     * left out; NaN scores are left out too because they cannot be ordered.
     */
    private static void addRanked (SortedMap<Float, List<MessageWithProperties>> ranked, float rel,
            MessageWithProperties msg) {
        if (rel == 0.0f || Float.isNaN(rel))
            return;
        Float key = rel;
        List<MessageWithProperties> bucket = ranked.get(key);
        if (bucket == null) {
            bucket = new LinkedList<MessageWithProperties> ();
            ranked.put(key, bucket);
        }
        bucket.add(msg);
    }

    /**
     * Converts the best ranked messages into result messages, highest relevance first,
     * stopping once {@code limit} messages have been collected (-1 means no limit).
     */
    private List<Message> collectTopMessages (SortedMap<Float, List<MessageWithProperties>> ranked, int limit,
            String[] queryTerms, String[] synonymTerms) {
        List<Message> results = new LinkedList<Message> ();
        for (Map.Entry<Float, List<MessageWithProperties>> entry : ranked.entrySet()) {
            for (MessageWithProperties msg : entry.getValue()) {
                // ">=" so that at most 'limit' messages are returned, not limit + 1.
                if (limit != -1 && results.size() >= limit)
                    return results;
                results.add(createMessageFromMessageWithProperties (msg, queryTerms, synonymTerms));
                System.out.println ("Message " + msg.getUrl() + " : " + entry.getKey());
            }
        }
        return results;
    }

    /**
     * Rescales the relevance of the given messages so the lowest becomes 0 and the
     * highest becomes 1. Nothing changes when all messages share the same relevance.
     */
    private static void normalizeRelevance (List<Message> results) {
        if (results.isEmpty())
            return;
        // Seed with infinities: relevance can exceed 1 when user totals are blended in.
        float minRelevance = Float.POSITIVE_INFINITY;
        float maxRelevance = Float.NEGATIVE_INFINITY;
        for (Message msg : results) {
            float rel = msg.getRelevance();
            if (rel < minRelevance)
                minRelevance = rel;
            if (rel > maxRelevance)
                maxRelevance = rel;
        }
        System.out.println ("min relevance = " + minRelevance + ", max relevance = " + maxRelevance);
        if (maxRelevance > minRelevance) {
            float mult = 1.0f / (maxRelevance - minRelevance);
            for (Message msg : results) {
                msg.setRelevance((msg.getRelevance() - minRelevance) * mult);
                System.out.println ("update relevance to " + msg.getRelevance());
            }
        }
    }

    /**
     * Relevance of a message for the query: 0.7 times its similarity to the keywords
     * plus 0.3 times its similarity to the synonyms, or zero for messages shorter
     * than {@link #MIN_NUM_WORDS} words.
     */
    private float relevance (MessageWithProperties message, MessageSimilarity calculator, Message queryMessage, Message synonimMessage) {
        if (message.getNumWords() < MIN_NUM_WORDS)
            return 0.0f;
        // TODO improve this, by taking into account the strength of message's owner wrt the
        // network, etc
        return 0.7f * calculator.sim3(message, queryMessage) +
            0.3f * calculator.sim3(message, synonimMessage);
    }

    /**
     * Reloads the message cache from the persistence loader. The new list replaces the
     * old one only after a successful load, so a failure keeps the previous cache.
     */
    private synchronized void loadMessages () {
        try {
            long startedAt = System.currentTimeMillis();
            List<MessageWithProperties> loaded = loader.loadMessagesWithProperties (null);
            long elapsed = System.currentTimeMillis() - startedAt;
            if (loaded == null)
                loaded = new LinkedList<MessageWithProperties> ();
            messages = loaded;
            System.out.println ("Finished loading the messages (time = " + elapsed + " miliseconds, # messages = " + messages.size() + ")");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Builds the message returned to the client: a copy of {@code message} whose
     * content has the query terms and synonyms highlighted, and whose thread and user
     * are fetched from the persistence loader by message id.
     */
    private Message createMessageFromMessageWithProperties (MessageWithProperties message, String[] query, String[] synonims) {
        Message msg = new Message ();
        msg.setContent(highlight(message.getContent(), query, synonims));
        msg.setFormattedContent(message.getFormattedContent());
        msg.setId(message.getId());
        msg.setParent(message.getParent());
        msg.setPublishDate(message.getPublishDate());
        msg.setUrl(message.getUrl());
        msg.setMessageThread(loader.getMessageThread(msg.getId()));
        msg.setUser(loader.getMessageUser(msg.getId()));
        msg.setRelevance(message.getRelevance());
        return msg;
    }

    /**
     * Wraps every case-insensitive occurrence of a query term in a yellow span and of
     * a synonym in a light-blue span. Terms are matched literally and in a single
     * pass, so inserted markup is never matched again; empty terms are ignored.
     */
    private static String highlight (String content, String[] query, String[] synonims) {
        if (content == null)
            return null;
        // Query terms come first in the alternation so they win over synonyms.
        StringBuilder regex = new StringBuilder ();
        appendQuotedTerms(regex, query);
        appendQuotedTerms(regex, synonims);
        if (regex.length() == 0)
            return content;

        Matcher m = Pattern.compile(regex.toString(), Pattern.CASE_INSENSITIVE).matcher(content);
        StringBuffer highlighted = new StringBuffer ();
        while (m.find()) {
            String hit = m.group();
            String open = containsIgnoreCase(query, hit) ? QUERY_SPAN_OPEN : SYNONYM_SPAN_OPEN;
            // quoteReplacement keeps '$' and '\' in the matched text literal.
            m.appendReplacement(highlighted, Matcher.quoteReplacement(open + hit + SPAN_CLOSE));
        }
        m.appendTail(highlighted);
        return highlighted.toString();
    }

    /** Appends the non-empty terms to {@code regex} as '|'-separated quoted literals. */
    private static void appendQuotedTerms (StringBuilder regex, String[] terms) {
        for (int i = 0; i < terms.length; i++) {
            if (terms [i] == null || terms [i].length() == 0)
                continue;
            if (regex.length() > 0)
                regex.append('|');
            regex.append(Pattern.quote(terms [i]));
        }
    }

    /** Tells whether {@code text} equals one of the terms, ignoring case. */
    private static boolean containsIgnoreCase (String[] terms, String text) {
        for (int i = 0; i < terms.length; i++) {
            if (terms [i] != null && terms [i].equalsIgnoreCase(text))
                return true;
        }
        return false;
    }
}