/*
* Copyright (c) 2011, SOCIETIES Consortium (WATERFORD INSTITUTE OF TECHNOLOGY (TSSG), HERIOT-WATT UNIVERSITY (HWU), SOLUTA.NET
* (SN), GERMAN AEROSPACE CENTRE (Deutsches Zentrum fuer Luft- und Raumfahrt e.V.) (DLR), Zavod za varnostne tehnologije
* informacijske držbe in elektronsko poslovanje (SETCCE), INSTITUTE OF COMMUNICATION AND COMPUTER SYSTEMS (ICCS), LAKE
* COMMUNICATIONS (LAKE), INTEL PERFORMANCE LEARNING SOLUTIONS LTD (INTEL), PORTUGAL TELECOM INOAÇÃO, SA (PTIN), IBM Corp.,
* INSTITUT TELECOM (ITSUD), AMITEC DIACHYTI EFYIA PLIROFORIKI KAI EPIKINONIES ETERIA PERIORISMENIS EFTHINIS (AMITEC), TELECOM
* ITALIA S.p.a.(TI), TRIALOG (TRIALOG), Stiftelsen SINTEF (SINTEF), NEC EUROPE LTD (NEC))
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following
* conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING,
* BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
* SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
* NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.societies.orchestration.cpa.impl.comparison;
import gate.*;
import gate.corpora.DocumentContentImpl;
import gate.corpora.DocumentImpl;
import gate.creole.ANNIEConstants;
import gate.creole.ExecutionException;
import gate.creole.SerialAnalyserController;
import gate.util.GateException;
import gate.util.Out;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.societies.api.activity.IActivity;
import org.societies.orchestration.cpa.impl.SocialGraphVertex;
import java.io.File;
import java.net.URISyntaxException;
import java.net.URL;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
/**
 * {@link ActorComparator} that scores two social-graph members by the named
 * entities their activity text has in common.
 *
 * <p>The text of the activities each member takes part in is run through a GATE
 * ANNIE pipeline; the result is merged into the member's {@link SocialGraphVertex}
 * (which acts as a cache keyed by the newest activity timestamp seen so far), and
 * the score is the number of "Location" and "Person" terms that both members share.
 *
 * <p>Instances keep mutable counters and share a single ANNIE controller; the class
 * makes no attempt at synchronisation.
 *
 * @author bjornmagnus.mathisen@sintef.no
 */
public class ContentComparator implements ActorComparator {

    private static final Logger LOG = LoggerFactory
            .getLogger(ContentComparator.class);

    /** Key under which location terms are looked up in a member's term map. */
    private static final String LOCATION_TERMS = "Location";
    /** Key under which person terms are looked up in a member's term map. */
    private static final String PERSON_TERMS = "Person";

    /** ANNIE pipeline; stays {@code null} when initialisation failed. */
    private SerialAnalyserController annieController;
    /** Location of the ANNIE plugin directory on the class path. */
    private URL pluginURL;
    /** Number of times a member's text was (re-)annotated. */
    private int annotationsDone = 0;
    /** Number of annotation runs that produced at least one named annotation set. */
    private int fruitfulannotationsDone = 0;

    /**
     * Creates the comparator and tries to bring up the ANNIE pipeline.
     *
     * <p>A {@link GateException} during initialisation is logged rather than
     * propagated; the comparator then has no pipeline and every annotation
     * request yields an empty result.
     */
    public ContentComparator(){
        try {
            initAnnie();
        } catch (GateException e) {
            LOG.error("Could not initialise ANNIE, content comparison will find no annotations", e);
        }
    }

    /**
     * Initialises GATE (if that has not happened yet), registers the ANNIE plugin
     * found on the class path and builds a serial controller containing every
     * processing resource listed in {@link ANNIEConstants#PR_NAMES}.
     *
     * @throws GateException if the ANNIE plugin directory is not on the class path
     *                       or GATE fails to initialise or to create a resource
     */
    public void initAnnie() throws GateException {
        Out.prln("Initialising ANNIE... ");
        ClassLoader loader = ContentComparator.class.getClassLoader();

        if (Gate.getPluginsHome() == null) {
            Gate.setPluginsHome(new File("."));
        }
        // The site config may only be set once per JVM, so skip it when another
        // instance (or other code) has already done so.
        if (Gate.getSiteConfigFile() == null) {
            URL siteConfig = loader.getResource("gate.xml");
            if (siteConfig == null) {
                LOG.warn("gate.xml not found on the class path, continuing without site config");
            } else {
                try {
                    Gate.setSiteConfigFile(new File(siteConfig.toURI()));
                } catch (URISyntaxException e) {
                    LOG.error("Invalid URI for gate.xml: " + siteConfig, e);
                } catch (IllegalArgumentException e) {
                    // new File(URI) rejects anything that is not a plain file: URI
                    LOG.error("gate.xml is not available as a plain file: " + siteConfig, e);
                }
            }
        }
        if (!Gate.isInitialised()) {
            Gate.init();
        }

        pluginURL = loader.getResource("plugins/ANNIE/");
        if (pluginURL == null) {
            throw new GateException("ANNIE plugin directory 'plugins/ANNIE/' not found on the class path");
        }
        Gate.getCreoleRegister().registerDirectories(pluginURL);
        Out.prln("...GATE initialised");

        // create a serial analyser controller to run ANNIE with
        annieController =
                (SerialAnalyserController) Factory.createResource(
                        "gate.creole.SerialAnalyserController", Factory.newFeatureMap(),
                        Factory.newFeatureMap(), "ANNIE_" + Gate.genSym()
                );
        // load each PR as defined in ANNIEConstants, with default parameters
        for (String prName : ANNIEConstants.PR_NAMES) {
            FeatureMap params = Factory.newFeatureMap();
            ProcessingResource pr = (ProcessingResource) Factory.createResource(prName, params);
            annieController.add(pr);
        }
        Out.prln("...ANNIE loaded");
    }

    /**
     * Wraps a piece of text in a single-document corpus.
     *
     * @param str the text that becomes the content of the only document
     * @return a new corpus holding one document with {@code str} as content
     * @throws GateException if the corpus resource cannot be created
     */
    public Corpus makeCorpus(String str) throws GateException {
        Document doc = new DocumentImpl();
        doc.setContent(new DocumentContentImpl(str));
        Corpus corpus = (Corpus) Factory.createResource("gate.corpora.CorpusImpl");
        corpus.add(doc);
        return corpus;
    }

    /**
     * Counts the "Location" and "Person" terms the two members have in common.
     *
     * <p>For every activity in {@code activityDiff} that mentions a member (see
     * {@link #contains(SocialGraphVertex, IActivity)}) the activity's object text is
     * collected for that member. When a member's newest collected activity is more
     * recent than the timestamp stored on the vertex, the collected text is
     * annotated, merged into the vertex and the vertex timestamp is advanced;
     * otherwise the terms already stored on the vertex are used.
     *
     * @param member1      first member
     * @param member2      second member
     * @param activityDiff activities to consider; {@code null} is treated as empty
     * @return the number of shared location terms plus the number of shared person
     *         terms, or 0 when either member is not mentioned in any activity
     * @throws NumberFormatException if the published value of a matching activity
     *                               is not a parsable long
     */
    @Override
    public double compare(SocialGraphVertex member1, SocialGraphVertex member2, List<IActivity> activityDiff) {
        if (activityDiff == null) {
            return 0;
        }
        StringBuilder member1Text = new StringBuilder();
        StringBuilder member2Text = new StringBuilder();
        long m1lastTimeStamp = 0;
        long m2lastTimeStamp = 0;
        for (IActivity act : activityDiff) {
            if (contains(member1, act)) {
                member1Text.append(". ").append(act.getObject());
                m1lastTimeStamp = Math.max(m1lastTimeStamp, Long.parseLong(act.getPublished()));
            }
            if (contains(member2, act)) {
                member2Text.append(". ").append(act.getObject());
                m2lastTimeStamp = Math.max(m2lastTimeStamp, Long.parseLong(act.getPublished()));
            }
        }
        // Nothing to compare unless BOTH members contributed text
        // (the original tested member1Text twice and never looked at member2Text).
        if (member1Text.length() == 0 || member2Text.length() == 0) {
            return 0;
        }
        LOG.info("USER: "+member1.getName()+" m1lastTimeStamp: "+m1lastTimeStamp+" member1.getTimestamp(): "+member1.getTimestamp());
        LOG.info("USER: "+member2.getName()+" m2lastTimeStamp: "+m2lastTimeStamp+" member2.getTimestamp(): "+member2.getTimestamp());

        Map<String, List<String>> m1annotations = refreshTerms(member1, member1Text.toString(), m1lastTimeStamp);
        Map<String, List<String>> m2annotations = refreshTerms(member2, member2Text.toString(), m2lastTimeStamp);

        // shared locations + shared persons
        double ret = countShared(m1annotations, m2annotations, LOCATION_TERMS)
                + countShared(m1annotations, m2annotations, PERSON_TERMS);

        LOG.info("fruitfulannotationsDone: "+fruitfulannotationsDone+" annotationsDone: "+annotationsDone+" ret: "+ret);
        return ret;
    }

    /**
     * Re-annotates a member's text when it contains activity newer than what the
     * vertex has cached, then returns the vertex's terms.
     */
    private Map<String, List<String>> refreshTerms(SocialGraphVertex member, String text, long lastTimeStamp) {
        if (lastTimeStamp > member.getTimestamp()) {
            long start = System.currentTimeMillis();
            member.merge(getAnnotations(text)); //cache the extraction
            member.setTimestamp(lastTimeStamp);
            long timespent = System.currentTimeMillis() - start;
            LOG.info("annotating "+text.length()+" time spent: "+timespent+" per char: "
                    +((double) timespent)/((double) text.length()));
            annotationsDone++;
        }
        return member.getTerms();
    }

    /**
     * Counts the entries of {@code first}'s list for {@code type} that also occur in
     * {@code second}'s list for the same type; a missing map or list counts as 0.
     */
    private static int countShared(Map<String, List<String>> first, Map<String, List<String>> second, String type) {
        if (first == null || second == null) {
            return 0;
        }
        List<String> firstTerms = first.get(type);
        List<String> secondTerms = second.get(type);
        if (firstTerms == null || secondTerms == null) {
            return 0;
        }
        int shared = 0;
        for (String term : firstTerms) {
            if (secondTerms.contains(term)) {
                shared++;
            }
        }
        return shared;
    }

    /**
     * Runs the ANNIE pipeline over {@code str} and returns the named annotation
     * sets of the resulting document.
     *
     * @param str text to annotate
     * @return the document's named annotation sets; an empty map when {@code str}
     *         is null or empty, the pipeline is unavailable, the corpus could not
     *         be built, or the document has no named annotation sets
     */
    private Map<String, AnnotationSet> getAnnotations(String str){
        Map<String, AnnotationSet> ret = new HashMap<String, AnnotationSet >();
        if(str == null || str.length() == 0){
            LOG.info("str is null escaping..");
            return ret;
        }
        LOG.info("trying to annotate str of size: "+str.length());
        if (annieController == null) {
            LOG.warn("ANNIE is not initialised, returning no annotations");
            return ret;
        }

        Corpus corp;
        try {
            corp = this.makeCorpus(str);
        } catch (GateException e) {
            LOG.error("Could not create corpus for annotation", e);
            return ret;
        }

        annieController.setCorpus(corp);
        try {
            annieController.execute();
        } catch (ExecutionException e) {
            // best effort: whatever the pipeline produced before failing is still read below
            LOG.error("ANNIE execution failed", e);
        }

        if (corp.size() == 0) {
            LOG.info("corp size 0 ret:"+ret);
            return ret;
        }
        LOG.info("corp size: "+corp.size());
        Document doc = corp.get(0);

        // NOTE(review): only NAMED annotation sets are read here; confirm that the
        // pipeline is not writing its results to the document's default set instead.
        Map<String, AnnotationSet> named = doc.getNamedAnnotationSets();
        if (named == null) {
            LOG.info("ret is null after getnamedannotation...");
            return ret;
        }
        if (named.isEmpty()) {
            LOG.info("no named annotation sets found");
            return named;
        }
        fruitfulannotationsDone++;
        LOG.info("found something!: "+named.values().iterator().next().toString());
        return named;
    }

    /**
     * Tells whether an activity mentions a participant.
     *
     * @param participant the member whose name is searched for
     * @param act         the activity to inspect
     * @return {@code true} when the participant's name occurs in the activity's
     *         actor, object or target (null fields are skipped)
     */
    public boolean contains(SocialGraphVertex participant, IActivity act){
        if(act.getActor()!=null && act.getActor().contains(participant.getName()))
            return true;
        if(act.getObject()!=null && act.getObject().contains(participant.getName()))
            return true;
        if(act.getTarget()!=null && act.getTarget().contains(participant.getName()))
            return true;
        return false;
    }
}