package esl.cuenet.ranking.sources;
import com.mongodb.BasicDBObject;
import esl.cuenet.model.Constants;
import esl.cuenet.query.drivers.mongodb.MongoDB;
import esl.cuenet.ranking.EntityBase;
import esl.cuenet.ranking.EventEntityNetwork;
import esl.cuenet.ranking.SourceInstantiator;
import esl.cuenet.ranking.URINode;
import esl.cuenet.ranking.network.OntProperties;
import esl.cuenet.source.accessors.AccessorConstants;
import esl.cuenet.source.accessors.Utils;
import org.apache.log4j.Logger;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
public class EmailSource extends MongoDB implements SourceInstantiator {
private Logger logger = Logger.getLogger(EmailSource.class);
public EmailSource() {
super(AccessorConstants.DBNAME);
}
@Override
public void populate(EventEntityNetwork network, EntityBase entityBase) {
DBReader reader = this.startReader("emails");
BasicDBObject keys = new BasicDBObject();
keys.put("_id", 0);
reader.getAll(keys);
String date, to, from, cc, uid;
String occursDuringPropertyURI = Constants.CuenetNamespace + Constants.OccursDuring;
String participatesInPropertyURI = Constants.DOLCE_Lite_Namespace + Constants.ParticipantIn;
String namePropertyURI = Constants.CuenetNamespace + "name";
String emailPropertyURI = Constants.CuenetNamespace + "email";
//make copy of dbObjects. Mongo gc trashes cursors which are inactive for > 10mins.
List<BasicDBObject> dbObjects = new ArrayList<BasicDBObject>();
while (reader.hasNext()) {
BasicDBObject obj = (BasicDBObject) reader.next();
dbObjects.add(obj);
}
logger.info(" Total number of emails: " + dbObjects.size());
int c = 0; int ix = 0;
network.startBulkLoad();
for (BasicDBObject obj: dbObjects) {
List<Map.Entry<String, String>> entries = new ArrayList<Map.Entry<String, String>>();
uid = obj.getString("uid");
to = obj.getString("to");
if (to != null) entries.addAll(Utils.parseEmailAddresses(to));
from = obj.getString("from");
if (from != null) entries.addAll(Utils.parseEmailAddresses(from));
cc = obj.getString("cc");
if (cc != null) entries.addAll(Utils.parseEmailAddresses(cc));
date = obj.getString("date");
URINode emailInstance = SourceHelper.createInstance(network, Constants.CuenetNamespace +
Constants.EmailExchangeEvent + "_" + uid);
URINode timeInterval = SourceHelper.createInstance(network, Constants.CuenetNamespace +
Constants.TimeInterval);
long t = Utils.parseEmailDate(date).getTime();
timeInterval.
createEdgeTo(SourceHelper.createLiteral(network, t)).
setProperty(OntProperties.ONT_URI, Constants.CuenetNamespace + Constants.TimestampMillisStart);
timeInterval.
createEdgeTo(SourceHelper.createLiteral(network, t)).
setProperty(OntProperties.ONT_URI, Constants.CuenetNamespace + Constants.TimestampMillisEnd);
emailInstance.createEdgeTo(timeInterval).setProperty(OntProperties.ONT_URI, occursDuringPropertyURI);
for (Map.Entry<String, String> entry: entries) {
URINode personInstance = SourceHelper.createInstance(network, Constants.CuenetNamespace +
Constants.Person + "_" + c);
boolean f = false;
if (entry.getKey() != null) {
personInstance.
createEdgeTo(SourceHelper.createLiteral(network, entry.getKey())).
setProperty(OntProperties.ONT_URI, namePropertyURI);
URINode entityNode = entityBase.lookup(EntityBase.V_EMAIL, entry.getKey());
if (entityNode != null) {
personInstance.createEdgeTo(entityNode)
.setProperty(OntProperties.TYPE, OntProperties.IS_SAME_AS);
f = true;
}
}
if (entry.getValue() != null) {
personInstance.
createEdgeTo(SourceHelper.createLiteral(network, entry.getValue())).
setProperty(OntProperties.ONT_URI, emailPropertyURI);
}
if ( entry.getValue() != null && !f )
logger.info("Didn't create link for = " + entry.getKey() + " " + entry.getValue());
personInstance.createEdgeTo(emailInstance).
setProperty(OntProperties.ONT_URI, participatesInPropertyURI);
c += 1;
}
if (ix % 1000 == 0) {
logger.info("Added " + ix + " mails");
network.flush();
}
ix += 1;
//if (ix % 2000 == 0) break; //for testing
}
network.finishBulkLoad();
logger.info("EmailSource import complete");
}
}