package esl.cuenet.ranking.network; import com.mongodb.BasicDBObject; import esl.cuenet.query.drivers.mongodb.MongoDB; import esl.cuenet.ranking.EntityBase; import esl.cuenet.ranking.TypedEdge; import esl.cuenet.ranking.URINode; import esl.cuenet.source.accessors.AccessorConstants; import esl.cuenet.source.accessors.Utils; import org.apache.commons.io.FileUtils; import org.apache.log4j.Logger; import org.neo4j.cypher.javacompat.ExecutionEngine; import org.neo4j.cypher.javacompat.ExecutionResult; import org.neo4j.graphdb.*; import org.neo4j.graphdb.index.Index; import org.neo4j.graphdb.index.IndexHits; import org.neo4j.graphdb.traversal.Evaluators; import org.neo4j.graphdb.traversal.TraversalDescription; import org.neo4j.graphdb.traversal.Traverser; import org.neo4j.kernel.EmbeddedGraphDatabase; import org.neo4j.kernel.Traversal; import java.io.File; import java.io.IOException; import java.util.*; public class NeoEntityBase implements EntityBase { private final GraphDatabaseService graphDbExt; private Logger logger = Logger.getLogger(NeoEntityBase.class); private HashMap<String, Node> nodeMap = new HashMap<String, Node>(); public static final String ENT_GRAPH_LITERAL_INDEX = "text"; private HashSet<Long> entityIdSet = new HashSet<Long>(); @Override public Iterator<Long> iterator() { return entityIdSet.iterator(); } enum Relation implements RelationshipType { SAME } public NeoEntityBase (GraphDatabaseService graphDb) { this.graphDbExt = graphDb; load(); } public static void printEntity(URINode entity, Logger logger) { if (entity == null) { logger.info("NULL"); return; } String s = ""; for (TypedEdge r: entity.getAllRelationships()) { if (r.hasProperty(EntityBase.TYPE) && r.getEndNode().hasProperty(EntityBase.TEXT)) s += r.getProperty(EntityBase.TYPE) + " " + r.getEndNode().getProperty(EntityBase.TEXT) + "; "; if (s.length() > 100) break; } logger.info(s); } private void load() { Index<Node> nodeIndex = graphDbExt.index().forNodes(EntityBase.ENTITY_INDEX); IndexHits<Node> hits = nodeIndex.get(EntityBase.TYPE, EntityBase.ENTITY); for (Node n: hits) entityIdSet.add(n.getId()); } public void construct() { File storeTempDir = null; try { storeTempDir = new File(FileUtils.getTempDirectoryPath() + File.separator + System.currentTimeMillis()); logger.info("Creating temp graph db at: " + storeTempDir.getAbsolutePath()); FileUtils.forceMkdir(storeTempDir); } catch (IOException e) { e.printStackTrace(); } GraphDatabaseService graphDb = new EmbeddedGraphDatabase(storeTempDir.getAbsolutePath()); Transaction tx = graphDb.beginTx(); try { (new EmailScanner()).populate(graphDb); (new FacebookIdScanner()).populate(graphDb); tx.success(); mergeIntoMainDB(); } catch (Exception e) { tx.failure(); e.printStackTrace(); } finally { tx.finish(); graphDb.shutdown(); } try { FileUtils.deleteDirectory(storeTempDir); logger.info("Deleted temp db: " + storeTempDir.getAbsolutePath()); } catch (IOException e) { e.printStackTrace(); } } private void mergeIntoMainDB() { LinkedList<Node> set = new LinkedList<Node>(nodeMap.values()); int md; Index<Node> xIndex = graphDbExt.index().forNodes(ENT_GRAPH_LITERAL_INDEX); while (set.size() > 0) { Node node = set.getFirst(); if (node.getProperty("seen").equals("true")) { set.remove(node); continue; } TraversalDescription td = Traversal.description() .depthFirst() .relationships(Relation.SAME) .evaluator(Evaluators.excludeStartPosition()); Traverser traverser = td.traverse(node); // String output = node.getProperty("text") + "\n"; md = 0; Node entityNode = graphDbExt.createNode(); entityNode.setProperty(EntityBase.TYPE, EntityBase.ENTITY); Node propertyNode = graphDbExt.createNode(); propertyNode.setProperty(EntityBase.TEXT, node.getProperty(EntityBase.TEXT)); entityNode.createRelationshipTo(propertyNode, NeoRelationships.BLANK). setProperty(EntityBase.TYPE, node.getProperty(EntityBase.TYPE)); xIndex.add(propertyNode, EntityBase.TEXT, node.getProperty(EntityBase.TEXT)); for (Path path: traverser) { // output += "At depth " + path.length() + " => (" // + path.endNode().getProperty(EntityBase.TYPE) + ") " // + path.endNode().getProperty(EntityBase.TEXT) + "\n"; path.endNode().setProperty("seen", "true"); md++; propertyNode = graphDbExt.createNode(); propertyNode.setProperty(EntityBase.TEXT, path.endNode().getProperty(EntityBase.TEXT)); entityNode.createRelationshipTo(propertyNode, NeoRelationships.BLANK). setProperty(EntityBase.TYPE, path.endNode().getProperty(EntityBase.TYPE)); xIndex.add(propertyNode, EntityBase.TEXT, path.endNode().getProperty(EntityBase.TEXT)); } set.remove(node); // if (md == 0) entityNode.delete(); // logger.info(output); } } @Override public URINode lookup(String key, Object value) { Index<Node> xIndex = graphDbExt.index().forNodes(NeoEntityBase.ENT_GRAPH_LITERAL_INDEX); IndexHits<Node> hits = xIndex.get(EntityBase.TEXT, value); if (hits.size() == 0) return null; Node hit = hits.getSingle(); Relationship outgoing = hit.getRelationships(Direction.INCOMING).iterator().next(); if (outgoing.getProperty(EntityBase.TYPE).equals(key)) return new NeoURINode(outgoing.getStartNode()); return null; } private class FacebookIdScanner extends MongoDB { public FacebookIdScanner() { super(AccessorConstants.DBNAME); } public void populate(GraphDatabaseService graphDb) { DBReader reader = this.startReader("fb_users"); BasicDBObject keys = new BasicDBObject(); keys.put("id", 1); keys.put("name", 1); keys.put("_id", 0); reader.getAll(keys); String id, name; while (reader.hasNext()) { BasicDBObject obj = (BasicDBObject) reader.next(); id = obj.getString("id"); name = obj.getString("name"); Node fbIdNode = graphDb.createNode(); fbIdNode.setProperty(EntityBase.TEXT, id); fbIdNode.setProperty(EntityBase.TYPE, EntityBase.V_FB_ID); fbIdNode.setProperty("seen", "false"); fbIdNode.setProperty("count", 1); Node fbUser; if (nodeMap.containsKey(name)) fbUser = nodeMap.get(name); else { fbUser = graphDb.createNode(); fbUser.setProperty(EntityBase.TEXT, name); fbUser.setProperty(EntityBase.TYPE, EntityBase.V_NAME); fbUser.setProperty("count", 1); fbUser.setProperty("seen", "false"); nodeMap.put(name, fbUser); } fbUser.createRelationshipTo(fbIdNode, Relation.SAME); } } } private class EmailScanner extends MongoDB { public EmailScanner() { super(AccessorConstants.DBNAME); } public void populate(GraphDatabaseService graphDb) { DBReader reader = this.startReader("emails"); BasicDBObject keys = new BasicDBObject(); keys.put("to", 1); keys.put("cc", 1); keys.put("from", 1); keys.put("_id", 0); reader.getAll(keys); String to, from, cc; List<Map.Entry<String, String>> entries = new ArrayList<Map.Entry<String, String>>(); while (reader.hasNext()) { BasicDBObject obj = (BasicDBObject) reader.next(); to = obj.getString("to"); if (to != null) entries.addAll(Utils.parseEmailAddresses(to)); from = obj.getString("from"); if (from != null) entries.addAll(Utils.parseEmailAddresses(from)); cc = obj.getString("cc"); if (cc != null)entries.addAll(Utils.parseEmailAddresses(cc)); } construct(graphDb, entries); } private void construct(GraphDatabaseService graphDb, List<Map.Entry<String, String>> entries) { for(Map.Entry<String, String> entry: entries) { String em = entry.getKey(); String nm = entry.getValue(); if (nm == null) continue; Node emailNode, nameNode; if ( !nodeMap.containsKey(em) ) { emailNode = graphDb.createNode(); emailNode.setProperty(EntityBase.TEXT, em); emailNode.setProperty(EntityBase.TYPE, EntityBase.V_EMAIL); emailNode.setProperty("seen", "false"); emailNode.setProperty("count", 1); nodeMap.put(em, emailNode); } else { emailNode = nodeMap.get(em); emailNode.setProperty("count", ((Integer) emailNode.getProperty("count")) + 1); } if ( !nodeMap.containsKey(nm) ) { nameNode = graphDb.createNode(); nameNode.setProperty(EntityBase.TEXT, nm); nameNode.setProperty(EntityBase.TYPE, EntityBase.V_NAME); nameNode.setProperty("count", 1); nameNode.setProperty("seen", "false"); nodeMap.put(nm, nameNode); } else { nameNode = nodeMap.get(nm); nameNode.setProperty("count", ((Integer) nameNode.getProperty("count")) + 1); } nameNode.createRelationshipTo(emailNode, Relation.SAME); } } } }