package com.imgraph.tests.titan;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.Writer;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.net.InetAddress;
import java.net.NetworkInterface;
import java.net.SocketException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.StringTokenizer;

import org.apache.cassandra.thrift.Cassandra;
import org.apache.cassandra.thrift.Compression;
import org.apache.cassandra.thrift.InvalidRequestException;
import org.apache.cassandra.thrift.SchemaDisagreementException;
import org.apache.cassandra.thrift.TBinaryProtocol;
import org.apache.cassandra.thrift.TimedOutException;
import org.apache.cassandra.thrift.UnavailableException;
import org.apache.commons.configuration.BaseConfiguration;
import org.apache.thrift.TException;
import org.apache.thrift.protocol.TProtocol;
import org.apache.thrift.transport.TFramedTransport;
import org.apache.thrift.transport.TSocket;
import org.apache.thrift.transport.TTransport;
import org.yaml.snakeyaml.Yaml;
import org.yaml.snakeyaml.nodes.Node;
import org.yaml.snakeyaml.nodes.Tag;
import org.yaml.snakeyaml.representer.Represent;
import org.yaml.snakeyaml.representer.Representer;

import com.imgraph.tests.titan.GraphTestCase.LoadFileType;
import com.imgraph.tests.titan.Traversal.SearchMethod;
import com.imgraph.tests.titan.TraversalResults.Path;
import com.thinkaurelius.titan.core.TitanFactory;
import com.thinkaurelius.titan.core.TitanGraph;
import com.tinkerpop.blueprints.Direction;
import com.tinkerpop.blueprints.Edge;
import com.tinkerpop.blueprints.Vertex;
import com.tinkerpop.blueprints.util.wrappers.batch.BatchGraph;
import com.tinkerpop.blueprints.util.wrappers.batch.VertexIDType;

/**
 * Static helpers used by the Titan test harness: starting/opening Titan graphs,
 * generating a per-node Cassandra configuration, loading graphs from text files
 * and running read/write/traversal benchmarks.
 */
public class TestTools {

    /** Vertex property under which the external node id is stored and indexed. */
    private static final String NAME_KEY = "name";

    /** Label given to every edge created by the loaders and the write test. */
    private static final String LINK_LABEL = "link";

    /** Number of distinct ids {@link #testWrites} can draw for new vertices. */
    private static final long NEW_ID_RANGE = 50000L;

    /** Offset added to the random part of the ids generated by {@link #testWrites}. */
    private static final long NEW_ID_OFFSET = 9999999999L;

    /** Number of newly created vertices after which {@link #loadGraph} commits. */
    private static final int FLUSH_EVERY_VERTICES = 500000;

    /**
     * Returns an IP address of this host taken from a non-loopback, up,
     * non-virtual network interface.
     *
     * <p>The first address whose canonical host name differs from its textual
     * IP is returned immediately; otherwise the last eligible address seen is
     * returned.
     *
     * @return the selected IP address, or an empty string when no interface is
     *         available or the interfaces cannot be queried
     */
    public static String getLocalIP() {
        String ipOnly = "";
        try {
            Enumeration<NetworkInterface> nifs = NetworkInterface.getNetworkInterfaces();
            if (nifs == null) {
                return "";
            }
            while (nifs.hasMoreElements()) {
                NetworkInterface nif = nifs.nextElement();
                // Sub-interfaces are ignored - not needed so far.
                if (nif.isLoopback() || !nif.isUp() || nif.isVirtual()) {
                    continue;
                }
                Enumeration<InetAddress> adrs = nif.getInetAddresses();
                while (adrs.hasMoreElements()) {
                    InetAddress adr = adrs.nextElement();
                    if (adr == null || adr.isLoopbackAddress()
                            || (!nif.isPointToPoint() && adr.isLinkLocalAddress())) {
                        continue;
                    }
                    String adrIP = adr.getHostAddress();
                    // Resolving the host name is slow on point-to-point links
                    // (e.g. mobile internet sticks), so it is skipped there.
                    String adrName = nif.isPointToPoint() ? adrIP : adr.getCanonicalHostName();
                    if (!adrName.equals(adrIP)) {
                        return adrIP;
                    }
                    ipOnly = adrIP;
                }
            }
            return ipOnly;
        } catch (SocketException ex) {
            // No usable interface information: callers treat "" as "unknown".
            return "";
        }
    }

    /**
     * Returns a uniformly distributed value in {@code [0, n)}.
     *
     * @param rng source of randomness
     * @param n   exclusive upper bound, must be positive
     * @return a pseudo-random value between 0 (inclusive) and {@code n} (exclusive)
     * @throws IllegalArgumentException if {@code n} is not positive
     */
    public static long nextLong(Random rng, long n) {
        if (n <= 0) {
            throw new IllegalArgumentException("n must be positive: " + n);
        }
        long bits;
        long val;
        do {
            // Clear the sign bit, then reject draws from the incomplete last
            // bucket (detected through overflow) to avoid modulo bias.
            bits = (rng.nextLong() << 1) >>> 1;
            val = bits % n;
        } while (bits - val + (n - 1) < 0L);
        return val;
    }

    /** YAML representer that writes {@code null} values as an empty scalar. */
    private static class NullRepresenter extends Representer {

        public NullRepresenter() {
            super();
            // The null representer is special-cased by SnakeYAML and kept in
            // an instance variable of the base class.
            this.nullRepresenter = new RepresentNull();
        }

        private class RepresentNull implements Represent {
            public Node representData(Object data) {
                // Accepted spellings are listed at http://yaml.org/type/null.html
                return representScalar(Tag.NULL, "");
            }
        }
    }

    /**
     * Generates the Cassandra configuration for this node, wipes the Cassandra
     * data directory and opens an embedded-Cassandra Titan graph.
     *
     * @param cassandraStartMsg start parameters for this node
     * @return the opened graph
     * @throws IOException      if the configuration file cannot be generated
     * @throws RuntimeException if the Cassandra directory cannot be fully removed
     */
    public static TitanGraph startTitan(CassandraStartMsg cassandraStartMsg) throws IOException {
        String cassandraConfFileName = genCassandraYaml(cassandraStartMsg);

        File directory = new File(cassandraStartMsg.getCassandraDirectory());
        if (!TestManager.removeDirectory(directory)) {
            throw new RuntimeException("The directory " + cassandraStartMsg.getCassandraDirectory()
                    + " was not completely deleted");
        }

        boolean batchMode = cassandraStartMsg.isStartInBatchMode();
        TitanGraph graph = openTitanGraph(batchMode, cassandraConfFileName, true, null);
        // The two labels used to be swapped, reporting the opposite mode.
        System.out.println("Titan graph was started in " + (batchMode ? "batch mode" : "normal mode"));
        return graph;
    }

    /**
     * Writes a node-specific {@code cassandra_<nodeNumber>.yaml} into the temp
     * directory, based on the {@code /cassandra.yaml} classpath template.
     *
     * @param cassandraStartMsg start parameters (addresses, directories, node
     *                          number and cluster size)
     * @return the path of the generated file
     * @throws IOException if the template is missing or the file cannot be written
     */
    @SuppressWarnings("unchecked")
    public static String genCassandraYaml(CassandraStartMsg cassandraStartMsg) throws IOException {
        // Resolve the template against this class: java.lang.String belongs to
        // the bootstrap loader and cannot see application resources reliably.
        InputStream input = TestTools.class.getResourceAsStream("/cassandra.yaml");
        if (input == null) {
            throw new IOException("Template /cassandra.yaml was not found on the classpath");
        }
        Map<String, Object> data;
        try {
            data = (Map<String, Object>) new Yaml().load(input);
        } finally {
            input.close();
        }

        BigInteger initialToken = genInitialToken(cassandraStartMsg.getNumberOfNodes(),
                cassandraStartMsg.getNodeNumber());
        System.out.println("Token: " + initialToken);

        data.put("initial_token", initialToken);
        data.put("listen_address", cassandraStartMsg.getLocalIpAddress());
        data.put("rpc_address", cassandraStartMsg.getLocalIpAddress());
        data.put("endpoint_snitch", "RackInferringSnitch");

        // seed_provider[0].parameters[0].seeds
        List<Map<String, Object>> seedProviders = (List<Map<String, Object>>) data.get("seed_provider");
        List<Map<String, Object>> parameters =
                (List<Map<String, Object>>) seedProviders.get(0).get("parameters");
        parameters.get(0).put("seeds", cassandraStartMsg.getMainNodeIp());

        List<Object> dataDirectories = (List<Object>) data.get("data_file_directories");
        dataDirectories.clear();
        dataDirectories.add(cassandraStartMsg.getCassandraDirectory());

        data.put("commitlog_directory", cassandraStartMsg.getCassandraDirectory() + "commitLog");
        data.put("saved_caches_directory", cassandraStartMsg.getCassandraDirectory() + "saved_caches");

        String fileName = cassandraStartMsg.getTempDirectory() + "cassandra_"
                + cassandraStartMsg.getNodeNumber() + ".yaml";
        Writer writer = new FileWriter(fileName);
        try {
            new Yaml(new NullRepresenter()).dump(data, writer);
        } finally {
            writer.close();
        }

        System.out.println("The file " + fileName + " was generated satisfactorily");
        return fileName;
    }

    /**
     * Computes {@code |nodeNumber * (2^127 - 1) / numberOfNodes|}, used as the
     * {@code initial_token} of a node.
     *
     * @throws IllegalArgumentException if {@code numberOfNodes} is zero
     */
    private static BigInteger genInitialToken(int numberOfNodes, int nodeNumber) {
        if (numberOfNodes == 0) {
            throw new IllegalArgumentException("numberOfNodes must not be zero");
        }
        BigInteger ringSize = BigInteger.valueOf(2).pow(127).subtract(BigInteger.ONE);
        return BigInteger.valueOf(nodeNumber)
                .multiply(ringSize)
                .divide(BigInteger.valueOf(numberOfNodes))
                .abs();
    }

    /**
     * Opens a Titan graph backed either by an embedded Cassandra instance or
     * by a remote Cassandra node.
     *
     * @param batchMode          when {@code true} the batch-loading storage options are set
     * @param cassandraConfigDir path of the Cassandra YAML file; only used when
     *                           {@code embeddedCassandra} is {@code true}
     * @param embeddedCassandra  selects the {@code embeddedcassandra} backend
     *                           instead of {@code cassandra}
     * @param cassandraIpAddress host name of the Cassandra node; only used when
     *                           {@code embeddedCassandra} is {@code false}
     * @return the opened graph
     */
    public static TitanGraph openTitanGraph(boolean batchMode, String cassandraConfigDir,
            boolean embeddedCassandra, String cassandraIpAddress) {
        BaseConfiguration configuration = new BaseConfiguration();

        if (batchMode) {
            configuration.setProperty("storage.batch-loading", "true");
            configuration.setProperty("storage.buffer-size", "2048");
            configuration.setProperty("storage.write-attempts", "10");
            configuration.setProperty("storage.attempt-wait", "1000");
        }

        if (embeddedCassandra) {
            configuration.setProperty("storage.backend", "embeddedcassandra");
            configuration.setProperty("storage.cassandra-config-dir", "file:///" + cassandraConfigDir);
        } else {
            configuration.setProperty("storage.backend", "cassandra");
            configuration.setProperty("storage.hostname", cassandraIpAddress);
        }

        return TitanFactory.open(configuration);
    }

    /**
     * Computes median, minimum, maximum, mean and sample standard deviation of
     * the given measurements. The input list is not modified.
     *
     * @param traversalTimes measured times, must contain at least one value
     * @return the indicators keyed by {@link StatisticalIndicators}; the
     *         standard deviation is reported as 0 for a single measurement
     * @throws IllegalArgumentException if the list is {@code null} or empty
     */
    private static Map<StatisticalIndicators, Double> calculateIndicators(List<Long> traversalTimes) {
        if (traversalTimes == null || traversalTimes.isEmpty()) {
            throw new IllegalArgumentException("At least one measurement is required");
        }

        List<Long> sorted = new ArrayList<Long>(traversalTimes);
        Collections.sort(sorted);
        int size = sorted.size();
        int middle = size / 2;

        double median;
        if (size % 2 == 0) {
            // Even count: average of the two central values.
            median = (sorted.get(middle - 1).doubleValue() + sorted.get(middle).doubleValue()) / 2D;
        } else {
            median = sorted.get(middle).doubleValue();
        }

        double sum = 0;
        for (long time : sorted) {
            sum += time;
        }
        double mean = sum / size;

        double squaredDeviations = 0;
        for (long time : sorted) {
            squaredDeviations += Math.pow(time - mean, 2);
        }
        // Sample standard deviation (n - 1); undefined for n == 1, reported as 0.
        double stdDev = size > 1 ? Math.sqrt(squaredDeviations / (size - 1)) : 0D;

        Map<StatisticalIndicators, Double> indicators = new HashMap<StatisticalIndicators, Double>();
        indicators.put(StatisticalIndicators.MEDIAN, median);
        indicators.put(StatisticalIndicators.MIN, sorted.get(0).doubleValue());
        indicators.put(StatisticalIndicators.MAX, sorted.get(size - 1).doubleValue());
        indicators.put(StatisticalIndicators.MEAN, mean);
        indicators.put(StatisticalIndicators.DEV_STD, stdDev);
        return indicators;
    }

    /** Tells whether a line of a test/graph file carries data (not a {@code #} comment, not blank). */
    private static boolean isDataLine(String line) {
        return !line.startsWith("#") && line.trim().length() > 0;
    }

    /** Closes the input file and the log writer; the writer is closed even if closing the file fails. */
    private static void closeAll(BigTextFile file, Writer writer) throws Exception {
        try {
            if (file != null) {
                file.Close();
            }
        } finally {
            if (writer != null) {
                // close() flushes buffered output first.
                writer.close();
            }
        }
    }

    /**
     * Runs one traversal per data line of {@code fileName}; each line holds
     * {@code startVertexId,endVertexId}.
     *
     * @param graph        graph to traverse
     * @param direction    edge direction followed by the traversal
     * @param fileName     file listing the start/end vertex pairs
     * @param maxDepth     maximum number of hops
     * @param outLogFile   optional log file receiving one
     *                     {@code start,end,time,(Y|N),path} line per traversal;
     *                     may be {@code null}
     * @param delay        pause in milliseconds after each traversal
     * @param searchMethod search strategy handed to {@link Traversal}
     * @return statistical indicators over the traversal times
     * @throws Exception if a start vertex is missing, the file is empty of data
     *                   lines, or reading/writing fails
     */
    public static Map<StatisticalIndicators, Double> testTraversalFromFile(TitanGraph graph,
            Direction direction, String fileName, int maxDepth, String outLogFile, int delay,
            SearchMethod searchMethod) throws Exception {
        BigTextFile file = null;
        BufferedWriter writer = null;
        List<Long> traversalTimes = new ArrayList<Long>();
        int counter = 0;

        Traversal traversal = new Traversal();
        traversal.setHops(maxDepth);
        traversal.setDirection(direction);
        traversal.setSearchMethod(searchMethod);

        try {
            file = new BigTextFile(fileName);
            if (outLogFile != null) {
                writer = new BufferedWriter(new FileWriter(new File(outLogFile), false));
            }

            for (String line : file) {
                if (!isDataLine(line)) {
                    continue;
                }
                StringTokenizer tokenizer = new StringTokenizer(line, ",");
                String startVertexId = tokenizer.nextToken().trim();
                String endVertexId = tokenizer.nextToken().trim();

                traversal.setSearchedId(Long.parseLong(endVertexId));

                Iterator<Vertex> iterator =
                        graph.getVertices(NAME_KEY, Long.parseLong(startVertexId)).iterator();
                if (!iterator.hasNext()) {
                    throw new RuntimeException("Vertex not found with id " + startVertexId);
                }

                TraversalResults results = traversal.search(iterator.next());
                traversalTimes.add(results.getTime());

                if (writer != null) {
                    String pathString = results.getPaths().isEmpty()
                            ? "" : results.getPaths().get(0).toString();
                    writer.write(startVertexId + "," + endVertexId + "," + results.getTime());
                    if (pathString.equals("")) {
                        writer.write(",N,");
                    } else {
                        writer.write(",Y," + pathString);
                    }
                    writer.newLine();
                }

                counter++;
                System.out.println("Traversal # " + counter + " executed");
                Thread.sleep(delay);
            }

            return calculateIndicators(traversalTimes);
        } finally {
            closeAll(file, writer);
        }
    }

    /**
     * Times an indexed lookup for both vertex ids found on every data line of
     * {@code testFile}.
     *
     * @param graph      graph to query
     * @param testFile   file with {@code id,id} lines
     * @param outLogFile optional CSV log of {@code cellId,time}; may be {@code null}
     * @return statistical indicators over the lookup times (nanoseconds)
     * @throws Exception if a vertex is missing or reading/writing fails
     */
    public static Map<StatisticalIndicators, Double> testReads(TitanGraph graph, String testFile,
            String outLogFile) throws Exception {
        BigTextFile file = null;
        BufferedWriter writer = null;

        try {
            file = new BigTextFile(testFile);
            List<Long> cellIds = new ArrayList<Long>();
            List<Long> times = new ArrayList<Long>();

            if (outLogFile != null) {
                writer = new BufferedWriter(new FileWriter(new File(outLogFile), false));
                writer.write("CELL_ID, TIME(nanoseconds)");
            }

            // Read all ids first so file parsing does not pollute the timings.
            for (String line : file) {
                if (isDataLine(line)) {
                    StringTokenizer tokenizer = new StringTokenizer(line, ",");
                    cellIds.add(Long.parseLong(tokenizer.nextToken().trim()));
                    cellIds.add(Long.parseLong(tokenizer.nextToken().trim()));
                }
            }

            for (long cellId : cellIds) {
                long startTime = System.nanoTime();
                graph.getVertices(NAME_KEY, cellId).iterator().next();
                long elapsed = System.nanoTime() - startTime;

                times.add(elapsed);
                if (writer != null) {
                    writer.newLine();
                    writer.write(cellId + "," + elapsed);
                }
            }

            return calculateIndicators(times);
        } finally {
            closeAll(file, writer);
        }
    }

    /**
     * For every data line {@code idA,idB} of {@code testFile}, times the
     * creation (and commit) of a new vertex linked to the vertices named
     * {@code idA} and {@code idB}.
     *
     * @param graph      graph to write to
     * @param testFile   file with {@code idA,idB} lines
     * @param outLogFile optional CSV log of {@code newCellId,time}; may be {@code null}
     * @return statistical indicators over the write times (nanoseconds)
     * @throws IllegalStateException if the file holds more data lines than
     *                               there are distinct new ids available
     * @throws Exception             if a transaction fails or reading/writing fails
     */
    public static Map<StatisticalIndicators, Double> testWrites(TitanGraph graph, String testFile,
            String outLogFile) throws Exception {
        BigTextFile file = null;
        BufferedWriter writer = null;

        try {
            Random random = new Random();
            file = new BigTextFile(testFile);
            List<Long[]> cellIds = new ArrayList<Long[]>();
            List<Long> times = new ArrayList<Long>();
            Set<Long> newCellIds = new HashSet<Long>();

            if (outLogFile != null) {
                writer = new BufferedWriter(new FileWriter(new File(outLogFile), false));
            }

            for (String line : file) {
                if (!isDataLine(line)) {
                    continue;
                }
                if (newCellIds.size() >= NEW_ID_RANGE) {
                    // Without this guard the draw loop below would never end.
                    throw new IllegalStateException("No unused id left: at most " + NEW_ID_RANGE
                            + " write operations are supported");
                }
                long cellId;
                do {
                    cellId = nextLong(random, NEW_ID_RANGE) + NEW_ID_OFFSET;
                } while (!newCellIds.add(cellId));

                StringTokenizer tokenizer = new StringTokenizer(line, ",");
                cellIds.add(new Long[] { cellId,
                        Long.parseLong(tokenizer.nextToken().trim()),
                        Long.parseLong(tokenizer.nextToken().trim()) });
            }

            if (writer != null) {
                writer.write("NEW CELL ID, TIME(nanoseconds)");
            }

            int transactionCounter = 0;
            for (Long[] destCellIds : cellIds) {
                long startTime;
                try {
                    // The lookups are deliberately outside the timed section.
                    Vertex vertexA = graph.getVertices(NAME_KEY, destCellIds[1]).iterator().next();
                    Vertex vertexB = graph.getVertices(NAME_KEY, destCellIds[2]).iterator().next();

                    startTime = System.nanoTime();
                    Vertex vertex = graph.addVertex(null);
                    vertex.setProperty(NAME_KEY, destCellIds[0]);
                    vertex.addEdge(LINK_LABEL, vertexA);
                    vertex.addEdge(LINK_LABEL, vertexB);
                    graph.commit();
                    transactionCounter++;
                } catch (Exception x) {
                    System.out.println("Error on transaction " + (transactionCounter + 1) + ", "
                            + destCellIds[0] + "-" + destCellIds[1]);
                    throw new Exception(x);
                }
                long elapsed = System.nanoTime() - startTime;

                if (writer != null) {
                    writer.newLine();
                    writer.write(String.valueOf(destCellIds[0]) + "," + elapsed);
                }
                times.add(elapsed);
            }

            return calculateIndicators(times);
        } finally {
            closeAll(file, writer);
        }
    }

    /**
     * Splits one data line of a graph file into its source node id and the
     * list of target node ids.
     *
     * @param line         line to parse
     * @param loadType     {@code ADJ_LIST_TEXT_FILE} for {@code from,to1,to2,...}
     *                     or {@code SIMPLE_TEXT_FILE} for whitespace separated
     *                     {@code from to}
     * @param toNodeIdList cleared and filled with the target ids
     * @return the source node id
     * @throws IllegalArgumentException for any other load type
     */
    private static String parseEdgeLine(String line, LoadFileType loadType, List<String> toNodeIdList) {
        toNodeIdList.clear();
        StringTokenizer tokenizer;
        String fromNodeId;
        switch (loadType) {
        case ADJ_LIST_TEXT_FILE:
            tokenizer = new StringTokenizer(line, ",");
            fromNodeId = tokenizer.nextToken().trim();
            while (tokenizer.hasMoreTokens()) {
                toNodeIdList.add(tokenizer.nextToken().trim());
            }
            return fromNodeId;
        case SIMPLE_TEXT_FILE:
            tokenizer = new StringTokenizer(line);
            fromNodeId = tokenizer.nextToken();
            toNodeIdList.add(tokenizer.nextToken());
            return fromNodeId;
        default:
            throw new IllegalArgumentException("Unsupported load file type: " + loadType);
        }
    }

    /** Prints the progress dots and the periodic line count shared by both loaders. */
    private static void reportProgress(long lineCounter) {
        if (lineCounter % 500 == 0) {
            System.out.print(".");
        }
        if (lineCounter % 50000 == 0) {
            System.out.println("\n" + lineCounter + " lines have been read");
        }
    }

    /** Shuts the graph down after a failed load without hiding the failure being propagated. */
    private static void shutdownAfterFailure(TitanGraph graph) {
        try {
            graph.shutdown();
        } catch (RuntimeException ignored) {
            // The load failure that is already propagating is the useful one.
        }
    }

    /**
     * Loads a graph file into a remote-Cassandra Titan graph through a
     * Blueprints {@link BatchGraph} wrapper.
     *
     * @param fileName         graph file to load
     * @param loadType         layout of the file
     * @param cassandraAddress host name of the Cassandra node
     * @throws Exception if the file cannot be read or parsed, or the graph
     *                   cannot be written
     */
    public static void loadGraphV2(String fileName, LoadFileType loadType, String cassandraAddress)
            throws Exception {
        long lineCounter = 0;
        long edgeCounter = 0;
        long vertexCounter = 0;
        List<String> toNodeIdList = new ArrayList<String>();

        BigTextFile file = new BigTextFile(fileName);
        try {
            TitanGraph graph = openTitanGraph(true, null, false, cassandraAddress);
            boolean loaded = false;
            try {
                BatchGraph<TitanGraph> bgraph =
                        new BatchGraph<TitanGraph>(graph, VertexIDType.NUMBER, 20000);
                graph.createKeyIndex(NAME_KEY, Vertex.class);

                long start = new Date().getTime();
                for (String line : file) {
                    if (!isDataLine(line)) {
                        continue;
                    }
                    lineCounter++;
                    long fromId = Long.parseLong(parseEdgeLine(line, loadType, toNodeIdList));

                    Vertex fromNode = bgraph.getVertex(fromId);
                    if (fromNode == null) {
                        fromNode = bgraph.addVertex(fromId);
                        fromNode.setProperty(NAME_KEY, fromId);
                        vertexCounter++;
                    }

                    for (String toNodeId : toNodeIdList) {
                        long toId = Long.parseLong(toNodeId);
                        Vertex toNode = bgraph.getVertex(toId);
                        if (toNode == null) {
                            toNode = bgraph.addVertex(toId);
                            toNode.setProperty(NAME_KEY, toId);
                            vertexCounter++;
                        }
                        bgraph.addEdge(null, fromNode, toNode, LINK_LABEL);
                        edgeCounter++;
                    }

                    reportProgress(lineCounter);
                }
                long end = new Date().getTime();

                // Shut down through the wrapper so the last, partially filled
                // batch is committed explicitly before the base graph closes.
                loaded = true;
                bgraph.shutdown();

                System.out.println("File succesfully loaded: " + vertexCounter + " vertices and "
                        + edgeCounter + " edges. Time(ms): " + (end - start));
            } finally {
                if (!loaded) {
                    shutdownAfterFailure(graph);
                }
            }
        } finally {
            file.Close();
        }
    }

    /**
     * Loads a graph file into a remote-Cassandra Titan graph, tracking the
     * Titan id of every created vertex in memory and committing after every
     * {@value #FLUSH_EVERY_VERTICES} newly created vertices.
     *
     * @param fileName         graph file to load
     * @param loadType         layout of the file
     * @param cassandraAddress host name of the Cassandra node
     * @throws Exception if the file cannot be read or parsed, or the graph
     *                   cannot be written
     */
    public static void loadGraph(String fileName, LoadFileType loadType, String cassandraAddress)
            throws Exception {
        long lineCounter = 0;
        long edgeCounter = 0;
        int verticesAtLastFlush = 0;
        List<String> toNodeIdList = new ArrayList<String>();
        // External node id -> Titan vertex id; its size is the vertex count.
        Map<String, Long> vertexIds = new HashMap<String, Long>();

        BigTextFile file = new BigTextFile(fileName);
        try {
            TitanGraph graph = openTitanGraph(true, null, false, cassandraAddress);
            boolean loaded = false;
            try {
                graph.createKeyIndex(NAME_KEY, Vertex.class);

                long start = new Date().getTime();
                for (String line : file) {
                    if (!isDataLine(line)) {
                        continue;
                    }
                    lineCounter++;
                    String fromNodeId = parseEdgeLine(line, loadType, toNodeIdList);

                    Vertex fromNode = findOrCreateVertex(graph, vertexIds, fromNodeId);
                    for (String toNodeId : toNodeIdList) {
                        Vertex toNode = findOrCreateVertex(graph, vertexIds, toNodeId);
                        graph.addEdge(null, fromNode, toNode, LINK_LABEL);
                        edgeCounter++;
                    }

                    reportProgress(lineCounter);

                    // Flush once per FLUSH_EVERY_VERTICES new vertices; comparing
                    // against the last flush avoids both repeated flushes while
                    // the count is unchanged and skipped flushes when one line
                    // adds several vertices.
                    if (vertexIds.size() - verticesAtLastFlush >= FLUSH_EVERY_VERTICES) {
                        System.out.println("Flushing data.... ");
                        graph.commit();
                        verticesAtLastFlush = vertexIds.size();
                    }
                }
                long end = new Date().getTime();

                loaded = true;
                graph.shutdown();

                System.out.println("File succesfully loaded: " + vertexIds.size() + " vertices and "
                        + edgeCounter + " edges. Time(ms): " + (end - start));
            } finally {
                if (!loaded) {
                    shutdownAfterFailure(graph);
                }
            }
        } finally {
            file.Close();
        }
    }

    /**
     * Returns the vertex for {@code nodeId}, creating it (and recording its
     * Titan id in {@code vertexIds}) when it has not been seen before.
     */
    private static Vertex findOrCreateVertex(TitanGraph graph, Map<String, Long> vertexIds,
            String nodeId) {
        Long vertexId = vertexIds.get(nodeId);
        if (vertexId != null) {
            return graph.getVertex(vertexId);
        }
        long name = Long.parseLong(nodeId);
        Vertex vertex = graph.addVertex(name);
        vertex.setProperty(NAME_KEY, name);
        vertexIds.put(nodeId, (Long) vertex.getId());
        return vertex;
    }

    /**
     * Best-effort drop of the {@code titan} keyspace on the local Cassandra
     * node (127.0.0.1:9160). Failures are reported on standard error and
     * otherwise ignored.
     */
    private static void clearCassandra() {
        TTransport tr = new TFramedTransport(new TSocket("127.0.0.1", 9160));
        try {
            tr.open();
            TProtocol proto = new TBinaryProtocol(tr);
            Cassandra.Client client = new Cassandra.Client(proto);
            client.execute_cql_query(ByteBuffer.wrap("drop keyspace titan;".getBytes()),
                    Compression.NONE);
        } catch (Exception e) {
            // Best effort: the keyspace may simply not exist yet.
            System.err.println("Could not drop keyspace titan: " + e);
        } finally {
            if (tr.isOpen()) {
                tr.close();
            }
        }
    }

    /**
     * Manual entry point. It currently does nothing; the commented-out
     * statements below are earlier experiments kept for reference and may not
     * match the current method signatures.
     *
     * @param args ignored
     */
    public static void main(String[] args) throws Exception {
        // clearCassandra();
        // genCassandraYaml("c:\\uclouvain\\master_thesis\\", "/mnt/data/imgraph/cassandra/",
        //         2, 1, "192.168.0.214", "192.168.0.215");
        // TitanGraph g = openTitanGraph(false, "C:\\uclouvain\\master_thesis\\out.yaml");
        // loadGraph("c:\\uclouvain\\master_thesis\\graph_data\\soc-Epinions1\\soc-Epinions1.txt",
        //         LoadFileType.SIMPLE_TEXT_FILE, "C:\\uclouvain\\master_thesis\\out.yaml");
        // loadGraph("c:\\uclouvain\\master_thesis\\graph_data\\tiny.txt", LoadFileType.SIMPLE_TEXT_FILE,
        //         "C:\\uclouvain\\master_thesis\\tests\\cassandra.yaml");
        //
        // for (Vertex v1 : g.getVertices()) {
        //     System.out.println("\nVertex name: " + v1.getProperty("name") + ", id: " + v1.getId()
        //             + ":" + v1.getId().getClass());
        //     System.out.println("OUT");
        //     for (Edge e : v1.getEdges(Direction.OUT))
        //         System.out.print(e.getVertex(Direction.IN).getProperty("name") + " ");
        //     System.out.println("\nIN");
        //     for (Edge e : v1.getEdges(Direction.IN))
        //         System.out.print(e.getVertex(Direction.OUT).getProperty("name") + " ");
        // }
        //
        // Vertex v = g.getVertex(4L);
        // System.out.println(v.getProperty("name"));
        //
        // Traversal traversal = new Traversal();
        // traversal.setHops(3);
        // traversal.setDirection(Direction.OUT);
        // traversal.setSearchedId(23L);
        // Vertex startVertex = g.getVertices("name", 1716L).iterator().next();
        // System.out.println("Starting traversal...");
        // TraversalResults results = traversal.search(startVertex);
        // System.out.print("Time: " + results.getTime() + "ms. ");
        // for (Path path : results.getPaths())
        //     System.out.println(path);
        // g.shutdown();
        //
        // Iterator<Vertex> vertexIterator = g.getVertices("name", "1").iterator();
        // if (vertexIterator.hasNext()) {
        //     Vertex v1 = vertexIterator.next();
        //     System.out.println("Name: " + v1.getProperty("name"));
        //     System.out.println("OUT\n");
        //     for (Edge e : v1.getEdges(Direction.OUT))
        //         System.out.print(e.getVertex(Direction.OUT).getProperty("name") + " ");
        //     System.out.println("\nIN\n");
        //     for (Edge e : v1.getEdges(Direction.IN))
        //         System.out.print(e.getVertex(Direction.IN).getProperty("name") + " ");
        // } else {
        //     System.out.println("NOT FOUND!!!");
        // }
    }
}