/*******************************************************************************
* Copyright (c) 2014 EURA NOVA.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the GNU Public License v2.0
* which accompanies this distribution, and is available at
* http://www.gnu.org/licenses/old-licenses/gpl-2.0.html
*
* Contributors:
* Aldemar Reynaga - initial API and implementation
* Salim Jouili - initial API and implementation
******************************************************************************/
package com.steffi.loader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.StringTokenizer;
import java.util.UUID;
import org.zeromq.ZMQ;
import com.steffi.common.BigTextFile;
import com.steffi.common.Configuration;
import com.steffi.common.ImgLogger;
import com.steffi.common.Configuration.Key;
import com.steffi.common.ImgLogger.LogLevel;
import com.steffi.model.EdgeType;
import com.steffi.model.SteffiGraph;
import com.steffi.model.SteffiVertex;
import com.steffi.networking.ClientThread;
import com.steffi.networking.messages.AddressVertexRepMsg;
import com.steffi.networking.messages.AddressVertexReqMsg;
import com.steffi.networking.messages.LoadMessage;
import com.steffi.networking.messages.Message;
import com.steffi.networking.messages.MessageType;
import com.steffi.networking.messages.LoadMessage.LoadFileType;
import com.steffi.storage.StorageTools;
import com.tinkerpop.blueprints.TransactionalGraph.Conclusion;
import com.tinkerpop.blueprints.Vertex;
import com.tinkerpop.blueprints.impls.steffi.SteffiGraphDBGraph;
/**
 * Batch loader for text files using the edge list format or the adjacency list format.
 *
 * <p>{@link #load(String, LoadFileType, boolean)} reads the file, groups the vertices by
 * storage address and loader index, and hands one {@link LoadMessage} per group to the
 * {@link ClientThread} registered for that address. Replies are delivered through
 * {@link #processResponse(Message)}; {@code load} blocks until every sent block has been
 * acknowledged.
 *
 * <p>All counters shared between the thread running {@code load} and the threads that call
 * {@code processResponse} are guarded by a single private monitor. An instance supports one
 * {@code load} call at a time, because each call resets those shared counters.
 *
 * @author Aldemar Reynaga
 */
public class TextFileLoader implements ResponseProcessor {

    /** Number of buffered vertices that triggers sending a batch during {@link #load}. */
    private static final int VERTICES_PER_BATCH = 500;

    /** Number of buffered vertices that triggers a flush in {@link #singleProcessLoad}. */
    private static final int SINGLE_PROCESS_BATCH_SIZE = 300;

    /** After this many sent batches the loader pauses before sending more messages. */
    private static final int BATCHES_BETWEEN_PAUSES = 5000;

    /** Length of the pause, in milliseconds. */
    private static final long PAUSE_MILLIS = 20000L;

    /** Monitor guarding the four counters/flags declared right below. Never reassigned. */
    private final Object lock = new Object();
    /** Load blocks sent and not yet acknowledged; guarded by {@link #lock}. */
    private int pendingLoadBlocks;
    /** True once the whole file has been read and sent; guarded by {@link #lock}. */
    private boolean fileReadCompleted;
    /** Sum of the vertex counts reported in the load replies; guarded by {@link #lock}. */
    private int vertexCounter;
    /** 2-hop neighbor update requests not yet acknowledged; guarded by {@link #lock}. */
    private int pending2HNRequests;

    // The fields below are only touched by the constructor and by the thread running load().
    private Map<String, ClientThread> clientThreads;
    private Map<String, List<LoadVertexInfo>[]> addressVerticesInfo;
    private List<Long> vertexIds;
    private ZMQ.Context context;
    private ZMQ.Socket socketToMember;
    private final int numberOfLoaders;
    private final boolean loaderIsMember;
    private int processVerticesCounter;

    /**
     * Creates a loader that resolves vertex addresses locally through {@link StorageTools}.
     * One client thread is started per address returned by
     * {@code StorageTools.getAddressesIps()}.
     */
    public TextFileLoader() {
        numberOfLoaders = Integer.parseInt(Configuration.getProperty(Key.NUM_LOADERS));
        loaderIsMember = true;
        // Started last so that no client thread can observe a partially initialised loader.
        initClientThreads(StorageTools.getAddressesIps());
    }

    /**
     * Creates a loader that asks a remote node for vertex addresses over a ZeroMQ REQ socket.
     *
     * @param clusterAddresses addresses of the cluster nodes; one client thread is started per entry
     * @param clientIp         IP of the node that answers address requests
     */
    public TextFileLoader(Map<String, String> clusterAddresses, String clientIp) {
        numberOfLoaders = Integer.parseInt(Configuration.getProperty(Key.NUM_LOADERS));
        loaderIsMember = false;
        context = ZMQ.context(1);
        socketToMember = context.socket(ZMQ.REQ);
        socketToMember.connect("tcp://" + clientIp + ":" + Configuration.getProperty(Configuration.Key.NODE_PORT));
        // Started last so that no client thread can observe a partially initialised loader.
        initClientThreads(clusterAddresses);
    }

    /**
     * Creates and starts one {@link ClientThread} per cluster address. All threads share the
     * same randomly generated loader id and report their responses back to this instance.
     */
    private void initClientThreads(Map<String, String> clusterAddresses) {
        this.clientThreads = new HashMap<String, ClientThread>();
        String loaderId = UUID.randomUUID().toString();
        for (Entry<String, String> entry : clusterAddresses.entrySet()) {
            // NOTE(review): the entry value is passed for both of the first two constructor
            // arguments; confirm against ClientThread that this is intended.
            ClientThread clientThread = new ClientThread(entry.getValue(), entry.getValue(),
                    "loader_" + loaderId, this);
            clientThreads.put(entry.getKey(), clientThread);
            new Thread(clientThread).start();
        }
    }

    /**
     * Requests the addresses of the vertices currently listed in {@code vertexIds} from the
     * remote node and blocks until the reply is received.
     *
     * @return the vertex-id to address map carried by the reply
     * @throws IOException declared for the message (de)serialisation calls
     */
    private Map<Long, String> getVertexAddresses() throws IOException {
        AddressVertexReqMsg request = new AddressVertexReqMsg();
        request.setCellIds(vertexIds);
        socketToMember.send(Message.convertMessageToBytes(request), 0);
        AddressVertexRepMsg reply = (AddressVertexRepMsg) Message.readFromBytes(socketToMember.recv(0));
        return reply.getCellAddresses();
    }

    /**
     * Distributes the given vertices into {@code addressVerticesInfo}: one array per address,
     * indexed by loader index ({@code vertexId mod numberOfLoaders}). Buckets left over from
     * the previous batch are discarded first.
     */
    @SuppressWarnings("unchecked")
    private void classifyVerticesInfo(Collection<LoadVertexInfo> verticesInfo) throws IOException {
        Map<Long, String> vertexAddresses = null;
        if (!loaderIsMember) {
            vertexAddresses = getVertexAddresses();
        }

        // Drop the previous batch's lists; they were handed over to already queued messages.
        for (List<LoadVertexInfo>[] perLoader : addressVerticesInfo.values()) {
            for (int i = 0; i < perLoader.length; i++) {
                perLoader[i] = null;
            }
        }

        for (LoadVertexInfo vertexInfo : verticesInfo) {
            String address;
            if (loaderIsMember) {
                address = StorageTools.getCellAddress(vertexInfo.getVertexId());
            } else {
                address = vertexAddresses.get(vertexInfo.getVertexId());
            }

            List<LoadVertexInfo>[] perLoader = addressVerticesInfo.get(address);
            if (perLoader == null) {
                perLoader = (List<LoadVertexInfo>[]) new List[numberOfLoaders];
                addressVerticesInfo.put(address, perLoader);
            }

            long vertexId = vertexInfo.getVertexId();
            int loaderIndex = (int) (vertexId % numberOfLoaders);
            if (loaderIndex < 0) {
                // Java's % keeps the sign of the dividend; keep negative ids inside the array.
                loaderIndex += numberOfLoaders;
            }
            if (perLoader[loaderIndex] == null) {
                perLoader[loaderIndex] = new ArrayList<LoadVertexInfo>();
            }
            perLoader[loaderIndex].add(vertexInfo);
        }
    }

    /**
     * Classifies the buffered vertices and queues one {@link LoadMessage} per non-empty
     * (address, loader index) bucket on the client thread registered for that address.
     *
     * @throws IllegalStateException if no client thread is registered for a bucket's address
     */
    private void sendVerticesInfo(Map<Long, LoadVertexInfo> verticesInfo, LoadFileType loadFileType) throws IOException {
        classifyVerticesInfo(verticesInfo.values());
        for (Entry<String, List<LoadVertexInfo>[]> entry : addressVerticesInfo.entrySet()) {
            List<LoadVertexInfo>[] perLoader = entry.getValue();
            for (int loaderIndex = 0; loaderIndex < perLoader.length; loaderIndex++) {
                List<LoadVertexInfo> subList = perLoader[loaderIndex];
                if (subList == null) {
                    continue;
                }
                ClientThread clientThread = clientThreads.get(entry.getKey());
                if (clientThread == null) {
                    throw new IllegalStateException("No client thread registered for address " + entry.getKey());
                }
                LoadMessage loadMessage = new LoadMessage();
                loadMessage.setVerticesInfo(subList);
                loadMessage.setLoadFileType(loadFileType);
                loadMessage.setLoaderIndex(loaderIndex);
                // Count the block before queuing it so a fast reply can never be processed
                // ahead of the increment.
                registerPendingBlock();
                clientThread.addMsgToQueue(loadMessage);
            }
        }
    }

    /** Records that one more load block is awaiting acknowledgement. */
    private void registerPendingBlock() {
        synchronized (lock) {
            pendingLoadBlocks++;
        }
    }

    /**
     * Records the acknowledgement of one load block together with the number of vertices it
     * created, and wakes up {@code load} when no block is pending any more.
     */
    private void registerLoadedBlock(int newVertices) {
        int remaining;
        boolean readCompleted;
        synchronized (lock) {
            vertexCounter += newVertices;
            pendingLoadBlocks--;
            remaining = pendingLoadBlocks;
            readCompleted = fileReadCompleted;
            if (remaining <= 0) {
                lock.notifyAll();
            }
        }
        // Progress output is only produced while load() is draining the pending blocks.
        if (readCompleted && remaining % 1000 == 0) {
            System.out.println("Pending load blocks: " + remaining + "...");
            System.out.flush();
        }
    }

    /**
     * Handles a reply delivered by a client thread. {@code LOAD_REP} bodies are split on
     * {@code "::"}: the first part must be {@code OK} and the second part is parsed as the
     * number of created vertices. {@code UPD_2HOP_NEIGHBORS_REP} bodies must be {@code OK}.
     * Other message types are ignored.
     *
     * @throws RuntimeException if the reply reports an error
     */
    @Override
    public void processResponse(Message message) {
        if (message.getType().equals(MessageType.LOAD_REP)) {
            String[] response = message.getBody().split("::");
            if (response[0].equals("OK")) {
                int newVertices = 0;
                try {
                    newVertices = Integer.parseInt(response[1]);
                } finally {
                    // The block is acknowledged even if its count is missing or malformed,
                    // so load() is not left waiting for it; the parse failure still propagates.
                    registerLoadedBlock(newVertices);
                }
            } else {
                throw new RuntimeException("Error processing load block: " + message.getBody());
            }
        } else if (message.getType().equals(MessageType.UPD_2HOP_NEIGHBORS_REP)) {
            if (message.getBody().equals("OK")) {
                registerUpd2HNResponse();
            } else {
                throw new RuntimeException("Error processing an update request for local 2-hop neighbors");
            }
        }
    }

    /**
     * Returns the buffered info for {@code vertexId}, creating and registering it on first
     * use. For a non-member loader the id is also queued for the next address request.
     */
    private LoadVertexInfo getVertexInfo(Map<Long, LoadVertexInfo> verticesInfo, long vertexId) {
        LoadVertexInfo vertexInfo = verticesInfo.get(vertexId);
        if (vertexInfo == null) {
            vertexInfo = new LoadVertexInfo(vertexId);
            verticesInfo.put(vertexId, vertexInfo);
            if (!loaderIsMember) {
                vertexIds.add(vertexId);
            }
        }
        return vertexInfo;
    }

    /**
     * Records the edge on both endpoints: as out/in edge when {@code isDirected}, otherwise
     * as an undirected edge on each side.
     */
    private void addEdgeToVertexInfo(Map<Long, LoadVertexInfo> verticesInfo,
            long sourceId, long destId, boolean isDirected) {
        LoadVertexInfo source = getVertexInfo(verticesInfo, sourceId);
        if (isDirected) {
            source.addOutEdge(destId);
        } else {
            source.addUndirectedEdge(destId);
        }

        LoadVertexInfo dest = getVertexInfo(verticesInfo, destId);
        if (isDirected) {
            dest.addInEdge(sourceId);
        } else {
            dest.addUndirectedEdge(sourceId);
        }
    }

    /** Records one acknowledged 2-hop update request and wakes the waiter when none is left. */
    private void registerUpd2HNResponse() {
        int remaining;
        synchronized (lock) {
            pending2HNRequests--;
            remaining = pending2HNRequests;
            if (remaining <= 0) {
                lock.notifyAll();
            }
        }
        System.out.println("Pending 2-hop neighbors requests: " + remaining);
    }

    /**
     * Sends an {@code UPD_2HOP_NEIGHBORS_REQ} to every client thread and blocks until each of
     * them has been acknowledged.
     */
    private void updateLocal2HopNeighbors() throws Exception {
        Message message = new Message(MessageType.UPD_2HOP_NEIGHBORS_REQ);
        // The expected reply count must be in place before the first request goes out,
        // otherwise an early reply would be lost.
        synchronized (lock) {
            pending2HNRequests = clientThreads.size();
        }
        for (ClientThread clientThread : clientThreads.values()) {
            clientThread.addMsgToQueue(message);
        }
        synchronized (lock) {
            while (pending2HNRequests > 0) {
                lock.wait();
            }
        }
    }

    /**
     * Sends the buffered vertices, pauses after every {@value #BATCHES_BETWEEN_PAUSES} batches
     * before more messages are sent, and then empties the buffers.
     */
    private void processVerticesInfo(Map<Long, LoadVertexInfo> verticesInfo, LoadFileType loadFileType) throws Exception {
        sendVerticesInfo(verticesInfo, loadFileType);
        processVerticesCounter++;
        if (processVerticesCounter % BATCHES_BETWEEN_PAUSES == 0) {
            System.out.println("\nWaiting before sending more messages...");
            System.out.flush();
            Thread.sleep(PAUSE_MILLIS);
            System.out.println("Resuming message sending ...");
            System.out.flush();
        }
        verticesInfo.clear();
        if (!loaderIsMember) {
            vertexIds.clear();
        }
    }

    /**
     * Loads a text file into the cluster and waits until every block has been acknowledged.
     * Lines starting with {@code #} and blank lines are skipped. For
     * {@code ADJ_LIST_TEXT_FILE} each line is a comma separated list whose first value is the
     * source vertex; for {@code SIMPLE_TEXT_FILE} each line holds a whitespace separated
     * source/destination pair. If the {@code VIRTUAL_EDGES} property is {@code "true"} the
     * local 2-hop neighbors are updated afterwards.
     *
     * @param fileName     path of the file to read
     * @param loadFileType format of the file; only the two text formats are accepted
     * @param isDirected   whether edges are stored as directed (out/in) or undirected
     * @return a two element array with the number of created vertices and the number of
     *         edges read, or {@code null} if the load failed or was interrupted
     */
    public String[] load(String fileName, LoadFileType loadFileType, boolean isDirected) {
        BigTextFile file = null;
        Map<Long, LoadVertexInfo> verticesInfo = new HashMap<Long, LoadVertexInfo>();
        long fromNodeId = -1;
        long edgeCounter = 0;
        long lineCounter = 0;
        try {
            file = new BigTextFile(fileName);
            long startMillis = System.currentTimeMillis();
            System.out.print("Loading\n[");

            synchronized (lock) {
                pendingLoadBlocks = 0;
                fileReadCompleted = false;
                vertexCounter = 0;
            }
            processVerticesCounter = 0;
            addressVerticesInfo = new HashMap<String, List<LoadVertexInfo>[]>();
            vertexIds = new ArrayList<Long>();

            for (String line : file) {
                if (line.startsWith("#") || line.trim().equals("")) {
                    continue;
                }
                lineCounter++;

                switch (loadFileType) {
                case ADJ_LIST_TEXT_FILE: {
                    StringTokenizer tokenizer = new StringTokenizer(line, ",");
                    // Tokens are trimmed so that "1, 2, 3" is accepted, as in singleProcessLoad.
                    fromNodeId = Long.parseLong(tokenizer.nextToken().trim());
                    while (tokenizer.hasMoreTokens()) {
                        long toNodeId = Long.parseLong(tokenizer.nextToken().trim());
                        addEdgeToVertexInfo(verticesInfo, fromNodeId, toNodeId, isDirected);
                        edgeCounter++;
                    }
                    if (verticesInfo.size() >= VERTICES_PER_BATCH) {
                        processVerticesInfo(verticesInfo, loadFileType);
                    }
                    break;
                }
                case SIMPLE_TEXT_FILE: {
                    StringTokenizer tokenizer = new StringTokenizer(line);
                    long lastFromNodeId = fromNodeId;
                    fromNodeId = Long.parseLong(tokenizer.nextToken());
                    long toNodeId = Long.parseLong(tokenizer.nextToken());
                    // Only flush when the source vertex changes, so consecutive edges of the
                    // same source end up in the same batch.
                    if (lastFromNodeId != fromNodeId && verticesInfo.size() >= VERTICES_PER_BATCH) {
                        processVerticesInfo(verticesInfo, loadFileType);
                    }
                    addEdgeToVertexInfo(verticesInfo, fromNodeId, toNodeId, isDirected);
                    edgeCounter++;
                    break;
                }
                default:
                    throw new RuntimeException("Only text files can be processed with this loader");
                }

                // Progress is reported once per counted line, never for skipped lines.
                if (lineCounter % 50000 == 0) {
                    System.out.print("]\n" + lineCounter + " lines read\n[");
                    System.out.flush();
                } else if (lineCounter % 1000 == 0) {
                    System.out.print(".");
                    System.out.flush();
                }
            }

            if (!verticesInfo.isEmpty()) {
                sendVerticesInfo(verticesInfo, loadFileType);
                verticesInfo.clear();
                if (!loaderIsMember) {
                    vertexIds.clear();
                }
            }

            System.out.println("]\nWaiting for pending load blocks....");
            synchronized (lock) {
                fileReadCompleted = true;
                // Waiting on the counter itself means replies that arrived before this point,
                // or a file that produced no blocks at all, cannot leave the loader blocked.
                while (pendingLoadBlocks > 0) {
                    lock.wait();
                }
            }

            if ("true".equals(Configuration.getProperty(Key.VIRTUAL_EDGES))) {
                ImgLogger.log(LogLevel.INFO, "Calculating local 2-Hop neighbors...");
                updateLocal2HopNeighbors();
            }

            int loadedVertices;
            synchronized (lock) {
                loadedVertices = vertexCounter;
            }
            long elapsedMillis = System.currentTimeMillis() - startMillis;
            ImgLogger.log(LogLevel.INFO, "File succesfully loaded in " + elapsedMillis +
                    "ms. " + loadedVertices +
                    " vertices and " + edgeCounter + " edges were created. " + lineCounter + " lines were processed");
            return new String[] {String.valueOf(loadedVertices), String.valueOf(edgeCounter)};
        } catch (InterruptedException e) {
            // Preserve the interrupt status for the caller instead of swallowing it.
            Thread.currentThread().interrupt();
            System.out.println("Error on line: " + lineCounter);
            ImgLogger.logError(e, "Error loading " + fileName);
            return null;
        } catch (Exception e) {
            System.out.println("Error on line: " + lineCounter);
            ImgLogger.logError(e, "Error loading " + fileName);
            return null;
        } finally {
            if (file != null) {
                file.Close();
            }
        }
    }

    /**
     * Stops every client thread and, for a non-member loader, closes the ZeroMQ socket and
     * terminates its context.
     */
    public void close() {
        if (clientThreads != null) {
            for (ClientThread clientThread : clientThreads.values()) {
                clientThread.stop();
            }
        }
        if (!loaderIsMember) {
            if (socketToMember != null) {
                socketToMember.close();
            }
            if (context != null) {
                context.term();
            }
        }
    }

    /**
     * Stores a block of vertices in the local {@link SteffiGraph}: vertices that are not yet
     * present are created, and the out, in and undirected edges of every vertex are added as
     * partial edges.
     *
     * @param verticesInfo vertices and edges of the block
     * @return the number of vertices that had to be created
     */
    public static int loadBlock(List<LoadVertexInfo> verticesInfo) {
        int newVertices = 0;
        SteffiGraph graph = SteffiGraph.getInstance();
        for (LoadVertexInfo loadVertex : verticesInfo) {
            SteffiVertex vertex = (SteffiVertex) graph.retrieveRawCell(loadVertex.getVertexId());
            if (vertex == null) {
                vertex = new SteffiVertex(loadVertex.getVertexId(), null, false);
                graph.storeCell(loadVertex.getVertexId(), vertex);
                newVertices++;
            }
            for (long outEdgeDest : loadVertex.getOutEdges()) {
                vertex.addPartialEdge(outEdgeDest, EdgeType.OUT, null, false);
            }
            for (long inEdgeDest : loadVertex.getInEdges()) {
                vertex.addPartialEdge(inEdgeDest, EdgeType.IN, null, false);
            }
            for (long undEdgeDest : loadVertex.getUndirectedEdges()) {
                vertex.addPartialEdge(undEdgeDest, EdgeType.UNDIRECTED, null, false);
            }
            vertex.trimToSize();
        }
        return newVertices;
    }

    /**
     * Returns the vertex cached for {@code vertexId}, adding it to the graph and to the cache
     * on first use.
     */
    private static Vertex getOrAddVertex(SteffiGraphDBGraph graph, Map<Long, Vertex> cache, long vertexId) {
        Vertex vertex = cache.get(vertexId);
        if (vertex == null) {
            vertex = graph.addVertex(vertexId);
            cache.put(vertexId, vertex);
        }
        return vertex;
    }

    /**
     * Adds the given vertices and their out edges to the graph, concluding the transaction
     * with {@code SUCCESS} after each source vertex. Any failure is rethrown wrapped in a
     * {@link RuntimeException}.
     */
    private static void processVertexInfo(SteffiGraphDBGraph graph, List<LoadVertexInfo> verticesInfo) {
        // Vertices created while processing this batch, keyed by id, so each id is added once.
        Map<Long, Vertex> batchVertices = new HashMap<Long, Vertex>();
        for (LoadVertexInfo vertexInfo : verticesInfo) {
            try {
                Vertex source = getOrAddVertex(graph, batchVertices, vertexInfo.getVertexId());
                for (long destId : vertexInfo.getOutEdges()) {
                    Vertex dest = getOrAddVertex(graph, batchVertices, destId);
                    graph.addEdge(null, source, dest, null);
                }
                graph.stopTransaction(Conclusion.SUCCESS);
            } catch (Exception x) {
                throw new RuntimeException(x);
            }
        }
    }

    /**
     * Loads an adjacency list file (comma separated, first value is the source vertex)
     * directly into the given graph from the calling thread. Lines starting with {@code #}
     * and blank lines are skipped. Errors are logged and not rethrown.
     *
     * @param graph    graph receiving the vertices and edges
     * @param fileName path of the file to read
     * @throws Exception declared for compatibility; failures inside the load are logged
     */
    public static void singleProcessLoad(SteffiGraphDBGraph graph, String fileName) throws Exception {
        BigTextFile file = null;
        List<LoadVertexInfo> verticesInfo = new ArrayList<LoadVertexInfo>();
        long edgeCounter = 0;
        long lineCounter = 0;
        long verticesRead = 0;
        try {
            file = new BigTextFile(fileName);
            long startMillis = System.currentTimeMillis();
            System.out.print("Loading\n[");
            for (String line : file) {
                lineCounter++;
                if (line.startsWith("#") || line.trim().equals("")) {
                    continue;
                }
                StringTokenizer tokenizer = new StringTokenizer(line, ",");
                LoadVertexInfo vertexInfo = new LoadVertexInfo(Long.parseLong(tokenizer.nextToken().trim()));
                while (tokenizer.hasMoreTokens()) {
                    vertexInfo.addOutEdge(Long.parseLong(tokenizer.nextToken().trim()));
                }
                verticesInfo.add(vertexInfo);
                verticesRead++;
                edgeCounter += vertexInfo.getOutEdges().size();
                if (verticesInfo.size() >= SINGLE_PROCESS_BATCH_SIZE) {
                    processVertexInfo(graph, verticesInfo);
                    verticesInfo.clear();
                    System.out.println(verticesRead + " vertices loaded...");
                }
            }
            if (!verticesInfo.isEmpty()) {
                processVertexInfo(graph, verticesInfo);
            }
            long elapsedMillis = System.currentTimeMillis() - startMillis;
            System.out.println("File succesfully loaded in " + elapsedMillis +
                    "ms. " + verticesRead +
                    " vertices and " + edgeCounter + " edges were created");
        } catch (Exception e) {
            System.out.println("Error on line: " + lineCounter);
            ImgLogger.logError(e, "Error loading " + fileName);
        } finally {
            if (file != null) {
                file.Close();
            }
        }
    }
}