// Copyright 2017 JanusGraph Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package org.janusgraph.graphdb.olap.computer;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import org.janusgraph.core.JanusGraphException;
import org.janusgraph.core.JanusGraphComputer;
import org.janusgraph.core.JanusGraphTransaction;
import org.janusgraph.core.schema.JanusGraphManagement;
import org.janusgraph.diskstorage.configuration.Configuration;
import org.janusgraph.diskstorage.keycolumnvalue.scan.ScanMetrics;
import org.janusgraph.diskstorage.keycolumnvalue.scan.StandardScanner;
import org.janusgraph.graphdb.configuration.GraphDatabaseConfiguration;
import org.janusgraph.graphdb.database.StandardJanusGraph;
import org.janusgraph.graphdb.util.WorkerPool;
import org.apache.tinkerpop.gremlin.process.computer.ComputerResult;
import org.apache.tinkerpop.gremlin.process.computer.GraphComputer;
import org.apache.tinkerpop.gremlin.process.computer.GraphFilter;
import org.apache.tinkerpop.gremlin.process.computer.MapReduce;
import org.apache.tinkerpop.gremlin.process.computer.VertexProgram;
import org.apache.tinkerpop.gremlin.process.computer.VertexComputeKey;
import org.apache.tinkerpop.gremlin.process.computer.util.DefaultComputerResult;
import org.apache.tinkerpop.gremlin.process.computer.util.GraphComputerHelper;
import org.apache.tinkerpop.gremlin.process.computer.util.VertexProgramHelper;
import org.apache.tinkerpop.gremlin.process.traversal.Traversal;
import org.apache.tinkerpop.gremlin.structure.Graph;
import org.apache.tinkerpop.gremlin.structure.Vertex;
import org.apache.tinkerpop.gremlin.structure.Edge;
import org.apache.tinkerpop.gremlin.structure.VertexProperty;
import org.apache.tinkerpop.gremlin.structure.util.StringFactory;
import org.apache.tinkerpop.gremlin.structure.util.empty.EmptyGraph;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicInteger;
/**
* @author Matthias Broecheler (me@matthiasb.com)
*/
public class FulgoraGraphComputer implements JanusGraphComputer {
private static final Logger log =
LoggerFactory.getLogger(FulgoraGraphComputer.class);
private VertexProgram<?> vertexProgram;
private final Set<MapReduce> mapReduces = new HashSet<>();
private final StandardJanusGraph graph;
private int expectedNumVertices = 10000;
private FulgoraMemory memory;
private FulgoraVertexMemory vertexMemory;
private boolean executed = false;
private int numThreads = 1;//Math.max(1,Runtime.getRuntime().availableProcessors());
private int readBatchSize = 10000;
private int writeBatchSize;
private ResultGraph resultGraphMode = null;
private Persist persistMode = null;
private static final AtomicInteger computerCounter = new AtomicInteger(0);
private String name;
private String jobId;
private final GraphFilter graphFilter = new GraphFilter();
public FulgoraGraphComputer(final StandardJanusGraph graph, final Configuration configuration) {
this.graph = graph;
this.writeBatchSize = configuration.get(GraphDatabaseConfiguration.BUFFER_SIZE);
this.readBatchSize = this.writeBatchSize * 10;
this.name = "compute" + computerCounter.incrementAndGet();
}
@Override
public GraphComputer vertices(final Traversal<Vertex, Vertex> vertexFilter) {
this.graphFilter.setVertexFilter(vertexFilter);
return this;
}
@Override
public GraphComputer edges(final Traversal<Vertex, Edge> edgeFilter) {
this.graphFilter.setEdgeFilter(edgeFilter);
return this;
}
@Override
public GraphComputer result(ResultGraph resultGraph) {
Preconditions.checkArgument(resultGraph != null, "Need to specify mode");
this.resultGraphMode = resultGraph;
return this;
}
@Override
public GraphComputer persist(Persist persist) {
Preconditions.checkArgument(persist != null, "Need to specify mode");
this.persistMode = persist;
return this;
}
@Override
public JanusGraphComputer workers(int threads) {
Preconditions.checkArgument(threads > 0, "Invalid number of threads: %s", threads);
numThreads = threads;
return this;
}
@Override
public GraphComputer program(final VertexProgram vertexProgram) {
Preconditions.checkState(this.vertexProgram == null, "A vertex program has already been set");
this.vertexProgram = vertexProgram;
return this;
}
@Override
public GraphComputer mapReduce(final MapReduce mapReduce) {
this.mapReduces.add(mapReduce);
return this;
}
@Override
public Future<ComputerResult> submit() {
if (executed)
throw Exceptions.computerHasAlreadyBeenSubmittedAVertexProgram();
else
executed = true;
// it is not possible execute a computer if it has no vertex program nor mapreducers
if (null == vertexProgram && mapReduces.isEmpty())
throw GraphComputer.Exceptions.computerHasNoVertexProgramNorMapReducers();
// it is possible to run mapreducers without a vertex program
if (null != vertexProgram) {
GraphComputerHelper.validateProgramOnComputer(this, vertexProgram);
this.mapReduces.addAll(this.vertexProgram.getMapReducers());
}
// if the user didn't set desired persistence/resultgraph, then get from vertex program or else, no persistence
this.persistMode = GraphComputerHelper.getPersistState(Optional.ofNullable(this.vertexProgram), Optional.ofNullable(this.persistMode));
this.resultGraphMode = GraphComputerHelper.getResultGraphState(Optional.ofNullable(this.vertexProgram), Optional.ofNullable(this.resultGraphMode));
// determine the legality persistence and result graph options
if (!this.features().supportsResultGraphPersistCombination(this.resultGraphMode, this.persistMode))
throw GraphComputer.Exceptions.resultGraphPersistCombinationNotSupported(this.resultGraphMode, this.persistMode);
// ensure requested workers are not larger than supported workers
if (this.numThreads > this.features().getMaxWorkers())
throw GraphComputer.Exceptions.computerRequiresMoreWorkersThanSupported(this.numThreads, this.features().getMaxWorkers());
memory = new FulgoraMemory(vertexProgram, mapReduces);
return CompletableFuture.<ComputerResult>supplyAsync(() -> {
final long time = System.currentTimeMillis();
if (null != vertexProgram) {
// ##### Execute vertex program
vertexMemory = new FulgoraVertexMemory(expectedNumVertices, graph.getIDManager(), vertexProgram);
// execute the vertex program
vertexProgram.setup(memory);
try (VertexProgramScanJob.Executor job = VertexProgramScanJob.getVertexProgramScanJob(graph, memory, vertexMemory, vertexProgram)) {
for (int iteration = 1; ; iteration++) {
memory.completeSubRound();
vertexMemory.nextIteration(vertexProgram.getMessageScopes(memory));
jobId = name + "#" + iteration;
StandardScanner.Builder scanBuilder = graph.getBackend().buildEdgeScanJob();
scanBuilder.setJobId(jobId);
scanBuilder.setNumProcessingThreads(numThreads);
scanBuilder.setWorkBlockSize(readBatchSize);
scanBuilder.setJob(job);
PartitionedVertexProgramExecutor pvpe = new PartitionedVertexProgramExecutor(graph, memory, vertexMemory, vertexProgram);
try {
//Iterates over all vertices and computes the vertex program on all non-partitioned vertices. For partitioned ones, the data is aggregated
ScanMetrics jobResult = scanBuilder.execute().get();
long failures = jobResult.get(ScanMetrics.Metric.FAILURE);
if (failures > 0) {
throw new JanusGraphException("Failed to process [" + failures + "] vertices in vertex program iteration [" + iteration + "]. Computer is aborting.");
}
//Runs the vertex program on all aggregated, partitioned vertices.
pvpe.run(numThreads, jobResult);
failures = jobResult.getCustom(PartitionedVertexProgramExecutor.PARTITION_VERTEX_POSTFAIL);
if (failures > 0) {
throw new JanusGraphException("Failed to process [" + failures + "] partitioned vertices in vertex program iteration [" + iteration + "]. Computer is aborting.");
}
} catch (Exception e) {
throw new JanusGraphException(e);
}
vertexMemory.completeIteration();
memory.completeSubRound();
try {
if (this.vertexProgram.terminate(this.memory)) {
break;
}
} finally {
memory.incrIteration();
}
}
}
}
// ##### Execute mapreduce jobs
// Collect map jobs
Map<MapReduce, FulgoraMapEmitter> mapJobs = new HashMap<>(mapReduces.size());
for (MapReduce mapReduce : mapReduces) {
if (mapReduce.doStage(MapReduce.Stage.MAP)) {
FulgoraMapEmitter mapEmitter = new FulgoraMapEmitter<>(mapReduce.doStage(MapReduce.Stage.REDUCE));
mapJobs.put(mapReduce, mapEmitter);
}
}
// Execute map jobs
jobId = name + "#map";
try (VertexMapJob.Executor job = VertexMapJob.getVertexMapJob(graph, vertexMemory, mapJobs)) {
StandardScanner.Builder scanBuilder = graph.getBackend().buildEdgeScanJob();
scanBuilder.setJobId(jobId);
scanBuilder.setNumProcessingThreads(numThreads);
scanBuilder.setWorkBlockSize(readBatchSize);
scanBuilder.setJob(job);
try {
ScanMetrics jobResult = scanBuilder.execute().get();
long failures = jobResult.get(ScanMetrics.Metric.FAILURE);
if (failures > 0) {
throw new JanusGraphException("Failed to process [" + failures + "] vertices in map phase. Computer is aborting.");
}
failures = jobResult.getCustom(VertexMapJob.MAP_JOB_FAILURE);
if (failures > 0) {
throw new JanusGraphException("Failed to process [" + failures + "] individual map jobs. Computer is aborting.");
}
} catch (Exception e) {
throw new JanusGraphException(e);
}
// Execute reduce phase and add to memory
for (Map.Entry<MapReduce, FulgoraMapEmitter> mapJob : mapJobs.entrySet()) {
FulgoraMapEmitter<?, ?> mapEmitter = mapJob.getValue();
MapReduce mapReduce = mapJob.getKey();
mapEmitter.complete(mapReduce); // sort results if a map output sort is defined
if (mapReduce.doStage(MapReduce.Stage.REDUCE)) {
final FulgoraReduceEmitter<?, ?> reduceEmitter = new FulgoraReduceEmitter<>();
try (WorkerPool workers = new WorkerPool(numThreads)) {
workers.submit(() -> mapReduce.workerStart(MapReduce.Stage.REDUCE));
for (final Map.Entry queueEntry : mapEmitter.reduceMap.entrySet()) {
if (null == queueEntry) break;
workers.submit(() -> mapReduce.reduce(queueEntry.getKey(), ((Iterable) queueEntry.getValue()).iterator(), reduceEmitter));
}
workers.submit(() -> mapReduce.workerEnd(MapReduce.Stage.REDUCE));
} catch (Exception e) {
throw new JanusGraphException("Exception while executing reduce phase", e);
}
// mapEmitter.reduceMap.entrySet().parallelStream().forEach(entry -> mapReduce.reduce(entry.getKey(), entry.getValue().iterator(), reduceEmitter));
reduceEmitter.complete(mapReduce); // sort results if a reduce output sort is defined
mapReduce.addResultToMemory(this.memory, reduceEmitter.reduceQueue.iterator());
} else {
mapReduce.addResultToMemory(this.memory, mapEmitter.mapQueue.iterator());
}
}
}
memory.attachReferenceElements(graph);
// #### Write mutated properties back into graph
Graph resultgraph = graph;
if (persistMode == Persist.NOTHING && resultGraphMode == ResultGraph.NEW) {
resultgraph = EmptyGraph.instance();
} else if (persistMode != Persist.NOTHING && vertexProgram != null && !vertexProgram.getVertexComputeKeys().isEmpty()) {
//First, create property keys in graph if they don't already exist
JanusGraphManagement mgmt = graph.openManagement();
try {
for (VertexComputeKey key : vertexProgram.getVertexComputeKeys()) {
if (!mgmt.containsPropertyKey(key.getKey()))
log.warn("Property key [{}] is not part of the schema and will be created. It is advised to initialize all keys.", key.getKey());
mgmt.getOrCreatePropertyKey(key.getKey());
}
mgmt.commit();
} finally {
if (mgmt != null && mgmt.isOpen()) mgmt.rollback();
}
//TODO: Filter based on VertexProgram
Map<Long, Map<String, Object>> mutatedProperties = Maps.transformValues(vertexMemory.getMutableVertexProperties(),
new Function<Map<String, Object>, Map<String, Object>>() {
@Nullable
@Override
public Map<String, Object> apply(@Nullable Map<String, Object> o) {
return Maps.filterKeys(o, s -> !VertexProgramHelper.isTransientVertexComputeKey(s, vertexProgram.getVertexComputeKeys()));
}
});
if (resultGraphMode == ResultGraph.ORIGINAL) {
AtomicInteger failures = new AtomicInteger(0);
try (WorkerPool workers = new WorkerPool(numThreads)) {
List<Map.Entry<Long, Map<String, Object>>> subset = new ArrayList<>(writeBatchSize / vertexProgram.getVertexComputeKeys().size());
int currentSize = 0;
for (Map.Entry<Long, Map<String, Object>> entry : mutatedProperties.entrySet()) {
subset.add(entry);
currentSize += entry.getValue().size();
if (currentSize >= writeBatchSize) {
workers.submit(new VertexPropertyWriter(subset, failures));
subset = new ArrayList<>(subset.size());
currentSize = 0;
}
}
if (!subset.isEmpty()) workers.submit(new VertexPropertyWriter(subset, failures));
} catch (Exception e) {
throw new JanusGraphException("Exception while attempting to persist result into graph", e);
}
if (failures.get() > 0)
throw new JanusGraphException("Could not persist program results to graph. Check log for details.");
} else if (resultGraphMode == ResultGraph.NEW) {
resultgraph = graph.newTransaction();
for (Map.Entry<Long, Map<String, Object>> vprop : mutatedProperties.entrySet()) {
Vertex v = resultgraph.vertices(vprop.getKey()).next();
for (Map.Entry<String, Object> prop : vprop.getValue().entrySet()) {
v.property(VertexProperty.Cardinality.single, prop.getKey(), prop.getValue());
}
}
}
}
// update runtime and return the newly computed graph
this.memory.setRuntime(System.currentTimeMillis() - time);
this.memory.complete();
return new DefaultComputerResult(resultgraph, this.memory);
});
}
private class VertexPropertyWriter implements Runnable {
private final List<Map.Entry<Long, Map<String, Object>>> properties;
private final AtomicInteger failures;
private VertexPropertyWriter(List<Map.Entry<Long, Map<String, Object>>> properties, AtomicInteger failures) {
assert properties != null && !properties.isEmpty() && failures != null;
this.properties = properties;
this.failures = failures;
}
@Override
public void run() {
JanusGraphTransaction tx = graph.buildTransaction().enableBatchLoading().start();
try {
for (Map.Entry<Long, Map<String, Object>> vprop : properties) {
Vertex v = tx.getVertex(vprop.getKey());
for (Map.Entry<String, Object> prop : vprop.getValue().entrySet()) {
v.property(VertexProperty.Cardinality.single, prop.getKey(), prop.getValue());
}
}
tx.commit();
} catch (Throwable e) {
failures.incrementAndGet();
log.error("Encountered exception while trying to write properties: ", e);
} finally {
if (tx != null && tx.isOpen()) tx.rollback();
}
}
}
@Override
public String toString() {
return StringFactory.graphComputerString(this);
}
@Override
public Features features() {
return new Features() {
@Override
public boolean supportsVertexAddition() {
return false;
}
@Override
public boolean supportsVertexRemoval() {
return false;
}
@Override
public boolean supportsVertexPropertyAddition() {
return true;
}
@Override
public boolean supportsVertexPropertyRemoval() {
return false;
}
@Override
public boolean supportsEdgeAddition() {
return false;
}
@Override
public boolean supportsEdgeRemoval() {
return false;
}
@Override
public boolean supportsEdgePropertyAddition() {
return false;
}
@Override
public boolean supportsEdgePropertyRemoval() {
return false;
}
@Override
public boolean supportsGraphFilter() {
return false;
}
};
}
}