/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.tinkerpop.gremlin.process.computer; import org.apache.commons.configuration.Configuration; import org.apache.tinkerpop.gremlin.process.traversal.traverser.TraverserRequirement; import org.apache.tinkerpop.gremlin.structure.Graph; import org.apache.tinkerpop.gremlin.structure.Vertex; import java.lang.reflect.Constructor; import java.util.Collections; import java.util.Optional; import java.util.Set; /** * A {@link VertexProgram} represents one component of a distributed graph computation. Each vertex in the graph * (logically) executes the {@link VertexProgram} instance in parallel. The collective behavior yields * the computational result. In practice, a "worker" (i.e. task, thread, etc.) is responsible for executing the * VertexProgram against each vertex that it has in its vertex set (a subset of the full graph vertex set). * At minimum there is one "worker" for each vertex, though this is impractical in practice and {@link GraphComputer} * implementations that leverage such a design are not expected to perform well due to the excess object creation. * Any local state/fields in a VertexProgram is static to the vertices within the same worker set. * It is not safe to assume that the VertexProgram's "worker" state will remain stable between iterations. * Hence, the existence of {@link VertexProgram#workerIterationStart} and {@link VertexProgram#workerIterationEnd}. * * @author Marko A. Rodriguez (http://markorodriguez.com) * @author Matthias Broecheler (me@matthiasb.com) */ public interface VertexProgram<M> extends Cloneable { public static final String VERTEX_PROGRAM = "gremlin.vertexProgram"; /** * When it is necessary to store the state of the VertexProgram, this method is called. * This is typically required when the VertexProgram needs to be serialized to another machine. * Note that what is stored is simply the instance/configuration state, not any processed data. * The default implementation provided simply stores the VertexProgram class name for reflective reconstruction. * It is typically a good idea to VertexProgram.super.storeState(). * * @param configuration the configuration to store the state of the VertexProgram in. */ public default void storeState(final Configuration configuration) { configuration.setProperty(VERTEX_PROGRAM, this.getClass().getName()); } /** * When it is necessary to load the state of the VertexProgram, this method is called. * This is typically required when the VertexProgram needs to be serialized to another machine. * Note that what is loaded is simply the instance state, not any processed data. * * @param graph the graph that the VertexProgram will run against * @param configuration the configuration to load the state of the VertexProgram from. */ public default void loadState(final Graph graph, final Configuration configuration) { } /** * The method is called at the beginning of the computation. * The method is global to the {@link GraphComputer} and as such, is not called for each vertex. * During this stage, the {@link Memory} should be initialized to to its "start state." * * @param memory The global memory of the GraphComputer */ public void setup(final Memory memory); /** * This method denotes the main body of the computation and is executed on each vertex in the graph. * This method is logically executed in parallel on all vertices in the graph. * When the {@link Memory} is read, it is according to the aggregated state yielded in the previous iteration. * When the {@link Memory} is written, the data will be aggregated at the end of the iteration for reading in the next iteration. * * @param vertex the {@link Vertex} to execute the {@link VertexProgram} on * @param messenger the messenger that moves data between vertices * @param memory the shared state between all vertices in the computation */ public void execute(final Vertex vertex, final Messenger<M> messenger, final Memory memory); /** * The method is called at the end of each iteration to determine if the computation is complete. * The method is global to the {@link GraphComputer} and as such, is not called for each {@link Vertex}. * The {@link Memory} maintains the aggregated data from the last execute() iteration. * * @param memory The global memory of the {@link GraphComputer} * @return whether or not to halt the computation */ public boolean terminate(final Memory memory); /** * This method is called at the start of each iteration of each "computational chunk." * The set of vertices in the graph are typically not processed with full parallelism. * The vertex set is split into subsets and a worker is assigned to call the {@link VertexProgram#execute} method. * The default implementation is a no-op. * * @param memory The memory at the start of the iteration. */ public default void workerIterationStart(final Memory memory) { } /** * This method is called at the end of each iteration of each "computational chunk." * The set of vertices in the graph are typically not processed with full parallelism. * The vertex set is split into subsets and a worker is assigned to call the {@link VertexProgram#execute} method. * The default implementation is a no-op. * * @param memory The memory at the end of the iteration. */ public default void workerIterationEnd(final Memory memory) { } /** * The {@link org.apache.tinkerpop.gremlin.structure.Element} properties that will be mutated during the computation. * All properties in the graph are readable, but only the keys specified here are writable. * The default is an empty set. * * @return the set of element keys that will be mutated during the vertex program's execution */ public default Set<VertexComputeKey> getVertexComputeKeys() { return Collections.emptySet(); } /** * The {@link Memory} keys that will be used during the computation. * These are the only keys that can be read or written throughout the life of the {@link GraphComputer}. * The default is an empty set. * * @return the set of memory keys that will be read/written */ public default Set<MemoryComputeKey> getMemoryComputeKeys() { return Collections.emptySet(); } /** * Combine the messages in route to a particular vertex. Useful to reduce the amount of data transmitted over the wire. * For example, instead of sending two objects that will ultimately be merged at the vertex destination, merge/combine into one and send that object. * If no message combiner is provider, then no messages will be combined. * Furthermore, it is not guaranteed the all messages in route to the vertex will be combined and thus, combiner-state should not be used. * The result of the vertex program algorithm should be the same regardless of whether message combining is executed or not. * * @return A optional denoting whether or not their is a message combine associated with the vertex program. */ public default Optional<MessageCombiner<M>> getMessageCombiner() { return Optional.empty(); } /** * This method returns all the {@link MessageScope} possibilities for a particular iteration of the vertex program. * The returned messages scopes are the scopes that will be used to send messages during the stated iteration. * It is not a requirement that all stated messages scopes be used, just that it is possible that they be used during the iteration. * * @param memory an immutable form of the {@link Memory} * @return all possible message scopes during said vertex program iteration */ public Set<MessageScope> getMessageScopes(final Memory memory); /** * The set of {@link MapReduce} jobs that are associated with the {@link VertexProgram}. * This is not necessarily the exhaustive list over the life of the {@link GraphComputer}. * If MapReduce jobs are declared by GraphComputer.mapReduce(), they are not contained in this set. * The default is an empty set. * * @return the set of {@link MapReduce} jobs associated with this {@link VertexProgram} */ public default Set<MapReduce> getMapReducers() { return Collections.emptySet(); } /** * The traverser requirements that are needed when this VP is used as part of a traversal. * The default is an empty set. * * @return the traverser requirements */ public default Set<TraverserRequirement> getTraverserRequirements() { return Collections.emptySet(); } /** * When multiple workers on a single machine need VertexProgram instances, it is possible to use clone. * This will provide a speedier way of generating instances, over the {@link VertexProgram#storeState} and {@link VertexProgram#loadState} model. * The default implementation simply returns the object as it assumes that the VertexProgram instance is a stateless singleton. * * @return A clone of the VertexProgram object */ @SuppressWarnings("CloneDoesntDeclareCloneNotSupportedException") public VertexProgram<M> clone(); public GraphComputer.ResultGraph getPreferredResultGraph(); public GraphComputer.Persist getPreferredPersist(); /** * A helper method to construct a {@link VertexProgram} given the content of the supplied configuration. * The class of the VertexProgram is read from the {@link VertexProgram#VERTEX_PROGRAM} static configuration key. * Once the VertexProgram is constructed, {@link VertexProgram#loadState} method is called with the provided graph and configuration. * * @param graph The graph that the vertex program will execute against * @param configuration A configuration with requisite information to build a vertex program * @param <V> The vertex program type * @return the newly constructed vertex program */ public static <V extends VertexProgram> V createVertexProgram(final Graph graph, final Configuration configuration) { try { final Class<V> vertexProgramClass = (Class) Class.forName(configuration.getString(VERTEX_PROGRAM)); final Constructor<V> constructor = vertexProgramClass.getDeclaredConstructor(); constructor.setAccessible(true); final V vertexProgram = constructor.newInstance(); vertexProgram.loadState(graph, configuration); return vertexProgram; } catch (final Exception e) { throw new IllegalStateException(e.getMessage(), e); } } public interface Builder { /** * This method should only be used by the underlying compute engine. For VertexProgram configurations, please * use specific fluent methods off the builder. */ public Builder configure(final Object... keyValues); public <P extends VertexProgram> P create(final Graph graph); } public default Features getFeatures() { return new Features() { }; } public interface Features { public default boolean requiresGlobalMessageScopes() { return false; } public default boolean requiresLocalMessageScopes() { return false; } public default boolean requiresVertexAddition() { return false; } public default boolean requiresVertexRemoval() { return false; } public default boolean requiresVertexPropertyAddition() { return false; } public default boolean requiresVertexPropertyRemoval() { return false; } public default boolean requiresEdgeAddition() { return false; } public default boolean requiresEdgeRemoval() { return false; } public default boolean requiresEdgePropertyAddition() { return false; } public default boolean requiresEdgePropertyRemoval() { return false; } } }