/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.tinkerpop.gremlin.hadoop.process.computer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocatedFileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.tinkerpop.gremlin.hadoop.Constants;
import org.apache.tinkerpop.gremlin.hadoop.structure.HadoopGraph;
import org.apache.tinkerpop.gremlin.hadoop.structure.util.ConfUtil;
import org.apache.tinkerpop.gremlin.process.computer.GraphComputer;
import org.apache.tinkerpop.gremlin.process.computer.GraphFilter;
import org.apache.tinkerpop.gremlin.process.computer.MapReduce;
import org.apache.tinkerpop.gremlin.process.computer.VertexProgram;
import org.apache.tinkerpop.gremlin.process.computer.util.GraphComputerHelper;
import org.apache.tinkerpop.gremlin.process.traversal.Traversal;
import org.apache.tinkerpop.gremlin.structure.Edge;
import org.apache.tinkerpop.gremlin.structure.Vertex;
import org.apache.tinkerpop.gremlin.structure.util.StringFactory;
import org.apache.tinkerpop.gremlin.util.Gremlin;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashSet;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* @author Marko A. Rodriguez (http://markorodriguez.com)
*/
public abstract class AbstractHadoopGraphComputer implements GraphComputer {

    // Splits the HADOOP_GREMLIN_LIBS value into individual path entries. On systems whose
    // path separator is ":" (unix-style), the alternation admits "://" so that URI schemes
    // such as hdfs://host/dir are not split apart; on other systems (e.g. windows, ";") an
    // entry is a run of one or more non-separator characters. NOTE: the trailing "+" on the
    // second branch is essential -- without it each match is a single character, not a path.
    private final static Pattern PATH_PATTERN =
            Pattern.compile(File.pathSeparator.equals(":") ? "([^:]|://)+" : ("[^" + File.pathSeparator + "]+"));

    protected final Logger logger;
    protected final HadoopGraph hadoopGraph;
    // a GraphComputer is single-use; this flag guards against double submission
    protected boolean executed = false;
    protected final Set<MapReduce> mapReducers = new HashSet<>();
    protected VertexProgram<Object> vertexProgram;
    protected int workers = 1;
    // null means "not set by the user" -- resolved from the vertex program (or defaulted)
    // in validateStatePriorToExecution()
    protected ResultGraph resultGraph = null;
    protected Persist persist = null;
    protected GraphFilter graphFilter = new GraphFilter();

    public AbstractHadoopGraphComputer(final HadoopGraph hadoopGraph) {
        this.hadoopGraph = hadoopGraph;
        // keyed to the concrete subclass so log lines identify the actual computer implementation
        this.logger = LoggerFactory.getLogger(this.getClass());
    }

    @Override
    public GraphComputer vertices(final Traversal<Vertex, Vertex> vertexFilter) {
        this.graphFilter.setVertexFilter(vertexFilter);
        return this;
    }

    @Override
    public GraphComputer edges(final Traversal<Vertex, Edge> edgeFilter) {
        this.graphFilter.setEdgeFilter(edgeFilter);
        return this;
    }

    @Override
    public GraphComputer result(final ResultGraph resultGraph) {
        this.resultGraph = resultGraph;
        return this;
    }

    @Override
    public GraphComputer persist(final Persist persist) {
        this.persist = persist;
        return this;
    }

    @Override
    public GraphComputer program(final VertexProgram vertexProgram) {
        this.vertexProgram = vertexProgram;
        return this;
    }

    @Override
    public GraphComputer mapReduce(final MapReduce mapReduce) {
        this.mapReducers.add(mapReduce);
        return this;
    }

    @Override
    public GraphComputer workers(final int workers) {
        this.workers = workers;
        return this;
    }

    @Override
    public String toString() {
        return StringFactory.graphComputerString(this);
    }

    /**
     * Validates this computer's configuration prior to job submission and resolves any
     * {@link ResultGraph}/{@link Persist} options the user left unset from the vertex program.
     * Marks the computer as executed as a side effect.
     */
    protected void validateStatePriorToExecution() {
        // a graph computer can only be executed one time
        if (this.executed)
            throw Exceptions.computerHasAlreadyBeenSubmittedAVertexProgram();
        else
            this.executed = true;
        // it is not possible to execute a computer that has no vertex program nor mapreducers
        if (null == this.vertexProgram && this.mapReducers.isEmpty())
            throw GraphComputer.Exceptions.computerHasNoVertexProgramNorMapReducers();
        // it is possible to run mapreducers without a vertex program
        if (null != this.vertexProgram) {
            GraphComputerHelper.validateProgramOnComputer(this, vertexProgram);
            this.mapReducers.addAll(this.vertexProgram.getMapReducers());
        }
        // if the user didn't set desired persistence/resultgraph, then get from vertex program or else, no persistence
        this.persist = GraphComputerHelper.getPersistState(Optional.ofNullable(this.vertexProgram), Optional.ofNullable(this.persist));
        this.resultGraph = GraphComputerHelper.getResultGraphState(Optional.ofNullable(this.vertexProgram), Optional.ofNullable(this.resultGraph));
        // determine persistence and result graph options
        if (!this.features().supportsResultGraphPersistCombination(this.resultGraph, this.persist))
            throw GraphComputer.Exceptions.resultGraphPersistCombinationNotSupported(this.resultGraph, this.persist);
        // if too many workers are requested, throw appropriate exception
        if (this.workers > this.features().getMaxWorkers())
            throw GraphComputer.Exceptions.computerRequiresMoreWorkersThanSupported(this.workers, this.features().getMaxWorkers());
    }

    /**
     * Hands every jar found under the HADOOP_GREMLIN_LIBS paths to
     * {@link #loadJar(Configuration, File, Object...)}. Entries may be local paths or any
     * filesystem URI Hadoop can resolve; remote directories are first localized by
     * {@link #copyDirectoryIfNonExistent(FileSystem, String)}. A no-op when
     * {@code Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE} is disabled.
     */
    protected void loadJars(final Configuration hadoopConfiguration, final Object... params) {
        if (hadoopConfiguration.getBoolean(Constants.GREMLIN_HADOOP_JARS_IN_DISTRIBUTED_CACHE, true)) {
            // the system property takes precedence over the environment variable
            final String hadoopGremlinLibs = null == System.getProperty(Constants.HADOOP_GREMLIN_LIBS) ? System.getenv(Constants.HADOOP_GREMLIN_LIBS) : System.getProperty(Constants.HADOOP_GREMLIN_LIBS);
            if (null == hadoopGremlinLibs)
                this.logger.warn(Constants.HADOOP_GREMLIN_LIBS + " is not set -- proceeding regardless");
            else {
                try {
                    final Matcher matcher = PATH_PATTERN.matcher(hadoopGremlinLibs);
                    while (matcher.find()) {
                        final String path = matcher.group();
                        FileSystem fs;
                        try {
                            final URI uri = new URI(path);
                            fs = FileSystem.get(uri, hadoopConfiguration);
                        } catch (URISyntaxException e) {
                            // not a URI -- fall back to the configured default filesystem
                            fs = FileSystem.get(hadoopConfiguration);
                        }
                        final File file = AbstractHadoopGraphComputer.copyDirectoryIfNonExistent(fs, path);
                        // listFiles() returns null when the path exists but is not a directory;
                        // treat that the same as a missing directory rather than risking an NPE
                        final File[] contents = file.exists() ? file.listFiles() : null;
                        if (null != contents) {
                            for (final File f : contents) {
                                if (f.getName().endsWith(Constants.DOT_JAR)) {
                                    loadJar(hadoopConfiguration, f, params);
                                }
                            }
                        } else
                            this.logger.warn(path + " does not reference a valid directory -- proceeding regardless");
                    }
                } catch (IOException e) {
                    throw new IllegalStateException(e.getMessage(), e);
                }
            }
        }
    }

    /**
     * Loads a single jar into the job's classpath; implemented per execution engine
     * (e.g. MapReduce distributed cache, Spark context).
     */
    protected abstract void loadJar(final Configuration hadoopConfiguration, final File file, final Object... params)
            throws IOException;

    @Override
    public Features features() {
        return new Features();
    }

    /**
     * Hadoop-based computers operate on an immutable view of the source graph: vertex/edge
     * additions and removals are unsupported. Persistence combinations are delegated to the
     * configured graph writer when it implements {@link PersistResultGraphAware}; otherwise
     * all combinations are optimistically reported as supported.
     */
    public class Features implements GraphComputer.Features {

        @Override
        public boolean supportsVertexAddition() {
            return false;
        }

        @Override
        public boolean supportsVertexRemoval() {
            return false;
        }

        @Override
        public boolean supportsVertexPropertyRemoval() {
            return false;
        }

        @Override
        public boolean supportsEdgeAddition() {
            return false;
        }

        @Override
        public boolean supportsEdgeRemoval() {
            return false;
        }

        @Override
        public boolean supportsEdgePropertyAddition() {
            return false;
        }

        @Override
        public boolean supportsEdgePropertyRemoval() {
            return false;
        }

        @Override
        public boolean supportsResultGraphPersistCombination(final ResultGraph resultGraph, final Persist persist) {
            if (hadoopGraph.configuration().containsKey(Constants.GREMLIN_HADOOP_GRAPH_WRITER)) {
                // the writer knows its own persistence semantics -- ask it when possible
                final Object writer = ReflectionUtils.newInstance(hadoopGraph.configuration().getGraphWriter(), ConfUtil.makeHadoopConfiguration(hadoopGraph.configuration()));
                if (writer instanceof PersistResultGraphAware)
                    return ((PersistResultGraphAware) writer).supportsResultGraphPersistCombination(resultGraph, persist);
                else {
                    logger.warn(writer.getClass() + " does not implement " + PersistResultGraphAware.class.getSimpleName() + " and thus, persistence options are unknown -- assuming all options are possible");
                    return true;
                }
            } else {
                logger.warn("No " + Constants.GREMLIN_HADOOP_GRAPH_WRITER + " property provided and thus, persistence options are unknown -- assuming all options are possible");
                return true;
            }
        }

        @Override
        public boolean supportsDirectObjects() {
            return false;
        }
    }

    //////////

    /**
     * If {@code directory} names an existing directory on {@code fileSystem} (or the JVM is in
     * testing mode), copies its immediate contents into a Gremlin-version-specific directory
     * under {@code java.io.tmpdir} and returns that local directory; otherwise returns
     * {@code directory} interpreted as a local file.
     *
     * @param fileSystem the (possibly remote) filesystem the directory resides on
     * @param directory  the directory of jars to localize
     * @return a local directory containing the jars
     * @throws IllegalStateException if the copy fails or the temp directory cannot be created
     */
    public static File copyDirectoryIfNonExistent(final FileSystem fileSystem, final String directory) {
        try {
            final String hadoopGremlinLibsRemote = "hadoop-gremlin-" + Gremlin.version() + "-libs";
            final Path path = new Path(directory);
            if (Boolean.valueOf(System.getProperty("is.testing", "false")) || (fileSystem.exists(path) && fileSystem.isDirectory(path))) {
                final File tempDirectory = new File(System.getProperty("java.io.tmpdir") + File.separator + hadoopGremlinLibsRemote);
                // create the directory unconditionally -- previously this lived inside an
                // assert and was silently skipped when assertions were disabled. The second
                // exists() check covers a concurrent creation racing mkdirs().
                if (!tempDirectory.exists() && !tempDirectory.mkdirs() && !tempDirectory.exists())
                    throw new IllegalStateException("unable to create temp directory: " + tempDirectory);
                final String tempPath = tempDirectory.getAbsolutePath() + File.separator + path.getName();
                // non-recursive listing: only the directory's immediate files are localized
                final RemoteIterator<LocatedFileStatus> files = fileSystem.listFiles(path, false);
                while (files.hasNext()) {
                    final LocatedFileStatus f = files.next();
                    fileSystem.copyToLocalFile(false, f.getPath(), new Path(tempPath + File.separator + f.getPath().getName()), true);
                }
                return new File(tempPath);
            } else
                return new File(directory);
        } catch (final IOException e) {
            throw new IllegalStateException(e.getMessage(), e);
        }
    }
}