// Copyright 2017 JanusGraph Authors // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package org.janusgraph.hadoop.config.job; import java.io.File; import java.io.IOException; import java.util.LinkedList; import java.util.List; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import com.google.common.collect.ImmutableList; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Abstract base class for {@link org.janusgraph.hadoop.config.job.JobClasspathConfigurer} * implementations that use Hadoop's distributed cache to store push classfiles to the cluster. */ public abstract class AbstractDistCacheConfigurer { // public static enum FileCopyMode { // // /** // * Copy a jar unless a file with the same name already exists in the staging // * directory of the Hadoop FileSystem. // */ // FILENAME, // // /** // * Copy a jar unless a file with the same name and same modtime already exists // * in the staging directory of the Hadoop FileSystem. // */ // MODTIME, // // /** // * Unconditionally copy all jars to the Hadoop FileSystem, even if they // * already exist at the destination and have up-to-date modtimes. // */ // ALWAYS; // } // // public static final ConfigOption<Boolean> SKIP_LOCAL_COPIES = // new ConfigOption<Boolean>(JanusGraphHadoopConfiguration.JARCACHE_NS, "skip-local-copies", // "When this option is true and Hadoop is configured to use a LocalFileSystem as " + // "its default, JanusGraph will not attempt to copy jars from the classpath to the " + // "LocalFileSystem (which is redundant when using the local JobRunner)", ConfigOption.Type.MASKABLE, true); private final Conf conf; private static final String HDFS_TMP_LIB_DIR = "janusgraphlib"; private static final Logger log = LoggerFactory.getLogger(AbstractDistCacheConfigurer.class); public AbstractDistCacheConfigurer(String mapredJarFilename) { this.conf = configureByClasspath(mapredJarFilename); } public String getMapredJar() { return conf.mapredJar; } public ImmutableList<Path> getLocalPaths() { return conf.paths; } protected Path uploadFileIfNecessary(FileSystem localFS, Path localPath, FileSystem destFS) throws IOException { // Fast path for local FS -- DistributedCache + local JobRunner seems copy/link files automatically if (destFS.equals(localFS)) { log.debug("Skipping file upload for {} (destination filesystem {} equals local filesystem)", localPath, destFS); return localPath; } Path destPath = new Path(destFS.getHomeDirectory() + "/" + HDFS_TMP_LIB_DIR + "/" + localPath.getName()); Stats fileStats = null; try { fileStats = compareModtimes(localFS, localPath, destFS, destPath); } catch (IOException e) { log.warn("Unable to read or stat file: localPath={}, destPath={}, destFS={}", localPath, destPath, destFS); } if (!fileStats.isRemoteCopyCurrent()) { log.debug("Copying {} to {}", localPath, destPath); destFS.copyFromLocalFile(localPath, destPath); if (null != fileStats.local) { final long mtime = fileStats.local.getModificationTime(); log.debug("Setting modtime on {} to {}", destPath, mtime); destFS.setTimes(destPath, mtime, -1); // -1 means leave atime alone } } return destPath; } private Stats compareModtimes(FileSystem localFS, Path localPath, FileSystem destFS, Path destPath) throws IOException { Stats s = new Stats(); s.local = localFS.getFileStatus(localPath); if (destFS.exists(destPath)) { s.dest = destFS.getFileStatus(destPath); if (null != s.dest && null != s.local) { long l = s.local.getModificationTime(); long d = s.dest.getModificationTime(); if (l == d) { if (log.isDebugEnabled()) log.debug("File {} with modtime {} is up-to-date", destPath, d); } else if (l < d) { log.warn("File {} has newer modtime ({}) than our local copy {} ({})", destPath, d, localPath, l); } else { log.debug("Remote file {} exists but is out-of-date: local={} dest={}", destPath, l, d); } } else { log.debug("Unable to stat file(s): [LOCAL: path={} stat={}] [DEST: path={} stat={}]", localPath, s.local, destPath, s.dest); } } else { log.debug("File {} does not exist", destPath); } return s; } private static Conf configureByClasspath(String mapredJarFilename) { List<Path> paths = new LinkedList<Path>(); final String classpath = System.getProperty("java.class.path"); final String mrj = mapredJarFilename.toLowerCase(); String mapredJarPath = null; for (String cpentry : classpath.split(File.pathSeparator)) { if (cpentry.toLowerCase().endsWith(".jar") || cpentry.toLowerCase().endsWith(".properties")) { paths.add(new Path(cpentry)); if (cpentry.toLowerCase().endsWith(mrj)); mapredJarPath = cpentry; } } return new Conf(paths, mapredJarPath); } private static class Conf { private final ImmutableList<Path> paths; private final String mapredJar; public Conf(List<Path> paths, String mapredJar) { this.paths = ImmutableList.copyOf(paths); this.mapredJar = mapredJar; } } private static class Stats { private FileStatus local; private FileStatus dest; private boolean isRemoteCopyCurrent() { return null != local && null != dest && dest.getModificationTime() == local.getModificationTime(); } } // LocalFileSystem doesn't checksum, it just returns null, so this is useless // private boolean compareChecksums(FileSystem localFS, Path localPath, FileSystem destFS, Path destPath) throws IOException { // if (destFS.exists(destPath)) { // FileChecksum localCheck = localFS.getFileChecksum(localPath); // FileChecksum destCheck = destFS.getFileChecksum(destPath); // if (null != destCheck && null != localCheck) { // byte[] db = destCheck.getBytes(); // byte[] lb = localCheck.getBytes(); // if (null != db && null != lb && Arrays.equals(db, lb)) { // if (log.isDebugEnabled()) // log.debug("Checksum {} for file {} is up-to-date", Arrays.toString(db), destPath); // return true; // } else { // log.debug("Checksum mismatch on file {}: local={} dest={}", destPath, lb, db); // } // } else { // log.debug("Unable to checksum files: localPath={} localCheck={}, destPath={} destCheck={}", // localPath, localCheck, destPath, destCheck); // } // } else { // log.debug("File {} does not exist", destPath); // } // return false; // } }