/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.tools; import org.apache.commons.cli.Option; import org.apache.hadoop.conf.Configuration; /** * Enumeration mapping configuration keys to distcp command line * options. */ public enum DistCpOptionSwitch { /** * Ignores any failures during copy, and continues with rest. * Logs failures in a file */ IGNORE_FAILURES(DistCpConstants.CONF_LABEL_IGNORE_FAILURES, new Option("i", false, "Ignore failures during copy")), /** * Preserves status of file/path in the target. * Default behavior with -p, is to preserve replication, * block size, user, group and permission on the target file * * If any of the optional switches are present among rbugp, then * only the corresponding file attribute is preserved * */ PRESERVE_STATUS(DistCpConstants.CONF_LABEL_PRESERVE_STATUS, new Option("p", true, "preserve status (rbugp)" + "(replication, block-size, user, group, permission)")), /** * Update target location by copying only files that are missing * in the target. This can be used to periodically sync two folders * across source and target. Typically used with DELETE_MISSING * Incompatible with ATOMIC_COMMIT */ SYNC_FOLDERS(DistCpConstants.CONF_LABEL_SYNC_FOLDERS, new Option("update", false, "Update target, copying only missing" + "files or directories")), /** * Deletes missing files in target that are missing from source * This allows the target to be in sync with the source contents * Typically used in conjunction with SYNC_FOLDERS * Incompatible with ATOMIC_COMMIT */ DELETE_MISSING(DistCpConstants.CONF_LABEL_DELETE_MISSING, new Option("delete", false, "Delete from target, " + "files missing in source")), /** * Configuration file to use with hftps:// for securely copying * files across clusters. Typically the configuration file contains * truststore/keystore information such as location, password and type */ SSL_CONF(DistCpConstants.CONF_LABEL_SSL_CONF, new Option("mapredSslConf", true, "Configuration for ssl config file" + ", to use with hftps://")), /** * Max number of maps to use during copy. DistCp will split work * as equally as possible among these maps */ MAX_MAPS(DistCpConstants.CONF_LABEL_MAX_MAPS, new Option("m", true, "Max number of concurrent maps to use for copy")), /** * Source file listing can be provided to DistCp in a file. * This allows DistCp to copy random list of files from source * and copy them to target */ SOURCE_FILE_LISTING(DistCpConstants.CONF_LABEL_SOURCE_LISTING, new Option("f", true, "List of files that need to be copied")), /** * Copy all the source files and commit them atomically to the target * This is typically useful in cases where there is a process * polling for availability of a file/dir. This option is incompatible * with SYNC_FOLDERS & DELETE_MISSING */ ATOMIC_COMMIT(DistCpConstants.CONF_LABEL_ATOMIC_COPY, new Option("atomic", false, "Commit all changes or none")), /** * Work path to be used only in conjunction in Atomic commit */ WORK_PATH(DistCpConstants.CONF_LABEL_WORK_PATH, new Option("tmp", true, "Intermediate work path to be used for atomic commit")), /** * Log path where distcp output logs are written to */ LOG_PATH(DistCpConstants.CONF_LABEL_LOG_PATH, new Option("log", true, "Folder on DFS where distcp execution logs are saved")), /** * Copy strategy is use. This could be dynamic or uniform size etc. * DistCp would use an appropriate input format based on this. */ COPY_STRATEGY(DistCpConstants.CONF_LABEL_COPY_STRATEGY, new Option("strategy", true, "Copy strategy to use. Default is " + "dividing work based on file sizes")), /** * Skip CRC checks between source and target, when determining what * files need to be copied. */ SKIP_CRC(DistCpConstants.CONF_LABEL_SKIP_CRC, new Option("skipcrccheck", false, "Whether to skip CRC checks between " + "source and target paths.")), /** * Overwrite target-files unconditionally. */ OVERWRITE(DistCpConstants.CONF_LABEL_OVERWRITE, new Option("overwrite", false, "Choose to overwrite target files " + "unconditionally, even if they exist.")), /** * Should DisctpExecution be blocking */ BLOCKING("", new Option("async", false, "Should distcp execution be blocking")), FILE_LIMIT("", new Option("filelimit", true, "(Deprecated!) Limit number of files " + "copied to <= n")), SIZE_LIMIT("", new Option("sizelimit", true, "(Deprecated!) Limit number of files " + "copied to <= n bytes")), /** * Specify bandwidth per map in MB */ BANDWIDTH(DistCpConstants.CONF_LABEL_BANDWIDTH_MB, new Option("bandwidth", true, "Specify bandwidth per map in MB")); private final String confLabel; private final Option option; DistCpOptionSwitch(String confLabel, Option option) { this.confLabel = confLabel; this.option = option; } /** * Get Configuration label for the option * @return configuration label name */ public String getConfigLabel() { return confLabel; } /** * Get CLI Option corresponding to the distcp option * @return option */ public Option getOption() { return option; } /** * Get Switch symbol * @return switch symbol char */ public String getSwitch() { return option.getOpt(); } @Override public String toString() { return super.name() + " {" + "confLabel='" + confLabel + '\'' + ", option=" + option + '}'; } /** * Helper function to add an option to hadoop configuration object * @param conf - Configuration object to include the option * @param option - Option to add * @param value - Value */ public static void addToConf(Configuration conf, DistCpOptionSwitch option, String value) { conf.set(option.getConfigLabel(), value); } /** * Helper function to set an option to hadoop configuration object * @param conf - Configuration object to include the option * @param option - Option to add */ public static void addToConf(Configuration conf, DistCpOptionSwitch option) { conf.set(option.getConfigLabel(), "true"); } }