/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hive.common; import java.io.IOException; import java.net.URI; import java.util.BitSet; import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.util.Shell.ShellCommandExecutor; /** * Collection of file manipulation utilities common across Hive. */ public final class FileUtils { /** * Variant of Path.makeQualified that qualifies the input path against the default file system * indicated by the configuration * * This does not require a FileSystem handle in most cases - only requires the Filesystem URI. * This saves the cost of opening the Filesystem - which can involve RPCs - as well as cause * errors * * @param path * path to be fully qualified * @param conf * Configuration file * @return path qualified relative to default file system */ public static Path makeQualified(Path path, Configuration conf) throws IOException { if (!path.isAbsolute()) { // in this case we need to get the working directory // and this requires a FileSystem handle. So revert to // original method. return path.makeQualified(FileSystem.get(conf)); } URI fsUri = FileSystem.getDefaultUri(conf); URI pathUri = path.toUri(); String scheme = pathUri.getScheme(); String authority = pathUri.getAuthority(); // validate/fill-in scheme and authority. this follows logic // identical to FileSystem.get(URI, conf) - but doesn't actually // obtain a file system handle if (scheme == null) { // no scheme - use default file system uri scheme = fsUri.getScheme(); authority = fsUri.getAuthority(); if (authority == null) { authority = ""; } } else { if (authority == null) { // no authority - use default one if it applies if (scheme.equals(fsUri.getScheme()) && fsUri.getAuthority() != null) { authority = fsUri.getAuthority(); } else { authority = ""; } } } return new Path(scheme, authority, pathUri.getPath()); } private FileUtils() { // prevent instantiation } public static String makePartName(List<String> partCols, List<String> vals) { return makePartName(partCols, vals, null); } /** * Makes a valid partition name. * @param partCols The partition keys' names * @param vals The partition values * @param defaultStr * The default name given to a partition value if the respective value is empty or null. * @return An escaped, valid partition name. */ public static String makePartName(List<String> partCols, List<String> vals, String defaultStr) { StringBuilder name = new StringBuilder(); for (int i = 0; i < partCols.size(); i++) { if (i > 0) { name.append(Path.SEPARATOR); } name.append(escapePathName((partCols.get(i)).toLowerCase(), defaultStr)); name.append('='); name.append(escapePathName(vals.get(i), defaultStr)); } return name.toString(); } // NOTE: This is for generating the internal path name for partitions. Users // should always use the MetaStore API to get the path name for a partition. // Users should not directly take partition values and turn it into a path // name by themselves, because the logic below may change in the future. // // In the future, it's OK to add new chars to the escape list, and old data // won't be corrupt, because the full path name in metastore is stored. // In that case, Hive will continue to read the old data, but when it creates // new partitions, it will use new names. static BitSet charToEscape = new BitSet(128); static { for (char c = 0; c < ' '; c++) { charToEscape.set(c); } /** * ASCII 01-1F are HTTP control characters that need to be escaped. * \u000A and \u000D are \n and \r, respectively. */ char[] clist = new char[] {'\u0001', '\u0002', '\u0003', '\u0004', '\u0005', '\u0006', '\u0007', '\u0008', '\u0009', '\n', '\u000B', '\u000C', '\r', '\u000E', '\u000F', '\u0010', '\u0011', '\u0012', '\u0013', '\u0014', '\u0015', '\u0016', '\u0017', '\u0018', '\u0019', '\u001A', '\u001B', '\u001C', '\u001D', '\u001E', '\u001F', '"', '#', '%', '\'', '*', '/', ':', '=', '?', '\\', '\u007F', '{', '[', ']', '^'}; for (char c : clist) { charToEscape.set(c); } } static boolean needsEscaping(char c) { return c >= 0 && c < charToEscape.size() && charToEscape.get(c); } public static String escapePathName(String path) { return escapePathName(path, null); } /** * Escapes a path name. * @param path The path to escape. * @param defaultPath * The default name for the path, if the given path is empty or null. * @return An escaped path name. */ public static String escapePathName(String path, String defaultPath) { // __HIVE_DEFAULT_NULL__ is the system default value for null and empty string. // TODO: we should allow user to specify default partition or HDFS file location. if (path == null || path.length() == 0) { if (defaultPath == null) { //previously, when path is empty or null and no default path is specified, // __HIVE_DEFAULT_PARTITION__ was the return value for escapePathName return "__HIVE_DEFAULT_PARTITION__"; } else { return defaultPath; } } StringBuilder sb = new StringBuilder(); for (int i = 0; i < path.length(); i++) { char c = path.charAt(i); if (needsEscaping(c)) { sb.append('%'); sb.append(String.format("%1$02X", (int) c)); } else { sb.append(c); } } return sb.toString(); } public static String unescapePathName(String path) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < path.length(); i++) { char c = path.charAt(i); if (c == '%' && i + 2 < path.length()) { int code = -1; try { code = Integer.valueOf(path.substring(i + 1, i + 3), 16); } catch (Exception e) { code = -1; } if (code >= 0) { sb.append((char) code); i += 2; continue; } } sb.append(c); } return sb.toString(); } /** * Recursively lists status for all files starting from a particular directory (or individual file * as base case). * * @param fs * file system * * @param fileStatus * starting point in file system * * @param results * receives enumeration of all files found */ public static void listStatusRecursively(FileSystem fs, FileStatus fileStatus, List<FileStatus> results) throws IOException { if (fileStatus.isDir()) { for (FileStatus stat : fs.listStatus(fileStatus.getPath())) { listStatusRecursively(fs, stat, results); } } else { results.add(fileStatus); } } /** * Archive all the files in the inputFiles into outputFile * * @param inputFiles * @param outputFile * @throws IOException */ public static void tar(String parentDir, String[] inputFiles, String outputFile) throws IOException { StringBuffer tarCommand = new StringBuffer(); tarCommand.append("cd " + parentDir + " ; "); tarCommand.append(" tar -zcvf "); tarCommand.append(" " + outputFile); for (int i = 0; i < inputFiles.length; i++) { tarCommand.append(" " + inputFiles[i]); } String[] shellCmd = {"bash", "-c", tarCommand.toString()}; ShellCommandExecutor shexec = new ShellCommandExecutor(shellCmd); shexec.execute(); int exitcode = shexec.getExitCode(); if (exitcode != 0) { throw new IOException("Error tarring file " + outputFile + ". Tar process exited with exit code " + exitcode); } } }