/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package gobblin.util; import java.io.IOException; import java.net.URI; import java.util.regex.Pattern; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import com.google.common.base.Strings; import lombok.extern.slf4j.Slf4j; @Slf4j public class PathUtils { public static final Pattern GLOB_TOKENS = Pattern.compile("[,\\?\\*\\[\\{]"); public static Path mergePaths(Path path1, Path path2) { String path2Str = path2.toUri().getPath(); if (!path2Str.startsWith("/")) { path2Str = "/" + path2Str; } return new Path(path1.toUri().getScheme(), path1.toUri().getAuthority(), path1.toUri().getPath() + path2Str); } public static Path relativizePath(Path fullPath, Path pathPrefix) { return new Path(getPathWithoutSchemeAndAuthority(pathPrefix).toUri() .relativize(getPathWithoutSchemeAndAuthority(fullPath).toUri())); } /** * Checks whether possibleAncestor is an ancestor of fullPath. * @param possibleAncestor Possible ancestor of fullPath. * @param fullPath path to check. * @return true if possibleAncestor is an ancestor of fullPath. */ public static boolean isAncestor(Path possibleAncestor, Path fullPath) { return !relativizePath(fullPath, possibleAncestor).equals(getPathWithoutSchemeAndAuthority(fullPath)); } /** * Removes the Scheme and Authority from a Path. * * @see Path * @see URI */ public static Path getPathWithoutSchemeAndAuthority(Path path) { return new Path(null, null, path.toUri().getPath()); } /** * Removes the leading slash if present. * */ public static Path withoutLeadingSeparator(Path path) { return new Path(StringUtils.removeStart(path.toString(), Path.SEPARATOR)); } /** * Finds the deepest ancestor of input that is not a glob. */ public static Path deepestNonGlobPath(Path input) { Path commonRoot = input; while (commonRoot != null && isGlob(commonRoot)) { commonRoot = commonRoot.getParent(); } return commonRoot; } /** * @return true if path has glob tokens (e.g. *, {, \, }, etc.) */ public static boolean isGlob(Path path) { return (path != null) && GLOB_TOKENS.matcher(path.toString()).find(); } /** * Removes all <code>extensions</code> from <code>path</code> if they exist. * * <pre> * PathUtils.removeExtention("file.txt", ".txt") = file * PathUtils.removeExtention("file.txt.gpg", ".txt", ".gpg") = file * PathUtils.removeExtention("file", ".txt") = file * PathUtils.removeExtention("file.txt", ".tar.gz") = file.txt * PathUtils.removeExtention("file.txt.gpg", ".txt") = file.gpg * PathUtils.removeExtention("file.txt.gpg", ".gpg") = file.txt * </pre> * * @param path in which the <code>extensions</code> need to be removed * @param extensions to be removed * * @return a new {@link Path} without <code>extensions</code> */ public static Path removeExtension(Path path, String... extensions) { String pathString = path.toString(); for (String extension : extensions) { pathString = StringUtils.remove(pathString, extension); } return new Path(pathString); } /** * Suffix all <code>extensions</code> to <code>path</code>. * * <pre> * PathUtils.addExtension("/tmp/data/file", ".txt") = file.txt * PathUtils.addExtension("/tmp/data/file.txt.gpg", ".zip") = file.txt.gpg.zip * PathUtils.addExtension("/tmp/data/file.txt", ".tar", ".gz") = file.txt.tar.gz * PathUtils.addExtension("/tmp/data/file.txt.gpg", ".tar.txt") = file.txt.gpg.tar.txt * </pre> * * @param path to which the <code>extensions</code> need to be added * @param extensions to be added * * @return a new {@link Path} with <code>extensions</code> */ public static Path addExtension(Path path, String... extensions) { StringBuilder pathStringBuilder = new StringBuilder(path.toString()); for (String extension : extensions) { if (!Strings.isNullOrEmpty(extension)) { pathStringBuilder.append(extension); } } return new Path(pathStringBuilder.toString()); } public static Path combinePaths(String... paths) { if (paths.length == 0) { throw new IllegalArgumentException("Paths cannot be empty!"); } Path path = new Path(paths[0]); for (int i = 1; i < paths.length; i++) { path = new Path(path, paths[i]); } return path; } /** * Is an absolute path (ie a slash relative path part) * AND a scheme is null AND authority is null. */ public static boolean isAbsoluteAndSchemeAuthorityNull(Path path) { return (path.isAbsolute() && path.toUri().getScheme() == null && path.toUri().getAuthority() == null); } /** * Deletes empty directories starting with startPath and all ancestors up to but not including limitPath. * @param fs {@link FileSystem} where paths are located. * @param limitPath only {@link Path}s that are strict descendants of this path will be deleted. * @param startPath first {@link Path} to delete. Afterwards empty ancestors will be deleted. * @throws IOException */ public static void deleteEmptyParentDirectories(FileSystem fs, Path limitPath, Path startPath) throws IOException { if (PathUtils.isAncestor(limitPath, startPath) && !PathUtils.getPathWithoutSchemeAndAuthority(limitPath) .equals(PathUtils.getPathWithoutSchemeAndAuthority(startPath)) && fs.listStatus(startPath).length == 0) { if (!fs.delete(startPath, false)) { log.warn("Failed to delete empty directory " + startPath); } else { log.info("Deleted empty directory " + startPath); } deleteEmptyParentDirectories(fs, limitPath, startPath.getParent()); } } /** * Compare two path without shedme and authority (the prefix) * @param path1 * @param path2 * @return */ public static boolean compareWithoutSchemeAndAuthority(Path path1, Path path2) { return PathUtils.getPathWithoutSchemeAndAuthority(path1).equals(getPathWithoutSchemeAndAuthority(path2)); } }