/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package gobblin.util.commit; import java.io.IOException; import java.net.URI; import java.util.Collection; import java.util.List; import java.util.Properties; import java.util.Set; import javax.annotation.Nullable; import lombok.Getter; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import com.google.common.base.Function; import com.google.common.base.Optional; import com.google.common.collect.Lists; import com.google.common.collect.Iterables; import com.google.common.collect.Sets; import gobblin.commit.CommitStep; import gobblin.data.management.trash.Trash; import gobblin.data.management.trash.TrashFactory; import gobblin.util.PathUtils; /** * {@link CommitStep} to delete a set of paths in a {@link FileSystem}. * If {@link #parentDeletionLimit} is present, will also delete newly empty parent directories up to but not including * that limit. */ @Getter public class DeleteFileCommitStep implements CommitStep { private final Collection<FileStatus> pathsToDelete; private final Properties properties; private final URI fsUri; private final Optional<Path> parentDeletionLimit; public DeleteFileCommitStep(FileSystem fs, Path path, Properties properties) throws IOException { this(fs, Lists.newArrayList(fs.getFileStatus(path)), properties, Optional.<Path>absent()); } public static DeleteFileCommitStep fromPaths(FileSystem fs, Collection<Path> paths, Properties properties) throws IOException { return new DeleteFileCommitStep(fs, toFileStatus(fs, paths), properties, Optional.<Path>absent()); } public static DeleteFileCommitStep fromPaths(FileSystem fs, Collection<Path> paths, Properties properties, Path parentDeletionLimit) throws IOException { return new DeleteFileCommitStep(fs, toFileStatus(fs, paths), properties, Optional.of(parentDeletionLimit)); } /** * @param fs {@link FileSystem} where files need to be deleted. * @param paths Collection of {@link FileStatus}es to deleted. * @param properties {@link Properties} object including {@link Trash} configuration. * @param parentDeletionLimit if present, will delete empty parent directories up to but not including this path. If * absent, will not delete empty parent directories. * @throws IOException */ public DeleteFileCommitStep(FileSystem fs, Collection<FileStatus> paths, Properties properties, Optional<Path> parentDeletionLimit) throws IOException { this.fsUri = fs.getUri(); this.pathsToDelete = paths; this.properties = properties; this.parentDeletionLimit = parentDeletionLimit; } private static List<FileStatus> toFileStatus(FileSystem fs, Collection<Path> paths) throws IOException { List<FileStatus> fileStatuses = Lists.newArrayList(); for (Path path : paths) { fileStatuses.add(fs.getFileStatus(path)); } return fileStatuses; } @Override public boolean isCompleted() throws IOException { for (FileStatus pathToDelete : this.pathsToDelete) { if (existsAndIsExpectedFile(pathToDelete)) { return false; } } return true; } @Override public void execute() throws IOException { Trash trash = TrashFactory.createTrash(getFS(), this.properties); Set<Path> parents = Sets.newHashSet(); for (FileStatus pathToDelete : this.pathsToDelete) { if (existsAndIsExpectedFile(pathToDelete)) { trash.moveToTrash(pathToDelete.getPath()); parents.add(pathToDelete.getPath().getParent()); } } if (this.parentDeletionLimit.isPresent()) { for (Path parent : parents) { PathUtils.deleteEmptyParentDirectories(getFS(), this.parentDeletionLimit.get(), parent); } } } /** * Checks whether existing file in filesystem is the expected file (compares length and modificaiton time). */ private boolean existsAndIsExpectedFile(FileStatus status) throws IOException { if (!getFS().exists(status.getPath())) { return false; } FileStatus currentFileStatus = getFS().getFileStatus(status.getPath()); if (currentFileStatus.getLen() != status.getLen() || currentFileStatus.getModificationTime() > status.getModificationTime()) { return false; } return true; } private FileSystem getFS() throws IOException { return FileSystem.get(this.fsUri, new Configuration()); } @Override public String toString() { return String.format("Delete the following files at %s: %s", this.fsUri, Iterables.toString(Iterables.transform(this.pathsToDelete, new Function<FileStatus, Path>() { @Nullable @Override public Path apply(@Nullable FileStatus input) { return input != null ? input.getPath() : null; } }))); } }