/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package gobblin.data.management.trash; import java.io.IOException; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.List; import java.util.Properties; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.fs.permission.FsAction; import org.apache.hadoop.fs.permission.FsPermission; import org.apache.hadoop.security.UserGroupInformation; import org.joda.time.DateTime; import org.joda.time.DateTimeZone; import org.joda.time.format.DateTimeFormat; import org.joda.time.format.DateTimeFormatter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import gobblin.util.PathUtils; /** * Flexible implementation of Trash similar to Hadoop trash. Allows for injecting cleanup policies for snapshots. */ public class Trash implements GobblinTrash { private static final Logger LOG = LoggerFactory.getLogger(Trash.class); private static final FsPermission PERM = new FsPermission(FsAction.ALL, FsAction.NONE, FsAction.NONE); private static final FsPermission ALL_PERM = new FsPermission(FsAction.ALL, FsAction.ALL, FsAction.ALL); /** * Location of trash directory in file system. The location can include a token $USER that will be automatically * replaced by the name of the active user. */ public static final String TRASH_LOCATION_KEY = "gobblin.trash.location"; public static final String SNAPSHOT_CLEANUP_POLICY_CLASS_KEY = "gobblin.trash.snapshot.cleanup.policy.class"; public static final String TRASH_SNAPSHOT_PREFIX = "_TRASH_SNAPSHOT_"; public static final String TRASH_IDENTIFIER_FILE = "_THIS_IS_TRASH_DIRECTORY"; public static final String DEFAULT_TRASH_DIRECTORY = "_GOBBLIN_TRASH"; public static final DateTimeFormatter TRASH_SNAPSHOT_NAME_FORMATTER = DateTimeFormat.forPattern(String.format("'%s'yyyyMMddHHmmss", TRASH_SNAPSHOT_PREFIX)).withZone(DateTimeZone.UTC); public static final PathFilter TRASH_SNAPSHOT_PATH_FILTER = new PathFilter() { @Override public boolean accept(Path path) { return !path.getName().equals(TRASH_IDENTIFIER_FILE) && path.getName().startsWith(TRASH_SNAPSHOT_PREFIX); } }; public static final PathFilter TRASH_NOT_SNAPSHOT_PATH_FILTER = new PathFilter() { @Override public boolean accept(Path path) { return !path.getName().equals(TRASH_IDENTIFIER_FILE) && !path.getName().startsWith(TRASH_SNAPSHOT_PREFIX); } }; /** * Get trash location. * @return {@link org.apache.hadoop.fs.Path} for trash directory. * @throws IOException */ public Path getTrashLocation() throws IOException { return this.trashLocation; } /** * Create location of Trash directory. Parsed from props at key {@link #TRASH_LOCATION_KEY}, defaulting to * /home/directory/_GOBBLIN_TRASH. * @param fs {@link org.apache.hadoop.fs.FileSystem} where trash should be found. * @param props {@link java.util.Properties} containing trash configuration. * @param user If the trash location contains the token $USER, the token will be replaced by the value of user. * @return {@link org.apache.hadoop.fs.Path} for trash directory. * @throws java.io.IOException */ protected Path createTrashLocation(FileSystem fs, Properties props, String user) throws IOException { Path trashLocation; if (props.containsKey(TRASH_LOCATION_KEY)) { trashLocation = new Path(props.getProperty(TRASH_LOCATION_KEY).replaceAll("\\$USER", user)); } else { trashLocation = new Path(fs.getHomeDirectory(), DEFAULT_TRASH_DIRECTORY); LOG.info("Using default trash location at " + trashLocation); } if (!trashLocation.isAbsolute()) { throw new IllegalArgumentException("Trash location must be absolute. Found " + trashLocation.toString()); } Path qualifiedTrashLocation = fs.makeQualified(trashLocation); ensureTrashLocationExists(fs, qualifiedTrashLocation); return qualifiedTrashLocation; } protected void ensureTrashLocationExists(FileSystem fs, Path trashLocation) throws IOException { if (fs.exists(trashLocation)) { if (!fs.isDirectory(trashLocation)) { throw new IOException(String.format("Trash location %s is not a directory.", trashLocation)); } if (!fs.exists(new Path(trashLocation, TRASH_IDENTIFIER_FILE))) { // If trash identifier file is not present, directory might have been created by user. // Add trash identifier file only if directory is empty. if (fs.listStatus(trashLocation).length > 0) { throw new IOException(String.format("Trash directory %s exists, but it does not look like a trash directory. " + "File: %s missing and directory is not empty.", trashLocation, TRASH_IDENTIFIER_FILE)); } else if (!fs.createNewFile(new Path(trashLocation, TRASH_IDENTIFIER_FILE))) { throw new IOException(String.format("Failed to create file %s in existing trash directory %s.", TRASH_IDENTIFIER_FILE, trashLocation)); } } } else if (!(fs.mkdirs(trashLocation.getParent(), ALL_PERM) && fs.mkdirs(trashLocation, PERM) && fs.createNewFile(new Path(trashLocation, TRASH_IDENTIFIER_FILE)))) { // Failed to create directory or create trash identifier file. throw new IOException("Failed to create trash directory at " + trashLocation.toString()); } } protected final FileSystem fs; private final Path trashLocation; private final SnapshotCleanupPolicy snapshotCleanupPolicy; /** * @deprecated Use {@link gobblin.data.management.trash.TrashFactory}. */ @Deprecated public Trash(FileSystem fs) throws IOException { this(fs, new Properties()); } /** * @deprecated Use {@link gobblin.data.management.trash.TrashFactory}. */ @Deprecated public Trash(FileSystem fs, Properties props) throws IOException { this(fs, props, UserGroupInformation.getCurrentUser().getUserName()); } protected Trash(FileSystem fs, Properties props, String user) throws IOException { this.fs = fs; this.trashLocation = createTrashLocation(fs, props, user); try { Class<?> snapshotCleanupPolicyClass = Class.forName(props.getProperty(SNAPSHOT_CLEANUP_POLICY_CLASS_KEY, TimeBasedSnapshotCleanupPolicy.class.getCanonicalName())); this.snapshotCleanupPolicy = (SnapshotCleanupPolicy) snapshotCleanupPolicyClass.getConstructor(Properties.class).newInstance(props); } catch (Exception exception) { throw new IllegalArgumentException("Could not create snapshot cleanup policy with class " + props .getProperty(SNAPSHOT_CLEANUP_POLICY_CLASS_KEY, TimeBasedSnapshotCleanupPolicy.class.getCanonicalName()), exception); } } /** * Move a path to trash. The absolute path of the input path will be replicated under the trash directory. * @param path {@link org.apache.hadoop.fs.FileSystem} path to move to trash. * @return true if move to trash was done successfully. * @throws IOException */ @Override public boolean moveToTrash(Path path) throws IOException { Path fullyResolvedPath = path.isAbsolute() ? path : new Path(this.fs.getWorkingDirectory(), path); Path targetPathInTrash = PathUtils.mergePaths(this.trashLocation, fullyResolvedPath); if (!this.fs.exists(targetPathInTrash.getParent())) { this.fs.mkdirs(targetPathInTrash.getParent()); } else if (this.fs.exists(targetPathInTrash)) { targetPathInTrash = targetPathInTrash.suffix("_" + System.currentTimeMillis()); } return this.fs.rename(fullyResolvedPath, targetPathInTrash); } /** * Moves all current contents of trash directory into a snapshot directory with current timestamp. * @throws IOException */ public void createTrashSnapshot() throws IOException { FileStatus[] pathsInTrash = this.fs.listStatus(this.trashLocation, TRASH_NOT_SNAPSHOT_PATH_FILTER); if (pathsInTrash.length <= 0) { LOG.info("Nothing in trash. Will not create snapshot."); return; } Path snapshotDir = new Path(this.trashLocation, new DateTime().toString(TRASH_SNAPSHOT_NAME_FORMATTER)); if (this.fs.exists(snapshotDir)) { throw new IOException("New snapshot directory " + snapshotDir.toString() + " already exists."); } if (!this.fs.mkdirs(snapshotDir, PERM)) { throw new IOException("Failed to create new snapshot directory at " + snapshotDir.toString()); } LOG.info(String.format("Moving %d paths in Trash directory to newly created snapshot at %s.", pathsInTrash.length, snapshotDir.toString())); int pathsFailedToMove = 0; for (FileStatus fileStatus : pathsInTrash) { Path pathRelativeToTrash = PathUtils.relativizePath(fileStatus.getPath(), this.trashLocation); Path targetPath = new Path(snapshotDir, pathRelativeToTrash); boolean movedThisPath = true; try { movedThisPath = this.fs.rename(fileStatus.getPath(), targetPath); } catch (IOException exception) { LOG.error("Failed to move path " + fileStatus.getPath().toString() + " to snapshot.", exception); pathsFailedToMove += 1; continue; } if (!movedThisPath) { LOG.error("Failed to move path " + fileStatus.getPath().toString() + " to snapshot."); pathsFailedToMove += 1; } } if (pathsFailedToMove > 0) { LOG.error( String.format("Failed to move %d paths to the snapshot at %s.", pathsFailedToMove, snapshotDir.toString())); } } /** * For each existing trash snapshot, uses a {@link gobblin.data.management.trash.SnapshotCleanupPolicy} to determine whether * the snapshot should be deleted. If so, delete it permanently. * * <p> * Each existing snapshot will be passed to {@link gobblin.data.management.trash.SnapshotCleanupPolicy#shouldDeleteSnapshot} * from oldest to newest, and will be deleted if the method returns true. * </p> * * @throws IOException */ public void purgeTrashSnapshots() throws IOException { List<FileStatus> snapshotsInTrash = Arrays.asList(this.fs.listStatus(this.trashLocation, TRASH_SNAPSHOT_PATH_FILTER)); Collections.sort(snapshotsInTrash, new Comparator<FileStatus>() { @Override public int compare(FileStatus o1, FileStatus o2) { return TRASH_SNAPSHOT_NAME_FORMATTER.parseDateTime(o1.getPath().getName()) .compareTo(TRASH_SNAPSHOT_NAME_FORMATTER.parseDateTime(o2.getPath().getName())); } }); int totalSnapshots = snapshotsInTrash.size(); int snapshotsDeleted = 0; for (FileStatus snapshot : snapshotsInTrash) { if (this.snapshotCleanupPolicy.shouldDeleteSnapshot(snapshot, this)) { try { boolean successfullyDeleted = this.fs.delete(snapshot.getPath(), true); if (successfullyDeleted) { snapshotsDeleted++; } else { LOG.error("Failed to delete snapshot " + snapshot.getPath()); } } catch (IOException exception) { LOG.error("Failed to delete snapshot " + snapshot.getPath(), exception); } } } LOG.info(String.format("Deleted %d out of %d existing snapshots.", snapshotsDeleted, totalSnapshots)); } }