/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.backup;

import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Abortable;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.impl.BackupSystemTable;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.master.cleaner.BaseHFileCleanerDelegate;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Predicate;
import com.google.common.collect.Iterables;

/**
 * Implementation of a file cleaner that checks if an hfile is still referenced by backup before
 * deleting it from hfile archive directory.
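 * <p>
 * Note: for the check to take effect, this cleaner is typically registered with the master's hfile
 * cleaner chain, for example by adding this class to the {@code hbase.master.hfilecleaner.plugins}
 * configuration.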
 */
@InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
public class BackupHFileCleaner extends BaseHFileCleanerDelegate implements Abortable {
  private static final Log LOG = LogFactory.getLog(BackupHFileCleaner.class);
  private boolean stopped = false;
  private boolean aborted;
  private Configuration conf;
  private Connection connection;
  // timestamp of most recent read from backup:system table
  private long prevReadFromBackupTbl = 0;
  // timestamp of 2nd most recent read from backup:system table
  private long secondPrevReadFromBackupTbl = 0;
  // used by unit test to skip reading backup:system
  private boolean checkForFullyBackedUpTables = true;
  private List<TableName> fullyBackedUpTables = null;

  /**
   * Collects the file names of all bulk loaded hfiles recorded in the given maps.
   */
  private Set<String> getFilenameFromBulkLoad(Map<byte[], List<Path>>[] maps) {
    Set<String> filenames = new HashSet<String>();
    for (Map<byte[], List<Path>> map : maps) {
      if (map == null) {
        continue;
      }
      for (List<Path> paths : map.values()) {
        for (Path p : paths) {
          filenames.add(p.getName());
        }
      }
    }
    return filenames;
  }

  /**
   * Reads the bulk loaded hfile references for the given tables from the backup system table and
   * records when the reads happened.
   */
  private Set<String> loadHFileRefs(List<TableName> tableList) throws IOException {
    if (connection == null) {
      connection = ConnectionFactory.createConnection(conf);
    }
    try (BackupSystemTable tbl = new BackupSystemTable(connection)) {
      Map<byte[], List<Path>>[] res = tbl.readBulkLoadedFiles(null, tableList);
      secondPrevReadFromBackupTbl = prevReadFromBackupTbl;
      prevReadFromBackupTbl = EnvironmentEdgeManager.currentTime();
      return getFilenameFromBulkLoad(res);
    }
  }

  @VisibleForTesting
  void setCheckForFullyBackedUpTables(boolean b) {
    checkForFullyBackedUpTables = b;
  }

  @Override
  public Iterable<FileStatus> getDeletableFiles(Iterable<FileStatus> files) {
    if (conf == null) {
      return files;
    }
    // obtain the Set of TableName's which have been fully backed up
    // so that we filter BulkLoad to be returned from server
    if (checkForFullyBackedUpTables) {
      if (connection == null) {
        return files;
      }
      try (BackupSystemTable tbl = new BackupSystemTable(connection)) {
        fullyBackedUpTables = tbl.getTablesForBackupType(BackupType.FULL);
      } catch (IOException ioe) {
        LOG.error("Failed to get tables which have been fully backed up, skipping checking", ioe);
        return Collections.emptyList();
      }
      Collections.sort(fullyBackedUpTables);
    }
    final Set<String> hfileRefs;
    try {
      hfileRefs = loadHFileRefs(fullyBackedUpTables);
    } catch (IOException ioe) {
      LOG.error("Failed to read hfile references, skipping checking deletable files", ioe);
      return Collections.emptyList();
    }
    Iterable<FileStatus> deletables = Iterables.filter(files, new Predicate<FileStatus>() {
      @Override
      public boolean apply(FileStatus file) {
        // If the file is recent, be conservative and wait for one more scan of backup:system table
        if (file.getModificationTime() > secondPrevReadFromBackupTbl) {
          return false;
        }
        String hfile = file.getPath().getName();
        boolean foundHFileRef = hfileRefs.contains(hfile);
        return !foundHFileRef;
      }
    });
    return deletables;
  }

  @Override
  public boolean isFileDeletable(FileStatus fStat) {
    // work is done in getDeletableFiles()
    return true;
  }

  @Override
  public void setConf(Configuration config) {
    this.conf = config;
    this.connection = null;
    try {
      this.connection = ConnectionFactory.createConnection(conf);
    } catch (IOException ioe) {
      LOG.error("Couldn't establish connection", ioe);
    }
  }

  @Override
  public void stop(String why) {
    if (this.stopped) {
      return;
    }
    if (this.connection != null) {
      try {
        this.connection.close();
      } catch (IOException ioe) {
        LOG.debug("Got " + ioe + " when closing connection");
      }
    }
    this.stopped = true;
  }

  @Override
  public boolean isStopped() {
    return this.stopped;
  }

  @Override
  public void abort(String why, Throwable e) {
    LOG.warn("Aborting BackupHFileCleaner because " + why, e);
    this.aborted = true;
    stop(why);
  }

  @Override
  public boolean isAborted() {
    return this.aborted;
  }
}