/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hbase.backup.impl; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathFilter; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.backup.impl.BackupSystemTable.WALItem; import org.apache.hadoop.hbase.backup.master.LogRollMasterProcedureManager; import org.apache.hadoop.hbase.backup.util.BackupUtils; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.client.Admin; import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.wal.AbstractFSWALProvider; /** * After a full backup was created, the incremental backup will only store the changes made after * the last full or incremental backup. Creating the backup copies the logfiles in .logs and * .oldlogs since the last backup timestamp. */ @InterfaceAudience.Private public class IncrementalBackupManager extends BackupManager { public static final Log LOG = LogFactory.getLog(IncrementalBackupManager.class); public IncrementalBackupManager(Connection conn, Configuration conf) throws IOException { super(conn, conf); } /** * Obtain the list of logs that need to be copied out for this incremental backup. The list is set * in BackupInfo. * @return The new HashMap of RS log time stamps after the log roll for this incremental backup. * @throws IOException exception */ public HashMap<String, Long> getIncrBackupLogFileMap() throws IOException { List<String> logList; HashMap<String, Long> newTimestamps; HashMap<String, Long> previousTimestampMins; String savedStartCode = readBackupStartCode(); // key: tableName // value: <RegionServer,PreviousTimeStamp> HashMap<TableName, HashMap<String, Long>> previousTimestampMap = readLogTimestampMap(); previousTimestampMins = BackupUtils.getRSLogTimestampMins(previousTimestampMap); if (LOG.isDebugEnabled()) { LOG.debug("StartCode " + savedStartCode + "for backupID " + backupInfo.getBackupId()); } // get all new log files from .logs and .oldlogs after last TS and before new timestamp if (savedStartCode == null || previousTimestampMins == null || previousTimestampMins.isEmpty()) { throw new IOException( "Cannot read any previous back up timestamps from backup system table. " + "In order to create an incremental backup, at least one full backup is needed."); } LOG.info("Execute roll log procedure for incremental backup ..."); HashMap<String, String> props = new HashMap<String, String>(); props.put("backupRoot", backupInfo.getBackupRootDir()); try (Admin admin = conn.getAdmin();) { admin.execProcedure(LogRollMasterProcedureManager.ROLLLOG_PROCEDURE_SIGNATURE, LogRollMasterProcedureManager.ROLLLOG_PROCEDURE_NAME, props); } newTimestamps = readRegionServerLastLogRollResult(); logList = getLogFilesForNewBackup(previousTimestampMins, newTimestamps, conf, savedStartCode); List<WALItem> logFromSystemTable = getLogFilesFromBackupSystem(previousTimestampMins, newTimestamps, getBackupInfo() .getBackupRootDir()); logList = excludeAlreadyBackedUpWALs(logList, logFromSystemTable); backupInfo.setIncrBackupFileList(logList); return newTimestamps; } /** * Get list of WAL files eligible for incremental backup * @return list of WAL files * @throws IOException */ public List<String> getIncrBackupLogFileList() throws IOException { List<String> logList; HashMap<String, Long> newTimestamps; HashMap<String, Long> previousTimestampMins; String savedStartCode = readBackupStartCode(); // key: tableName // value: <RegionServer,PreviousTimeStamp> HashMap<TableName, HashMap<String, Long>> previousTimestampMap = readLogTimestampMap(); previousTimestampMins = BackupUtils.getRSLogTimestampMins(previousTimestampMap); if (LOG.isDebugEnabled()) { LOG.debug("StartCode " + savedStartCode + "for backupID " + backupInfo.getBackupId()); } // get all new log files from .logs and .oldlogs after last TS and before new timestamp if (savedStartCode == null || previousTimestampMins == null || previousTimestampMins.isEmpty()) { throw new IOException( "Cannot read any previous back up timestamps from backup system table. " + "In order to create an incremental backup, at least one full backup is needed."); } newTimestamps = readRegionServerLastLogRollResult(); logList = getLogFilesForNewBackup(previousTimestampMins, newTimestamps, conf, savedStartCode); List<WALItem> logFromSystemTable = getLogFilesFromBackupSystem(previousTimestampMins, newTimestamps, getBackupInfo() .getBackupRootDir()); logList = excludeAlreadyBackedUpWALs(logList, logFromSystemTable); backupInfo.setIncrBackupFileList(logList); return logList; } private List<String> excludeAlreadyBackedUpWALs(List<String> logList, List<WALItem> logFromSystemTable) { Set<String> walFileNameSet = convertToSet(logFromSystemTable); List<String> list = new ArrayList<String>(); for (int i=0; i < logList.size(); i++) { Path p = new Path(logList.get(i)); String name = p.getName(); if (walFileNameSet.contains(name)) continue; list.add(logList.get(i)); } return list; } /** * Create Set of WAL file names (not full path names) * @param logFromSystemTable * @return set of WAL file names */ private Set<String> convertToSet(List<WALItem> logFromSystemTable) { Set<String> set = new HashSet<String>(); for (int i=0; i < logFromSystemTable.size(); i++) { WALItem item = logFromSystemTable.get(i); set.add(item.walFile); } return set; } /** * For each region server: get all log files newer than the last timestamps, but not newer than * the newest timestamps. * @param olderTimestamps timestamp map for each region server of the last backup. * @param newestTimestamps timestamp map for each region server that the backup should lead to. * @return list of log files which needs to be added to this backup * @throws IOException */ private List<WALItem> getLogFilesFromBackupSystem(HashMap<String, Long> olderTimestamps, HashMap<String, Long> newestTimestamps, String backupRoot) throws IOException { List<WALItem> logFiles = new ArrayList<WALItem>(); Iterator<WALItem> it = getWALFilesFromBackupSystem(); while (it.hasNext()) { WALItem item = it.next(); String rootDir = item.getBackupRoot(); if (!rootDir.equals(backupRoot)) { continue; } String walFileName = item.getWalFile(); String server = BackupUtils.parseHostNameFromLogFile(new Path(walFileName)); if (server == null) { continue; } Long tss = getTimestamp(walFileName); Long oldTss = olderTimestamps.get(server); Long newTss = newestTimestamps.get(server); if (oldTss == null) { logFiles.add(item); continue; } if (newTss == null) { newTss = Long.MAX_VALUE; } if (tss > oldTss && tss < newTss) { logFiles.add(item); } } return logFiles; } private Long getTimestamp(String walFileName) { int index = walFileName.lastIndexOf(BackupUtils.LOGNAME_SEPARATOR); return Long.parseLong(walFileName.substring(index + 1)); } /** * For each region server: get all log files newer than the last timestamps but not newer than the * newest timestamps. * @param olderTimestamps the timestamp for each region server of the last backup. * @param newestTimestamps the timestamp for each region server that the backup should lead to. * @param conf the Hadoop and Hbase configuration * @param savedStartCode the startcode (timestamp) of last successful backup. * @return a list of log files to be backed up * @throws IOException exception */ private List<String> getLogFilesForNewBackup(HashMap<String, Long> olderTimestamps, HashMap<String, Long> newestTimestamps, Configuration conf, String savedStartCode) throws IOException { LOG.debug("In getLogFilesForNewBackup()\n" + "olderTimestamps: " + olderTimestamps + "\n newestTimestamps: " + newestTimestamps); Path rootdir = FSUtils.getRootDir(conf); Path logDir = new Path(rootdir, HConstants.HREGION_LOGDIR_NAME); Path oldLogDir = new Path(rootdir, HConstants.HREGION_OLDLOGDIR_NAME); FileSystem fs = rootdir.getFileSystem(conf); NewestLogFilter pathFilter = new NewestLogFilter(); List<String> resultLogFiles = new ArrayList<String>(); List<String> newestLogs = new ArrayList<String>(); /* * The old region servers and timestamps info we kept in backup system table may be out of sync * if new region server is added or existing one lost. We'll deal with it here when processing * the logs. If data in backup system table has more hosts, just ignore it. If the .logs * directory includes more hosts, the additional hosts will not have old timestamps to compare * with. We'll just use all the logs in that directory. We always write up-to-date region server * and timestamp info to backup system table at the end of successful backup. */ FileStatus[] rss; Path p; String host; Long oldTimeStamp; String currentLogFile; long currentLogTS; // Get the files in .logs. rss = fs.listStatus(logDir); for (FileStatus rs : rss) { p = rs.getPath(); host = BackupUtils.parseHostNameFromLogFile(p); if (host == null) { continue; } FileStatus[] logs; oldTimeStamp = olderTimestamps.get(host); // It is possible that there is no old timestamp in backup system table for this host if // this region server is newly added after our last backup. if (oldTimeStamp == null) { logs = fs.listStatus(p); } else { pathFilter.setLastBackupTS(oldTimeStamp); logs = fs.listStatus(p, pathFilter); } for (FileStatus log : logs) { LOG.debug("currentLogFile: " + log.getPath().toString()); if (AbstractFSWALProvider.isMetaFile(log.getPath())) { if (LOG.isDebugEnabled()) { LOG.debug("Skip hbase:meta log file: " + log.getPath().getName()); } continue; } currentLogFile = log.getPath().toString(); resultLogFiles.add(currentLogFile); currentLogTS = BackupUtils.getCreationTime(log.getPath()); // newestTimestamps is up-to-date with the current list of hosts // so newestTimestamps.get(host) will not be null. if (currentLogTS > newestTimestamps.get(host)) { newestLogs.add(currentLogFile); } } } // Include the .oldlogs files too. FileStatus[] oldlogs = fs.listStatus(oldLogDir); for (FileStatus oldlog : oldlogs) { p = oldlog.getPath(); currentLogFile = p.toString(); if (AbstractFSWALProvider.isMetaFile(p)) { if (LOG.isDebugEnabled()) { LOG.debug("Skip .meta log file: " + currentLogFile); } continue; } host = BackupUtils.parseHostFromOldLog(p); if (host == null) { continue; } currentLogTS = BackupUtils.getCreationTime(p); oldTimeStamp = olderTimestamps.get(host); /* * It is possible that there is no old timestamp in backup system table for this host. At the * time of our last backup operation, this rs did not exist. The reason can be one of the two: * 1. The rs already left/crashed. Its logs were moved to .oldlogs. 2. The rs was added after * our last backup. */ if (oldTimeStamp == null) { if (currentLogTS < Long.parseLong(savedStartCode)) { // This log file is really old, its region server was before our last backup. continue; } else { resultLogFiles.add(currentLogFile); } } else if (currentLogTS > oldTimeStamp) { resultLogFiles.add(currentLogFile); } // It is possible that a host in .oldlogs is an obsolete region server // so newestTimestamps.get(host) here can be null. // Even if these logs belong to a obsolete region server, we still need // to include they to avoid loss of edits for backup. Long newTimestamp = newestTimestamps.get(host); if (newTimestamp != null && currentLogTS > newTimestamp) { newestLogs.add(currentLogFile); } } // remove newest log per host because they are still in use resultLogFiles.removeAll(newestLogs); return resultLogFiles; } static class NewestLogFilter implements PathFilter { private Long lastBackupTS = 0L; public NewestLogFilter() { } protected void setLastBackupTS(Long ts) { this.lastBackupTS = ts; } @Override public boolean accept(Path path) { // skip meta table log -- ts.meta file if (AbstractFSWALProvider.isMetaFile(path)) { if (LOG.isDebugEnabled()) { LOG.debug("Skip .meta log file: " + path.getName()); } return false; } long timestamp; try { timestamp = BackupUtils.getCreationTime(path); return timestamp > lastBackupTS; } catch (Exception e) { LOG.warn("Cannot read timestamp of log file " + path); return false; } } } }