/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.backup.mapreduce;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.backup.BackupRestoreConstants;
import org.apache.hadoop.hbase.backup.RestoreJob;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.mapreduce.WALPlayer;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.util.Tool;

/**
 * MapReduce implementation of {@link RestoreJob}.
 *
 * For a full backup restore, it runs the {@link HFileSplitterJob} job, which creates
 * HFiles aligned with the region boundaries of the table being restored; for an
 * incremental backup restore it runs {@link WALPlayer} in bulk load mode (creating
 * HFiles from WAL edits).
 *
 * The resulting HFiles are then loaded using the HBase bulk load tool,
 * {@link LoadIncrementalHFiles}.
 *
 * A brief usage sketch is given in a comment at the end of this file.
 */
@InterfaceAudience.Private
public class MapReduceRestoreJob implements RestoreJob {
  public static final Log LOG = LogFactory.getLog(MapReduceRestoreJob.class);

  private Tool player;
  private Configuration conf;

  public MapReduceRestoreJob() {
  }

  @Override
  public void run(Path[] dirPaths, TableName[] tableNames, TableName[] newTableNames,
      boolean fullBackupRestore) throws IOException {
    String bulkOutputConfKey;

    player = new HFileSplitterJob();
    bulkOutputConfKey = HFileSplitterJob.BULK_OUTPUT_CONF_KEY;
    // The player reads all files in an arbitrary directory structure and creates
    // a Map task for each file.
    String dirs = StringUtils.join(dirPaths, ",");

    if (LOG.isDebugEnabled()) {
      LOG.debug("Restore " + (fullBackupRestore ? "full" : "incremental")
          + " backup from directory " + dirs + " from hbase tables "
          + StringUtils.join(tableNames, BackupRestoreConstants.TABLENAME_DELIMITER_IN_COMMAND)
          + " to tables "
          + StringUtils.join(newTableNames, BackupRestoreConstants.TABLENAME_DELIMITER_IN_COMMAND));
    }

    for (int i = 0; i < tableNames.length; i++) {
      LOG.info("Restore " + tableNames[i] + " into " + newTableNames[i]);

      Path bulkOutputPath = getBulkOutputDir(getFileNameCompatibleString(newTableNames[i]));
      Configuration conf = getConf();
      conf.set(bulkOutputConfKey, bulkOutputPath.toString());
      String[] playerArgs = { dirs,
          fullBackupRestore ? newTableNames[i].getNameAsString() : tableNames[i].getNameAsString() };

      int result = 0;
      int loaderResult = 0;
      try {
        player.setConf(getConf());
        result = player.run(playerArgs);
        if (succeeded(result)) {
          // do bulk load
          LoadIncrementalHFiles loader = createLoader(getConf());
          if (LOG.isDebugEnabled()) {
            LOG.debug("Restoring HFiles from directory " + bulkOutputPath);
          }
          String[] args = { bulkOutputPath.toString(), newTableNames[i].getNameAsString() };
          loaderResult = loader.run(args);
          if (failed(loaderResult)) {
            throw new IOException("Can not restore from backup directory " + dirs
                + " (check Hadoop and HBase logs). Bulk loader return code =" + loaderResult);
          }
        } else {
          throw new IOException("Can not restore from backup directory " + dirs
              + " (check Hadoop/MR and HBase logs). Player return code =" + result);
        }
        LOG.debug("Restore Job finished:" + result);
      } catch (Exception e) {
        throw new IOException("Can not restore from backup directory " + dirs
            + " (check Hadoop and HBase logs) ", e);
      }
    }
  }

  private String getFileNameCompatibleString(TableName table) {
    return table.getNamespaceAsString() + "-" + table.getQualifierAsString();
  }

  private boolean failed(int result) {
    return result != 0;
  }

  private boolean succeeded(int result) {
    return result == 0;
  }

  public static LoadIncrementalHFiles createLoader(Configuration config) throws IOException {
    // Set configuration for restore: LoadIncrementalHFiles needs more time than the
    // default hbase.rpc.timeout allows, so raise the timeout to one hour here.
    Integer milliSecInHour = 3600000;
    Configuration conf = new Configuration(config);
    conf.setInt(HConstants.HBASE_RPC_TIMEOUT_KEY, milliSecInHour);

    // By default the limit is 32, and the loader will fail if the number of files in any
    // region exceeds it, which is bad for snapshot restore. Lift the limit.
    conf.setInt(LoadIncrementalHFiles.MAX_FILES_PER_REGION_PER_FAMILY, Integer.MAX_VALUE);
    conf.set(LoadIncrementalHFiles.IGNORE_UNMATCHED_CF_CONF_KEY, "yes");
    LoadIncrementalHFiles loader = null;
    try {
      loader = new LoadIncrementalHFiles(conf);
    } catch (Exception e) {
      throw new IOException(e);
    }
    return loader;
  }

  private Path getBulkOutputDir(String tableName) throws IOException {
    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    String tmp = conf.get(HConstants.TEMPORARY_FS_DIRECTORY_KEY,
        HConstants.DEFAULT_TEMPORARY_HDFS_DIRECTORY);
    Path path = new Path(tmp + Path.SEPARATOR + "bulk_output-" + tableName + "-"
        + EnvironmentEdgeManager.currentTime());
    fs.deleteOnExit(path);
    return path;
  }

  @Override
  public Configuration getConf() {
    return conf;
  }

  @Override
  public void setConf(Configuration conf) {
    this.conf = conf;
  }
}
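
// A minimal usage sketch, kept as a comment so the file stays compilable. It is illustrative
// only: the backup directory, table names, and the choice of a full restore below are
// hypothetical, not part of this class. Only APIs used elsewhere in this file plus
// HBaseConfiguration.create() and TableName.valueOf(String) are assumed.
//
//   Configuration conf = HBaseConfiguration.create();
//   MapReduceRestoreJob restore = new MapReduceRestoreJob();
//   restore.setConf(conf);
//
//   Path[] backupDirs = { new Path("hdfs:///backup/backup_1490000000000/default/t1") };
//   TableName[] fromTables = { TableName.valueOf("t1") };
//   TableName[] toTables = { TableName.valueOf("t1_restored") };
//
//   // true selects the full-backup path; the splitter job writes region-aligned HFiles into a
//   // temporary bulk output directory, and LoadIncrementalHFiles then loads them into the table.
//   restore.run(backupDirs, fromTables, toTables, true);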