MultiTableInputFormatTestBase.java example

Explorer
hbase-master
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.mapreduce;

import com.google.common.collect.Lists;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.CategoryBasedTimeout;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestRule;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.NavigableMap;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

/**
 * Base set of tests and setup for input formats touching multiple tables.
 */
public abstract class MultiTableInputFormatTestBase {
  @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
      withTimeout(this.getClass()).withLookingForStuckThread(true).build();
  static final Log LOG = LogFactory.getLog(TestMultiTableInputFormat.class);
  public static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
  static final String TABLE_NAME = "scantest";
  static final byte[] INPUT_FAMILY = Bytes.toBytes("contents");
  static final String KEY_STARTROW = "startRow";
  static final String KEY_LASTROW = "stpRow";

  static List<String> TABLES = Lists.newArrayList();

  static {
    for (int i = 0; i < 3; i++) {
      TABLES.add(TABLE_NAME + String.valueOf(i));
    }
  }

  @BeforeClass
  public static void setUpBeforeClass() throws Exception {
    // switch TIF to log at DEBUG level
    TEST_UTIL.enableDebug(MultiTableInputFormatBase.class);
    // start mini hbase cluster
    TEST_UTIL.startMiniCluster(3);
    // create and fill table
    for (String tableName : TABLES) {
      try (Table table =
          TEST_UTIL.createMultiRegionTable(TableName.valueOf(tableName),
            INPUT_FAMILY, 4)) {
        TEST_UTIL.loadTable(table, INPUT_FAMILY, false);
      }
    }
  }

  @AfterClass
  public static void tearDownAfterClass() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  @After
  public void tearDown() throws Exception {
    Configuration c = TEST_UTIL.getConfiguration();
    FileUtil.fullyDelete(new File(c.get("hadoop.tmp.dir")));
  }

  /**
   * Pass the key and value to reducer.
   */
  public static class ScanMapper extends
      TableMapper<ImmutableBytesWritable, ImmutableBytesWritable> {
    /**
     * Pass the key and value to reduce.
     *
     * @param key The key, here "aaa", "aab" etc.
     * @param value The value is the same as the key.
     * @param context The task context.
     * @throws IOException When reading the rows fails.
     */
    @Override
    public void map(ImmutableBytesWritable key, Result value, Context context)
        throws IOException, InterruptedException {
      makeAssertions(key, value);
      context.write(key, key);
    }

    public void makeAssertions(ImmutableBytesWritable key, Result value) throws IOException {
      if (value.size() != 1) {
        throw new IOException("There should only be one input column");
      }
      Map<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> cf =
          value.getMap();
      if (!cf.containsKey(INPUT_FAMILY)) {
        throw new IOException("Wrong input columns. Missing: '" +
            Bytes.toString(INPUT_FAMILY) + "'.");
      }
      String val = Bytes.toStringBinary(value.getValue(INPUT_FAMILY, null));
      LOG.debug("map: key -> " + Bytes.toStringBinary(key.get()) +
          ", value -> " + val);
    }
  }

  /**
   * Checks the last and first keys seen against the scanner boundaries.
   */
  public static class ScanReducer
      extends
      Reducer<ImmutableBytesWritable, ImmutableBytesWritable,
          NullWritable, NullWritable> {
    private String first = null;
    private String last = null;

    @Override
    protected void reduce(ImmutableBytesWritable key,
        Iterable<ImmutableBytesWritable> values, Context context)
        throws IOException, InterruptedException {
      makeAssertions(key, values);
    }

    protected void makeAssertions(ImmutableBytesWritable key,
        Iterable<ImmutableBytesWritable> values) {
      int count = 0;
      for (ImmutableBytesWritable value : values) {
        String val = Bytes.toStringBinary(value.get());
        LOG.debug("reduce: key[" + count + "] -> " +
            Bytes.toStringBinary(key.get()) + ", value -> " + val);
        if (first == null) first = val;
        last = val;
        count++;
      }
      assertEquals(3, count);
    }

    @Override
    protected void cleanup(Context context) throws IOException,
        InterruptedException {
      Configuration c = context.getConfiguration();
      cleanup(c);
    }

    protected void cleanup(Configuration c) {
      String startRow = c.get(KEY_STARTROW);
      String lastRow = c.get(KEY_LASTROW);
      LOG.info("cleanup: first -> \"" + first + "\", start row -> \"" +
          startRow + "\"");
      LOG.info("cleanup: last -> \"" + last + "\", last row -> \"" + lastRow +
          "\"");
      if (startRow != null && startRow.length() > 0) {
        assertEquals(startRow, first);
      }
      if (lastRow != null && lastRow.length() > 0) {
        assertEquals(lastRow, last);
      }
    }
  }

  @Test
  public void testScanEmptyToEmpty() throws IOException, InterruptedException,
      ClassNotFoundException {
    testScan(null, null, null);
  }

  @Test
  public void testScanEmptyToAPP() throws IOException, InterruptedException,
      ClassNotFoundException {
    testScan(null, "app", "apo");
  }

  @Test
  public void testScanOBBToOPP() throws IOException, InterruptedException,
      ClassNotFoundException {
    testScan("obb", "opp", "opo");
  }

  @Test
  public void testScanYZYToEmpty() throws IOException, InterruptedException,
      ClassNotFoundException {
    testScan("yzy", null, "zzz");
  }

  /**
   * Tests a MR scan using specific start and stop rows.
   *
   * @throws IOException
   * @throws ClassNotFoundException
   * @throws InterruptedException
   */
  private void testScan(String start, String stop, String last)
      throws IOException, InterruptedException, ClassNotFoundException {
    String jobName =
        "Scan" + (start != null ? start.toUpperCase(Locale.ROOT) : "Empty") + "To" +
            (stop != null ? stop.toUpperCase(Locale.ROOT) : "Empty");
    LOG.info("Before map/reduce startup - job " + jobName);
    Configuration c = new Configuration(TEST_UTIL.getConfiguration());

    c.set(KEY_STARTROW, start != null ? start : "");
    c.set(KEY_LASTROW, last != null ? last : "");

    List<Scan> scans = new ArrayList<>();

    for (String tableName : TABLES) {
      Scan scan = new Scan();

      scan.addFamily(INPUT_FAMILY);
      scan.setAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME, Bytes.toBytes(tableName));

      if (start != null) {
        scan.setStartRow(Bytes.toBytes(start));
      }
      if (stop != null) {
        scan.setStopRow(Bytes.toBytes(stop));
      }

      scans.add(scan);

      LOG.info("scan before: " + scan);
    }

    runJob(jobName, c, scans);
  }

  protected void runJob(String jobName, Configuration c, List<Scan> scans)
      throws IOException, InterruptedException, ClassNotFoundException {
    Job job = new Job(c, jobName);

    initJob(scans, job);
    job.setReducerClass(ScanReducer.class);
    job.setNumReduceTasks(1); // one to get final "first" and "last" key
    FileOutputFormat.setOutputPath(job, new Path(job.getJobName()));
    LOG.info("Started " + job.getJobName());
    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());
    LOG.info("After map/reduce completion - job " + jobName);
  }

  protected abstract void initJob(List<Scan> scans, Job job) throws IOException;


}