/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hbase.mapreduce; import static org.junit.Assert.assertEquals; import java.io.IOException; import java.util.Arrays; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.CategoryBasedTimeout; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.ResultScanner; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.mapreduce.SyncTable.SyncMapper.Counter; import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.mapreduce.Counters; import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Rule; import org.junit.Test; import org.junit.experimental.categories.Category; import org.junit.rules.TestName; import 
org.junit.rules.TestRule;

import com.google.common.base.Throwables;

/**
 * Basic test for the SyncTable M/R tool
 */
@Category(LargeTests.class)
public class TestSyncTable {
  @Rule public final TestRule timeout = CategoryBasedTimeout.builder().
      withTimeout(this.getClass()).withLookingForStuckThread(true).build();
  private static final Log LOG = LogFactory.getLog(TestSyncTable.class);

  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  @Rule
  public TestName name = new TestName();

  /** Starts the shared mini cluster (started with argument 3) used by the test in this class. */
  @BeforeClass
  public static void beforeClass() throws Exception {
    TEST_UTIL.startMiniCluster(3);
  }

  /** Shuts the shared mini cluster down once the test has run. */
  @AfterClass
  public static void afterClass() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * Builds the split keys that divide the integer row-key range {@code [0, numRows)} into
   * {@code numRegions} regions.
   *
   * @param numRows total number of rows that will be written
   * @param numRegions number of regions wanted; must be at least 1
   * @return {@code numRegions - 1} split keys, the i-th (1-based) being
   *         {@code numRows * i / numRegions} passed through {@code Bytes.toBytes(int)}
   */
  private static byte[][] generateSplits(int numRows, int numRegions) {
    byte[][] splitRows = new byte[numRegions - 1][];
    for (int i = 1; i < numRegions; i++) {
      splitRows[i - 1] = Bytes.toBytes(numRows * i / numRegions);
    }
    return splitRows;
  }

  /**
   * Writes deliberately divergent data to a source and a target table, hashes the source, runs
   * SyncTable, and then checks both that the two tables end up identical and that the job
   * counters match the differences set up by {@link #writeTestData(TableName, TableName)}.
   */
  @Test
  public void testSyncTable() throws Exception {
    final TableName sourceTableName = TableName.valueOf(name.getMethodName() + "_source");
    final TableName targetTableName = TableName.valueOf(name.getMethodName() + "_target");
    Path testDir = TEST_UTIL.getDataTestDirOnTestFS("testSyncTable");

    writeTestData(sourceTableName, targetTableName);
    hashSourceTable(sourceTableName, testDir);
    Counters syncCounters = syncTables(sourceTableName, targetTableName, testDir);

    // The source table holds 90 rows: 100 row keys minus the 10 written to the target only.
    assertEqualTables(90, sourceTableName, targetTableName);

    // Each expected value is the sum of the per-scenario counts noted in writeTestData().
    assertEquals(60, syncCounters.findCounter(Counter.ROWSWITHDIFFS).getValue());
    assertEquals(10, syncCounters.findCounter(Counter.SOURCEMISSINGROWS).getValue());
    assertEquals(10, syncCounters.findCounter(Counter.TARGETMISSINGROWS).getValue());
    assertEquals(50, syncCounters.findCounter(Counter.SOURCEMISSINGCELLS).getValue());
    assertEquals(50, syncCounters.findCounter(Counter.TARGETMISSINGCELLS).getValue());
    assertEquals(20, syncCounters.findCounter(Counter.DIFFERENTCELLVALUES).getValue());

    TEST_UTIL.deleteTable(sourceTableName);
    TEST_UTIL.deleteTable(targetTableName);
    TEST_UTIL.cleanupDataTestDirOnTestFS();
  }

  /**
   * Scans both tables in lock step and fails unless they hold exactly {@code expectedRows} rows
   * whose cells match pairwise on row, family, qualifier, timestamp and value.
   *
   * @param expectedRows number of rows each table must contain
   * @param sourceTableName table holding the reference data
   * @param targetTableName table expected to mirror the source
   */
  private void assertEqualTables(int expectedRows, TableName sourceTableName,
      TableName targetTableName) throws Exception {
    // try-with-resources: a failed assertion must not leak the tables or the scanners.
    try (Table sourceTable = TEST_UTIL.getConnection().getTable(sourceTableName);
        Table targetTable = TEST_UTIL.getConnection().getTable(targetTableName);
        ResultScanner sourceScanner = sourceTable.getScanner(new Scan());
        ResultScanner targetScanner = targetTable.getScanner(new Scan())) {

      for (int i = 0; i < expectedRows; i++) {
        Result sourceRow = sourceScanner.next();
        Result targetRow = targetScanner.next();

        LOG.debug("SOURCE row: " + (sourceRow == null ? "null" : Bytes.toInt(sourceRow.getRow()))
            + " cells:" + sourceRow);
        LOG.debug("TARGET row: " + (targetRow == null ? "null" : Bytes.toInt(targetRow.getRow()))
            + " cells:" + targetRow);

        if (sourceRow == null) {
          Assert.fail("Expected " + expectedRows
              + " source rows but only found " + i);
        }
        if (targetRow == null) {
          Assert.fail("Expected " + expectedRows
              + " target rows but only found " + i);
        }

        Cell[] sourceCells = sourceRow.rawCells();
        Cell[] targetCells = targetRow.rawCells();
        if (sourceCells.length != targetCells.length) {
          LOG.debug("Source cells: " + Arrays.toString(sourceCells));
          LOG.debug("Target cells: " + Arrays.toString(targetCells));
          Assert.fail("Row " + Bytes.toInt(sourceRow.getRow())
              + " has " + sourceCells.length
              + " cells in source table but " + targetCells.length
              + " cells in target table");
        }
        for (int j = 0; j < sourceCells.length; j++) {
          assertMatchingCells(sourceCells[j], targetCells[j]);
        }
      }

      // Neither scanner may have anything left once the expected rows have been consumed.
      Result sourceRow = sourceScanner.next();
      if (sourceRow != null) {
        Assert.fail("Source table has more than " + expectedRows
            + " rows.  Next row: " + Bytes.toInt(sourceRow.getRow()));
      }
      Result targetRow = targetScanner.next();
      if (targetRow != null) {
        Assert.fail("Target table has more than " + expectedRows
            + " rows.  Next row: " + Bytes.toInt(targetRow.getRow()));
      }
    }
  }

  /**
   * Fails unless the two cells agree on row, family, qualifier, timestamp and value. On any
   * failure both cells are logged at debug level before the failure is rethrown.
   *
   * @param sourceCell cell read from the source table
   * @param targetCell cell at the same position in the target table's row
   */
  private static void assertMatchingCells(Cell sourceCell, Cell targetCell) {
    try {
      if (!CellUtil.matchingRow(sourceCell, targetCell)) {
        Assert.fail("Rows don't match");
      }
      if (!CellUtil.matchingFamily(sourceCell, targetCell)) {
        Assert.fail("Families don't match");
      }
      if (!CellUtil.matchingQualifier(sourceCell, targetCell)) {
        Assert.fail("Qualifiers don't match");
      }
      if (!CellUtil.matchingTimestamp(sourceCell, targetCell)) {
        Assert.fail("Timestamps don't match");
      }
      if (!CellUtil.matchingValue(sourceCell, targetCell)) {
        Assert.fail("Values don't match");
      }
    } catch (Throwable t) {
      LOG.debug("Source cell: " + sourceCell + " target cell: " + targetCell);
      Throwables.propagate(t);
    }
  }

  /**
   * Runs the SyncTable job against the hash output in {@code testDir} and asserts that it exits
   * with code 0.
   *
   * @param sourceTableName table the hashes were computed from
   * @param targetTableName table to be brought in line with the source
   * @param testDir directory holding the HashTable output
   * @return the counters exposed by the finished SyncTable instance
   */
  private Counters syncTables(TableName sourceTableName, TableName targetTableName,
      Path testDir) throws Exception {
    SyncTable syncTable = new SyncTable(TEST_UTIL.getConfiguration());
    int code = syncTable.run(new String[] {
        testDir.toString(),
        sourceTableName.getNameAsString(),
        targetTableName.getNameAsString()
        });
    assertEquals("sync table job failed", 0, code);

    LOG.info("Sync tables completed");
    return syncTable.counters;
  }

  /**
   * Runs the HashTable job over the source table, writing into {@code testDir}, then reads the
   * result back and checks the recorded table name, batch size, hash-file count and partition
   * count.
   *
   * @param sourceTableName table to hash
   * @param testDir output directory for the hash files
   */
  private void hashSourceTable(TableName sourceTableName, Path testDir)
      throws Exception, IOException {
    int numHashFiles = 3;
    long batchSize = 100;  // should be 2 batches per region
    int scanBatch = 1;
    HashTable hashTable = new HashTable(TEST_UTIL.getConfiguration());
    int code = hashTable.run(new String[] {
        "--batchsize=" + batchSize,
        "--numhashfiles=" + numHashFiles,
        "--scanbatch=" + scanBatch,
        sourceTableName.getNameAsString(),
        testDir.toString()});
    assertEquals("hash table job failed", 0, code);

    FileSystem fs = TEST_UTIL.getTestFileSystem();

    HashTable.TableHash tableHash = HashTable.TableHash.read(fs.getConf(), testDir);
    assertEquals(sourceTableName.getNameAsString(), tableHash.tableName);
    assertEquals(batchSize, tableHash.batchSize);
    assertEquals(numHashFiles, tableHash.numHashFiles);
    assertEquals(numHashFiles - 1, tableHash.partitions.size());

    LOG.info("Hash table completed");
  }

  /**
   * Creates the source table (10 regions) and the target table (6 regions) and fills row keys
   * 0..99 in seven groups, each exercising one kind of difference. The comment above each loop
   * lists the counter increments that group is expected to produce.
   *
   * @param sourceTableName name of the source table to create and populate
   * @param targetTableName name of the target table to create and populate
   */
  private void writeTestData(TableName sourceTableName, TableName targetTableName)
      throws Exception {
    final byte[] family = Bytes.toBytes("family");
    final byte[] column1 = Bytes.toBytes("c1");
    final byte[] column2 = Bytes.toBytes("c2");
    final byte[] value1 = Bytes.toBytes("val1");
    final byte[] value2 = Bytes.toBytes("val2");
    final byte[] value3 = Bytes.toBytes("val3");

    int numRows = 100;
    int sourceRegions = 10;
    int targetRegions = 6;

    // try-with-resources: close both tables even if a put fails part-way through.
    try (Table sourceTable = TEST_UTIL.createTable(sourceTableName,
            family, generateSplits(numRows, sourceRegions));
        Table targetTable = TEST_UTIL.createTable(targetTableName,
            family, generateSplits(numRows, targetRegions))) {

      long timestamp = 1430764183454L;

      int rowIndex = 0;
      // a bunch of identical rows
      for (; rowIndex < 40; rowIndex++) {
        Put sourcePut = new Put(Bytes.toBytes(rowIndex));
        sourcePut.addColumn(family, column1, timestamp, value1);
        sourcePut.addColumn(family, column2, timestamp, value2);
        sourceTable.put(sourcePut);

        Put targetPut = new Put(Bytes.toBytes(rowIndex));
        targetPut.addColumn(family, column1, timestamp, value1);
        targetPut.addColumn(family, column2, timestamp, value2);
        targetTable.put(targetPut);
      }
      // some rows only in the source table
      // ROWSWITHDIFFS: 10
      // TARGETMISSINGROWS: 10
      // TARGETMISSINGCELLS: 20
      for (; rowIndex < 50; rowIndex++) {
        Put put = new Put(Bytes.toBytes(rowIndex));
        put.addColumn(family, column1, timestamp, value1);
        put.addColumn(family, column2, timestamp, value2);
        sourceTable.put(put);
      }
      // some rows only in the target table
      // ROWSWITHDIFFS: 10
      // SOURCEMISSINGROWS: 10
      // SOURCEMISSINGCELLS: 20
      for (; rowIndex < 60; rowIndex++) {
        Put put = new Put(Bytes.toBytes(rowIndex));
        put.addColumn(family, column1, timestamp, value1);
        put.addColumn(family, column2, timestamp, value2);
        targetTable.put(put);
      }
      // some rows with 1 missing cell in target table
      // ROWSWITHDIFFS: 10
      // TARGETMISSINGCELLS: 10
      for (; rowIndex < 70; rowIndex++) {
        Put sourcePut = new Put(Bytes.toBytes(rowIndex));
        sourcePut.addColumn(family, column1, timestamp, value1);
        sourcePut.addColumn(family, column2, timestamp, value2);
        sourceTable.put(sourcePut);

        Put targetPut = new Put(Bytes.toBytes(rowIndex));
        targetPut.addColumn(family, column1, timestamp, value1);
        targetTable.put(targetPut);
      }
      // some rows with 1 missing cell in source table
      // ROWSWITHDIFFS: 10
      // SOURCEMISSINGCELLS: 10
      for (; rowIndex < 80; rowIndex++) {
        Put sourcePut = new Put(Bytes.toBytes(rowIndex));
        sourcePut.addColumn(family, column1, timestamp, value1);
        sourceTable.put(sourcePut);

        Put targetPut = new Put(Bytes.toBytes(rowIndex));
        targetPut.addColumn(family, column1, timestamp, value1);
        targetPut.addColumn(family, column2, timestamp, value2);
        targetTable.put(targetPut);
      }
      // some rows differing only in timestamp
      // ROWSWITHDIFFS: 10
      // SOURCEMISSINGCELLS: 20
      // TARGETMISSINGCELLS: 20
      for (; rowIndex < 90; rowIndex++) {
        // Both sides carry the same values here; only the timestamps differ.
        Put sourcePut = new Put(Bytes.toBytes(rowIndex));
        sourcePut.addColumn(family, column1, timestamp, value1);
        sourcePut.addColumn(family, column2, timestamp, value2);
        sourceTable.put(sourcePut);

        Put targetPut = new Put(Bytes.toBytes(rowIndex));
        targetPut.addColumn(family, column1, timestamp + 1, value1);
        targetPut.addColumn(family, column2, timestamp - 1, value2);
        targetTable.put(targetPut);
      }
      // some rows with different values
      // ROWSWITHDIFFS: 10
      // DIFFERENTCELLVALUES: 20
      for (; rowIndex < numRows; rowIndex++) {
        Put sourcePut = new Put(Bytes.toBytes(rowIndex));
        sourcePut.addColumn(family, column1, timestamp, value1);
        sourcePut.addColumn(family, column2, timestamp, value2);
        sourceTable.put(sourcePut);

        Put targetPut = new Put(Bytes.toBytes(rowIndex));
        targetPut.addColumn(family, column1, timestamp, value3);
        targetPut.addColumn(family, column2, timestamp, value3);
        targetTable.put(targetPut);
      }
    }
  }
}