/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.mapreduce;
import static org.junit.Assert.assertEquals;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.testclassification.LargeTests;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.MapFile;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;
import org.junit.rules.TestName;
/**
 * Basic test for the HashTable M/R tool.
 *
 * <p>The test writes a fixed set of rows (fixed row keys, values and timestamp) into a
 * pre-split table, runs {@link HashTable} over it, and then checks both the job metadata
 * read back through {@code HashTable.TableHash.read} and the per-batch hashes stored in the
 * generated hash data files against known-good values.
 */
@Category(LargeTests.class)
public class TestHashTable {

  private static final Log LOG = LogFactory.getLog(TestHashTable.class);

  private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();

  /** Supplies the running test method's name, which is used as the table name. */
  @Rule
  public TestName name = new TestName();

  /** Starts the shared mini cluster once for the whole class. */
  @BeforeClass
  public static void beforeClass() throws Exception {
    TEST_UTIL.startMiniCluster(3);
  }

  /** Shuts the shared mini cluster down after all tests have run. */
  @AfterClass
  public static void afterClass() throws Exception {
    TEST_UTIL.shutdownMiniCluster();
  }

  /**
   * Runs the HashTable job against a small pre-split table and verifies the job's metadata
   * and the hashes it wrote.
   *
   * @throws Exception if table setup, the job, or reading its output fails
   */
  @Test
  public void testHashTable() throws Exception {
    final TableName tableName = TableName.valueOf(name.getMethodName());
    final byte[] family = Bytes.toBytes("family");
    final byte[] column1 = Bytes.toBytes("c1");
    final byte[] column2 = Bytes.toBytes("c2");
    final byte[] column3 = Bytes.toBytes("c3");

    int numRows = 100;
    int numRegions = 10;
    int numHashFiles = 3;

    // Evenly spaced split points so the rows are spread over numRegions regions.
    byte[][] splitRows = new byte[numRegions - 1][];
    for (int i = 1; i < numRegions; i++) {
      splitRows[i - 1] = Bytes.toBytes(numRows * i / numRegions);
    }

    // A fixed timestamp keeps the cell contents, and therefore the hashes, reproducible.
    long timestamp = 1430764183454L;

    // Put rows into the table; try-with-resources closes it even if a put fails.
    try (Table t1 = TEST_UTIL.createTable(tableName, family, splitRows)) {
      for (int i = 0; i < numRows; i++) {
        Put p = new Put(Bytes.toBytes(i), timestamp);
        p.addColumn(family, column1, column1);
        p.addColumn(family, column2, column2);
        p.addColumn(family, column3, column3);
        t1.put(p);
      }
    }

    HashTable hashTable = new HashTable(TEST_UTIL.getConfiguration());

    Path testDir = TEST_UTIL.getDataTestDirOnTestFS(tableName.getNameAsString());

    long batchSize = 300;
    int code = hashTable.run(new String[] {
      "--batchsize=" + batchSize,
      "--numhashfiles=" + numHashFiles,
      "--scanbatch=2",
      tableName.getNameAsString(),
      testDir.toString()});
    assertEquals("test job failed", 0, code);

    FileSystem fs = TEST_UTIL.getTestFileSystem();

    // The metadata read back from the output directory must match the job arguments.
    HashTable.TableHash tableHash = HashTable.TableHash.read(fs.getConf(), testDir);
    assertEquals(tableName.getNameAsString(), tableHash.tableName);
    assertEquals(batchSize, tableHash.batchSize);
    assertEquals(numHashFiles, tableHash.numHashFiles);
    assertEquals(numHashFiles - 1, tableHash.partitions.size());
    for (ImmutableBytesWritable bytes : tableHash.partitions) {
      LOG.debug("partition: " + Bytes.toInt(bytes.get()));
    }

    ImmutableMap<Integer, ImmutableBytesWritable> expectedHashes = expectedHashes();

    Path dataDir = new Path(testDir, HashTable.HASH_DATA_DIR);
    Map<Integer, ImmutableBytesWritable> actualHashes =
        readActualHashes(fs, dataDir, numHashFiles);

    logListing(fs, testDir, "Output file: ");
    logListing(fs, dataDir, "Data file: ");

    if (!expectedHashes.equals(actualHashes)) {
      LOG.error("Diff: " + Maps.difference(expectedHashes, actualHashes));
    }
    assertEquals("hashes read from data files do not match", expectedHashes, actualHashes);

    TEST_UTIL.deleteTable(tableName);
    TEST_UTIL.cleanupDataTestDirOnTestFS();
  }

  /**
   * Builds the known-good hashes, keyed by the integer row key that starts each batch.
   * The key {@code -1} stands for the empty start key (see {@link #readActualHashes}).
   */
  private static ImmutableMap<Integer, ImmutableBytesWritable> expectedHashes() {
    return ImmutableMap.<Integer, ImmutableBytesWritable>builder()
      .put(-1, new ImmutableBytesWritable(Bytes.fromHex("714cb10a9e3b5569852980edd8c6ca2f")))
      .put(5, new ImmutableBytesWritable(Bytes.fromHex("28d961d9252ce8f8d44a07b38d3e1d96")))
      .put(10, new ImmutableBytesWritable(Bytes.fromHex("f6bbc4a224d8fd929b783a92599eaffa")))
      .put(15, new ImmutableBytesWritable(Bytes.fromHex("522deb5d97f73a414ecc11457be46881")))
      .put(20, new ImmutableBytesWritable(Bytes.fromHex("b026f2611aaa46f7110116d807545352")))
      .put(25, new ImmutableBytesWritable(Bytes.fromHex("39ffc1a3094aa12a2e90ffd9cef2ce93")))
      .put(30, new ImmutableBytesWritable(Bytes.fromHex("f6b4d75727ce9a30ac29e4f08f601666")))
      .put(35, new ImmutableBytesWritable(Bytes.fromHex("422e2d2f1eb79a8f02171a705a42c090")))
      .put(40, new ImmutableBytesWritable(Bytes.fromHex("559ad61c900fffefea0a15abf8a97bc3")))
      .put(45, new ImmutableBytesWritable(Bytes.fromHex("23019084513eca41cee436b2a29611cb")))
      .put(50, new ImmutableBytesWritable(Bytes.fromHex("b40467d222ddb4949b142fe145ee9edc")))
      .put(55, new ImmutableBytesWritable(Bytes.fromHex("372bf89fcd8ca4b7ab3c1add9d07f7e4")))
      .put(60, new ImmutableBytesWritable(Bytes.fromHex("69ae0585e6255de27dce974e332b8f8b")))
      .put(65, new ImmutableBytesWritable(Bytes.fromHex("8029610044297aad0abdbecd485d8e59")))
      .put(70, new ImmutableBytesWritable(Bytes.fromHex("de5f784f7f78987b6e57ecfd81c8646f")))
      .put(75, new ImmutableBytesWritable(Bytes.fromHex("1cd757cc4e1715c8c3b1c24447a1ec56")))
      .put(80, new ImmutableBytesWritable(Bytes.fromHex("f9a53aacfeb6142b08066615e7038095")))
      .put(85, new ImmutableBytesWritable(Bytes.fromHex("89b872b7e639df32d3276b33928c0c91")))
      .put(90, new ImmutableBytesWritable(Bytes.fromHex("45eeac0646d46a474ea0484175faed38")))
      .put(95, new ImmutableBytesWritable(Bytes.fromHex("f57c447e32a08f4bf1abb2892839ac56")))
      .build();
  }

  /**
   * Reads every (key, hash) entry from the first {@code numHashFiles} hash data files under
   * {@code dataDir}. An empty key is recorded as {@code -1}; any other key is decoded as an
   * int. Fails the test if the same key shows up more than once across the files.
   *
   * @param fs file system whose configuration is used to open the MapFiles
   * @param dataDir directory holding the hash data files
   * @param numHashFiles number of data files to read, indexed from 0
   * @return the decoded key mapped to a private copy of its hash bytes
   * @throws Exception if a data file cannot be opened or read
   */
  private static Map<Integer, ImmutableBytesWritable> readActualHashes(FileSystem fs,
      Path dataDir, int numHashFiles) throws Exception {
    Map<Integer, ImmutableBytesWritable> actualHashes = new HashMap<>();
    for (int i = 0; i < numHashFiles; i++) {
      Path hashPath = new Path(dataDir, HashTable.TableHash.getDataFileName(i));
      // try-with-resources: the reader is closed even when reading or an assertion fails.
      try (MapFile.Reader reader = new MapFile.Reader(hashPath, fs.getConf())) {
        ImmutableBytesWritable key = new ImmutableBytesWritable();
        ImmutableBytesWritable hash = new ImmutableBytesWritable();
        while (reader.next(key, hash)) {
          int intKey = -1;
          if (key.getLength() > 0) {
            intKey = Bytes.toInt(key.get(), key.getOffset(), key.getLength());
          }
          if (LOG.isDebugEnabled()) {
            // Log the decoded key rather than re-parsing its hex form, which would throw
            // for keys whose high bit is set.
            LOG.debug("Key: " + intKey
              + " Hash: " + Bytes.toHex(hash.get(), hash.getOffset(), hash.getLength()));
          }
          if (actualHashes.containsKey(intKey)) {
            Assert.fail("duplicate key in data files: " + intKey);
          }
          // The reader reuses 'hash' between calls, so store a copy of the bytes.
          actualHashes.put(intKey, new ImmutableBytesWritable(hash.copyBytes()));
        }
      }
    }
    return actualHashes;
  }

  /**
   * Debug-logs the path of every entry directly under {@code dir}, each prefixed with
   * {@code label}.
   */
  private static void logListing(FileSystem fs, Path dir, String label) throws Exception {
    for (FileStatus file : fs.listStatus(dir)) {
      LOG.debug(label + file.getPath());
    }
  }
}