/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.blur.mapreduce.lib;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;
import java.util.UUID;
import org.apache.blur.MiniCluster;
import org.apache.blur.store.buffer.BufferStore;
import org.apache.blur.thirdparty.thrift_0_9_0.TException;
import org.apache.blur.thrift.BlurClient;
import org.apache.blur.thrift.generated.Blur.Iface;
import org.apache.blur.thrift.generated.BlurException;
import org.apache.blur.thrift.generated.Column;
import org.apache.blur.thrift.generated.ColumnDefinition;
import org.apache.blur.thrift.generated.Record;
import org.apache.blur.thrift.generated.RecordMutation;
import org.apache.blur.thrift.generated.RecordMutationType;
import org.apache.blur.thrift.generated.RowMutation;
import org.apache.blur.thrift.generated.TableDescriptor;
import org.apache.blur.utils.BlurConstants;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.Reader;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.CounterGroup;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
/**
 * End-to-end tests for {@code BlurInputFormat}: boots Blur + MapReduce mini
 * clusters, loads a table, runs an identity-map job reading through the input
 * format, and verifies every row/record comes back exactly once.
 */
public class BlurInputFormatTest {

  private static Configuration conf = new Configuration();
  private static MiniCluster miniCluster;

  /**
   * Spins up a Blur mini cluster (2 controllers / 2 shard servers) and an MR
   * mini cluster rooted in a scratch directory under {@code ./target}, then
   * swaps in the MR cluster's Configuration so submitted jobs land on it.
   */
  @BeforeClass
  public static void setupTest() throws Exception {
    setupJavaHome();
    File file = new File("./target/tmp/BlurInputFormatTest_tmp");
    String pathStr = file.getAbsoluteFile().toURI().toString();
    System.setProperty("test.build.data", pathStr + "/data");
    System.setProperty("hadoop.log.dir", pathStr + "/hadoop_log");
    miniCluster = new MiniCluster();
    miniCluster.startBlurCluster(pathStr + "/blur", 2, 2);
    miniCluster.startMrMiniCluster();
    conf = miniCluster.getMRConfiguration();
    // Small block-cache buffers are plenty for a unit test.
    BufferStore.initNewBuffer(128, 128 * 128);
  }

  /**
   * Fails fast with a helpful message when JAVA_HOME is not exported; the
   * Hadoop mini cluster forks child JVMs and requires it. When the
   * {@code java.home} system property is available it is suggested as the
   * likely value to export.
   */
  public static void setupJavaHome() {
    String str = System.getenv("JAVA_HOME");
    if (str == null) {
      String property = System.getProperty("java.home");
      if (property != null) {
        throw new RuntimeException("JAVA_HOME not set should probably be [" + property + "].");
      }
      throw new RuntimeException("JAVA_HOME not set.");
    }
  }

  /** Stops the MR mini cluster and cleans up on-disk state. */
  @AfterClass
  public static void teardown() throws IOException {
    if (miniCluster != null) {
      miniCluster.stopMrMiniCluster();
    }
    // NOTE(review): this removes "build", not the ./target/tmp scratch dir
    // created in setupTest - confirm which directory is actually intended.
    rm(new File("build"));
  }

  /** Recursively deletes {@code file}; paths that do not exist are ignored. */
  private static void rm(File file) {
    if (!file.exists()) {
      return;
    }
    if (file.isDirectory()) {
      for (File f : file.listFiles()) {
        rm(f);
      }
    }
    file.delete();
  }

  @Test
  public void testBlurInputFormatFastDisabledNoFileCache() throws IOException, BlurException, TException,
      ClassNotFoundException, InterruptedException {
    String tableName = "testBlurInputFormatFastDisabledNoFileCache";
    runTest(tableName, true, null);
  }

  @Test
  public void testBlurInputFormatFastEnabledNoFileCache() throws IOException, BlurException, TException,
      ClassNotFoundException, InterruptedException {
    String tableName = "testBlurInputFormatFastEnabledNoFileCache";
    runTest(tableName, false, null);
  }

  @Test
  public void testBlurInputFormatFastDisabledFileCache() throws IOException, BlurException, TException,
      ClassNotFoundException, InterruptedException {
    String tableName = "testBlurInputFormatFastDisabledFileCache";
    Path fileCache = new Path(miniCluster.getFileSystemUri() + "/filecache");
    runTest(tableName, true, fileCache);
  }

  @Test
  public void testBlurInputFormatFastEnabledFileCache() throws IOException, BlurException, TException,
      ClassNotFoundException, InterruptedException {
    String tableName = "testBlurInputFormatFastEnabledFileCache";
    Path fileCache = new Path(miniCluster.getFileSystemUri() + "/filecache");
    runTest(tableName, false, fileCache);
  }

  /**
   * Creates and loads a table with rows 100..199 (one record each), snapshots
   * it, runs a map-only identity job over the snapshot via
   * {@code BlurInputFormat} into a SequenceFile, and verifies the job used
   * exactly one map task and that every row round-tripped intact.
   *
   * @param tableName   name of the table to create/read (also the job output subdir)
   * @param disableFast value for the table's disable-fast-dir property
   * @param fileCache   optional local cache path for the input format; null to skip
   */
  private void runTest(String tableName, boolean disableFast, Path fileCache) throws IOException, BlurException,
      TException, InterruptedException, ClassNotFoundException {
    FileSystem fileSystem = miniCluster.getFileSystem();
    Path root = new Path(fileSystem.getUri() + "/");
    createTable(tableName, new Path(root, "tables"), disableFast);
    loadTable(tableName, 100, 100);
    Iface client = getClient();
    TableDescriptor tableDescriptor = client.describe(tableName);

    Job job = Job.getInstance(conf, "Read Data");
    job.setJarByClass(BlurInputFormatTest.class);
    job.setMapperClass(TestMapper.class);
    job.setInputFormatClass(BlurInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(TableBlurRecord.class);

    Path output = new Path(new Path(root, "output"), tableName);
    // Reads go against a snapshot so the job sees a stable view of the table.
    String snapshot = UUID.randomUUID().toString();
    client.createSnapshot(tableName, snapshot);
    if (fileCache != null) {
      BlurInputFormat.setLocalCachePath(job, fileCache);
    }
    BlurInputFormat.setMaxNumberOfMaps(job, 1);
    BlurInputFormat.setZooKeeperConnectionStr(job, miniCluster.getZkConnectionString());
    BlurInputFormat.addTable(job, tableDescriptor, snapshot);
    FileOutputFormat.setOutputPath(job, output);
    try {
      assertTrue(job.waitForCompletion(true));
      Counters counters = job.getCounters();
      assertMapTask(1, counters);
    } finally {
      // Always drop the snapshot, even if the job fails.
      client.removeSnapshot(tableName, snapshot);
    }

    final Map<Text, TableBlurRecord> results = new TreeMap<Text, TableBlurRecord>();
    walkOutput(output, conf, new ResultReader() {
      @Override
      public void read(Text rowId, TableBlurRecord tableBlurRecord) {
        // Copy both key and value; the reader reuses its Writable instances.
        results.put(new Text(rowId), new TableBlurRecord(tableBlurRecord));
      }
    });
    // TreeMap sorts Text keys lexicographically; "row-100".."row-199" are all
    // three digits, so lexicographic order equals numeric order here.
    int rowId = 100;
    for (Entry<Text, TableBlurRecord> e : results.entrySet()) {
      Text r = e.getKey();
      assertEquals(new Text("row-" + rowId), r);
      BlurRecord blurRecord = new BlurRecord();
      blurRecord.setRowId("row-" + rowId);
      blurRecord.setRecordId("record-" + rowId);
      blurRecord.setFamily("fam0");
      blurRecord.addColumn("col0", "value-" + rowId);
      TableBlurRecord tableBlurRecord = new TableBlurRecord(new Text(tableName), blurRecord);
      assertEquals(tableBlurRecord, e.getValue());
      rowId++;
    }
    assertEquals(200, rowId);
  }

  /**
   * Asserts the job launched exactly {@code expectedMaps} map tasks. Supports
   * both the Hadoop 2 counter group name and the legacy Hadoop 1 name; fails
   * outright if the TOTAL_LAUNCHED_MAPS counter cannot be found.
   *
   * @param expectedMaps expected value of TOTAL_LAUNCHED_MAPS
   * @param counters     counters from the completed job
   */
  private void assertMapTask(int expectedMaps, Counters counters) {
    for (CounterGroup counterGroup : counters) {
      String name = counterGroup.getName();
      boolean jobCounterGroup = false;
      if (name.equals("org.apache.hadoop.mapreduce.JobCounter")) {
        jobCounterGroup = true;
      } else if (name.equals("org.apache.hadoop.mapred.JobInProgress$Counter")) {
        jobCounterGroup = true;
      }
      if (jobCounterGroup) {
        for (Counter counter : counterGroup) {
          if (counter.getName().equals("TOTAL_LAUNCHED_MAPS")) {
            // Was hardcoded to 1, silently ignoring the parameter.
            assertEquals(expectedMaps, counter.getValue());
            return;
          }
        }
      }
    }
    fail();
  }

  /** Callback invoked once per key/value pair found in the job output. */
  public interface ResultReader {

    void read(Text rowId, TableBlurRecord tableBlurRecord);

  }

  /**
   * Recursively walks {@code output}, skipping names beginning with "_"
   * (e.g. _SUCCESS/_logs), and feeds every SequenceFile entry to
   * {@code resultReader}.
   */
  private void walkOutput(Path output, Configuration conf, ResultReader resultReader) throws IOException {
    FileSystem fileSystem = output.getFileSystem(conf);
    FileStatus fileStatus = fileSystem.getFileStatus(output);
    if (fileStatus.isDir()) {
      FileStatus[] listStatus = fileSystem.listStatus(output, new PathFilter() {
        @Override
        public boolean accept(Path path) {
          return !path.getName().startsWith("_");
        }
      });
      for (FileStatus fs : listStatus) {
        walkOutput(fs.getPath(), conf, resultReader);
      }
    } else {
      Reader reader = new SequenceFile.Reader(fileSystem, output, conf);
      try {
        Text rowId = new Text();
        TableBlurRecord tableBlurRecord = new TableBlurRecord();
        while (reader.next(rowId, tableBlurRecord)) {
          resultReader.read(rowId, tableBlurRecord);
        }
      } finally {
        // Close even when next()/read() throws; was leaked on error before.
        reader.close();
      }
    }
  }

  /** Returns a Blur client connected through the mini cluster's ZooKeeper. */
  private Iface getClient() {
    return BlurClient.getClientFromZooKeeperConnectionStr(miniCluster.getZkConnectionString());
  }

  /**
   * Writes {@code numb} rows (ids {@code startId}..{@code startId + numb - 1})
   * in a single batch mutation; each row has one "fam0" record with one
   * "col0" column.
   */
  private void loadTable(String tableName, int startId, int numb) throws BlurException, TException {
    Iface client = getClient();
    List<RowMutation> batch = new ArrayList<RowMutation>();
    for (int i = 0; i < numb; i++) {
      int id = startId + i;
      RowMutation rowMutation = new RowMutation();
      rowMutation.setTable(tableName);
      rowMutation.setRowId("row-" + Integer.toString(id));
      Record record = new Record();
      record.setFamily("fam0");
      record.setRecordId("record-" + id);
      record.addToColumns(new Column("col0", "value-" + id));
      rowMutation.addToRecordMutations(new RecordMutation(RecordMutationType.REPLACE_ENTIRE_RECORD, record));
      batch.add(rowMutation);
    }
    client.mutateBatch(batch);
  }

  /**
   * Creates a 2-shard table at {@code tables}/{@code tableName} with the
   * disable-fast-dir property set, and registers a string column definition
   * for fam0.col0.
   */
  private void createTable(String tableName, Path tables, boolean fastDisable) throws BlurException, TException {
    Path tablePath = new Path(tables, tableName);
    Iface client = getClient();
    TableDescriptor tableDescriptor = new TableDescriptor();
    tableDescriptor.setTableUri(tablePath.toString());
    tableDescriptor.setName(tableName);
    tableDescriptor.setShardCount(2);
    tableDescriptor.putToTableProperties(BlurConstants.BLUR_TABLE_DISABLE_FAST_DIR, Boolean.toString(fastDisable));
    client.createTable(tableDescriptor);
    ColumnDefinition colDef = new ColumnDefinition();
    colDef.setFamily("fam0");
    colDef.setColumnName("col0");
    colDef.setFieldType("string");
    client.addColumnDefinition(tableName, colDef);
  }

  /** Identity mapper: passes every (rowId, record) pair through unchanged. */
  public static class TestMapper extends Mapper<Text, TableBlurRecord, Text, TableBlurRecord> {
    @Override
    protected void map(Text key, TableBlurRecord value, Context context) throws IOException, InterruptedException {
      context.write(key, value);
    }
  }

}