/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.hadoop.zebra.io;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
import junit.framework.Assert;
import junit.framework.TestCase;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.zebra.tfile.RawComparable;
import org.apache.hadoop.zebra.io.BasicTable;
import org.apache.hadoop.zebra.io.BasicTableStatus;
import org.apache.hadoop.zebra.io.KeyDistribution;
import org.apache.hadoop.zebra.io.TableInserter;
import org.apache.hadoop.zebra.io.TableScanner;
import org.apache.hadoop.zebra.io.BasicTable.Reader.RangeSplit;
import org.apache.hadoop.zebra.parser.ParseException;
import org.apache.hadoop.zebra.types.Projection;
import org.apache.hadoop.zebra.schema.Schema;
import org.apache.hadoop.zebra.types.TypesUtils;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
public class TestBasicTable {
/** Shared Hadoop configuration; created and tuned in {@link #setUpOnce()}. */
public static Configuration conf;
/** Random source for keys, cell values and row counts; seeded from System.nanoTime() in {@link #setUpOnce()}. */
public static Random random;
/** Directory under which the test tables are created; taken from the "test.build.data" system property, with a default. */
public static Path rootPath;
/** File system obtained from {@link #rootPath}; used to check whether a table path already exists. */
public static FileSystem fs;
/**
 * One-time fixture setup: builds the shared configuration (64 KiB minimum
 * TFile block size, 64 KiB minimum input split size, compression "none"),
 * seeds the random generator from the current nano time, and resolves the
 * working directory and its file system.
 *
 * @throws IOException if the file system for the working directory cannot be obtained
 */
@BeforeClass
public static void setUpOnce() throws IOException {
  final int sixtyFourKiB = 64 * 1024;
  Configuration configuration = new Configuration();
  configuration.setInt("table.output.tfile.minBlock.size", sixtyFourKiB);
  configuration.setInt("table.input.split.minSize", sixtyFourKiB);
  configuration.set("table.output.tfile.compression", "none");
  conf = configuration;

  random = new Random(System.nanoTime());

  String workDir = System.getProperty("test.build.data", "build/test/data/work-dir");
  rootPath = new Path(workDir);
  fs = rootPath.getFileSystem(conf);
}
/**
 * One-time fixture teardown hook, run after all tests of this class.
 * There is currently nothing to release.
 *
 * @throws IOException declared for symmetry with {@link #setUpOnce()}; never thrown today
 */
@AfterClass
public static void tearDownOnce() throws IOException {
}
/**
 * Builds a key for a random index drawn from the shared generator.
 *
 * @param max exclusive upper bound of the random index
 * @return the key produced by {@link #makeKey(int)} for that index
 */
static BytesWritable makeRandomKey(int max) {
  int index = random.nextInt(max);
  return makeKey(index);
}
/**
 * Builds the key for row index {@code i}: the text "key" followed by the
 * index zero-padded to nine digits, encoded with the platform default charset.
 *
 * @param i row index
 * @return the key bytes wrapped in a BytesWritable
 */
static BytesWritable makeKey(int i) {
  String text = String.format("key%09d", i);
  byte[] encoded = text.getBytes();
  return new BytesWritable(encoded);
}
/**
 * Produces {@code prefix} followed by a random number, zero-padded to nine digits.
 *
 * @param prefix text placed in front of the number
 * @param max exclusive upper bound of the random number
 * @return the formatted string
 */
static String makeString(String prefix, int max) {
  int suffix = random.nextInt(max);
  return String.format("%s%09d", prefix, suffix);
}
/**
 * Creates (or re-creates) a basic table at {@code path} and fills it with random data.
 *
 * <p>An existing table at {@code path} is dropped first. For each of the
 * {@code parts} parts a new writer and inserter are opened; when
 * {@code rows > 0} each part receives {@code random.nextInt(rows) + rows / 2}
 * rows. Keys are sequential ({@code makeKey(total)}) when the writer reports the
 * table as sorted, otherwise random below {@code rows * 10}. Every column is set
 * to a random "col-&lt;name&gt;NNNNNNNNN" byte string.
 *
 * @param parts number of parts (inserters) to write
 * @param rows base row count per part; {@code 0} writes empty parts
 * @param strSchema table schema string
 * @param storage storage (column group) specification
 * @param sortColumns sort column specification passed to the writer
 * @param path table location
 * @param properClose when true, a final writer is opened and closed, and for
 *        sorted tables the row count reported by the table status is asserted
 * @return total number of rows inserted over all parts
 * @throws IOException on any table I/O failure, or when a column value cannot
 *         be stored in the tuple
 */
public static int createBasicTable(int parts, int rows, String strSchema, String storage, String sortColumns,
    Path path, boolean properClose) throws IOException {
  if (fs.exists(path)) {
    BasicTable.drop(path, conf);
  }
  BasicTable.Writer writer = new BasicTable.Writer(path, strSchema, storage, sortColumns, null, conf);
  writer.finish();
  int total = 0;
  Schema schema = writer.getSchema();
  String[] colNames = schema.getColumns();
  Tuple tuple = TypesUtils.createTuple(schema);
  boolean sorted = writer.isSorted();
  for (int i = 0; i < parts; ++i) {
    writer = new BasicTable.Writer(path, conf);
    TableInserter inserter = writer.getInserter(
        String.format("part-%06d", i), true);
    if (rows > 0) {
      int actualRows = random.nextInt(rows) + rows / 2;
      for (int j = 0; j < actualRows; ++j, ++total) {
        // Sorted tables need monotonically increasing keys; unsorted ones get random keys.
        BytesWritable key = sorted ? makeKey(total) : makeRandomKey(rows * 10);
        TypesUtils.resetTuple(tuple);
        for (int k = 0; k < tuple.size(); ++k) {
          try {
            tuple.set(k, new DataByteArray(makeString("col-" + colNames[k], rows * 10).getBytes()));
          } catch (ExecException e) {
            // Fail the test instead of silently inserting a partially filled tuple.
            throw new IOException("Failed to set column " + k + " (" + colNames[k] + ") of the test tuple", e);
          }
        }
        inserter.insert(key, tuple);
      }
    }
    inserter.close();
  }
  if (properClose) {
    writer = new BasicTable.Writer(path, conf);
    writer.close();
    /* We can only test number of rows on sorted tables. */
    if (sorted) {
      BasicTableStatus status = getStatus(path);
      Assert.assertEquals(total, status.getRows());
    }
  }
  return total;
}
/**
 * Splits the table into range splits, scans every split with the given
 * projection and checks that the splits together cover the whole table.
 *
 * <p>Two things are asserted: the rows counted over all splits equal
 * {@code totalRows}, and the block-distribution lengths of the splits add up
 * to the table size reported by the table status.
 *
 * @param numSplits number of splits requested from the reader
 * @param totalRows number of rows the table is expected to contain
 * @param strProjection projection applied to every reader
 * @param path table location
 * @throws IOException on table I/O failure
 * @throws ParseException if the projection cannot be parsed
 */
static void rangeSplitBasicTable(int numSplits, int totalRows, String strProjection,
    Path path) throws IOException, ParseException {
  long totalBytes;
  List<RangeSplit> splits;
  BasicTable.Reader reader = new BasicTable.Reader(path, conf);
  try {
    reader.setProjection(strProjection);
    totalBytes = reader.getStatus().getSize();
    splits = reader.rangeSplit(numSplits);
  } finally {
    reader.close();
  }
  int total = 0;
  for (RangeSplit split : splits) {
    // A fresh reader per split; closed in finally so it cannot leak when a scan fails.
    BasicTable.Reader splitReader = new BasicTable.Reader(path, conf);
    try {
      splitReader.setProjection(strProjection);
      total += doReadOnly(splitReader.getScanner(split, true));
      totalBytes -= splitReader.getBlockDistribution(split).getLength();
    } finally {
      splitReader.close();
    }
  }
  Assert.assertEquals(totalRows, total);
  Assert.assertEquals(0L, totalBytes);
  // TODO: verify tuples contains the right projected values
}
/**
 * Runs {@link #rangeSplitBasicTable} once for every strictly positive entry
 * of {@code numSplits}; zero and negative entries are skipped.
 *
 * @param numSplits candidate split counts
 * @param totalRows number of rows the table is expected to contain
 * @param projection projection applied while scanning
 * @param path table location
 * @throws IOException on table I/O failure
 * @throws ParseException if the projection cannot be parsed
 */
static void doRangeSplit(int[] numSplits, int totalRows, String projection, Path path)
    throws IOException, ParseException {
  for (int idx = 0; idx < numSplits.length; ++idx) {
    int splitCount = numSplits[idx];
    if (splitCount <= 0) {
      continue;
    }
    rangeSplitBasicTable(splitCount, totalRows, projection, path);
  }
}
/**
 * Splits a table by key boundaries, scans every key range with the given
 * projection and asserts that the ranges together yield {@code totalRows} rows.
 *
 * <p>Boundaries come from the reader's key distribution when it holds at
 * least {@code numSplits} keys; otherwise
 * {@code min(totalRows / 10, numSplits)} distinct random row keys are used.
 * Range {@code i} runs from {@code keys[i - 1]} (or the table start) to
 * {@code keys[i]} (or the table end for the last range).
 *
 * @param numSplits desired number of key splits
 * @param totalRows number of rows the table is expected to contain
 * @param strProjection projection applied to every reader
 * @param path table location
 * @throws IOException on table I/O failure
 * @throws ParseException if the projection cannot be parsed
 */
static void keySplitBasicTable(int numSplits, int totalRows, String strProjection,
    Path path) throws IOException, ParseException {
  long totalBytes;
  KeyDistribution keyDistri;
  BlockDistribution lastBd = new BlockDistribution();
  BasicTable.Reader reader = new BasicTable.Reader(path, conf);
  try {
    reader.setProjection(strProjection);
    totalBytes = reader.getStatus().getSize();
    keyDistri = reader.getKeyDistribution(numSplits * 10, 1, lastBd);
    Assert.assertEquals(totalBytes, keyDistri.length() + lastBd.getLength());
  } finally {
    // Closed in finally so a failed assertion above does not leak the reader.
    reader.close();
  }
  BytesWritable[] keys;
  if (keyDistri.size() >= numSplits) {
    keyDistri.resize(lastBd);
    Assert.assertEquals(totalBytes, keyDistri.length() + lastBd.getLength());
    RawComparable[] rawComparables = keyDistri.getKeys();
    keys = new BytesWritable[rawComparables.length];
    for (int i = 0; i < keys.length; ++i) {
      // Copy each raw key into its own BytesWritable of exactly the key's size.
      keys[i] = new BytesWritable();
      keys[i].setSize(rawComparables[i].size());
      System.arraycopy(rawComparables[i].buffer(),
          rawComparables[i].offset(), keys[i].get(), 0, rawComparables[i]
              .size());
    }
  } else {
    int targetSize = Math.min(totalRows / 10, numSplits);
    // revert to manually cooked up keys.
    // The TreeSet keeps the row indices distinct and in ascending order.
    Set<Integer> keySets = new TreeSet<Integer>();
    while (keySets.size() < targetSize) {
      keySets.add(random.nextInt(totalRows));
    }
    keys = new BytesWritable[targetSize];
    int j = 0;
    for (int rowIndex : keySets) {
      keys[j++] = makeKey(rowIndex);
    }
  }
  // NOTE(review): when no keys are produced (e.g. totalRows < 10 on the fallback
  // path) nothing is scanned, so the final assertion only holds for an empty table.
  int total = 0;
  for (int i = 0; i < keys.length; ++i) {
    BytesWritable begin = (i == 0) ? null : keys[i - 1];
    BytesWritable end = (i == keys.length - 1) ? null : keys[i];
    BasicTable.Reader splitReader = new BasicTable.Reader(path, conf);
    try {
      splitReader.setProjection(strProjection);
      total += doReadOnly(splitReader.getScanner(begin, end, true));
    } finally {
      splitReader.close();
    }
  }
  Assert.assertEquals(totalRows, total);
}
/**
 * Runs {@link #keySplitBasicTable} once for every strictly positive entry
 * of {@code numSplits}; zero and negative entries are skipped.
 *
 * @param numSplits candidate split counts
 * @param totalRows number of rows the table is expected to contain
 * @param projection projection applied while scanning
 * @param path table location
 * @throws IOException on table I/O failure
 * @throws ParseException if the projection cannot be parsed
 */
static void doKeySplit(int[] numSplits, int totalRows, String projection, Path path)
    throws IOException, ParseException {
  for (int idx = 0; idx < numSplits.length; ++idx) {
    int splitCount = numSplits[idx];
    if (splitCount <= 0) {
      continue;
    }
    keySplitBasicTable(splitCount, totalRows, projection, path);
  }
}
/**
 * Opens a reader on the table at {@code path}, fetches its status and closes
 * the reader again, whether or not fetching the status succeeded.
 *
 * @param path table location
 * @return the status reported by the reader
 * @throws IOException if the table cannot be opened, queried or closed
 */
static BasicTableStatus getStatus(Path path) throws IOException {
  BasicTable.Reader tableReader = new BasicTable.Reader(path, conf);
  BasicTableStatus status;
  try {
    status = tableReader.getStatus();
  } finally {
    tableReader.close();
  }
  return status;
}
/**
 * Writes a table with {@link #createBasicTable} and reads it back through
 * range splits and, for sorted tables, key splits.
 *
 * <p>Range splits are tried with 1, 2, parts/2, parts and 2*parts splits; key
 * splits additionally with 10*parts. Non-positive counts are skipped by the
 * split helpers.
 *
 * @param path table location
 * @param parts number of parts to write
 * @param rows base row count per part
 * @param schema table schema string
 * @param storage storage (column group) specification
 * @param sortColumns sort column specification
 * @param projection projection used when reading
 * @param properClose forwarded to {@link #createBasicTable}
 * @param sorted whether key splits should be exercised as well
 * @throws IOException on table I/O failure
 * @throws ParseException if the projection cannot be parsed
 */
static void doReadWrite(Path path, int parts, int rows, String schema,
    String storage, String sortColumns, String projection, boolean properClose, boolean sorted)
    throws IOException, ParseException {
  int totalRows = createBasicTable(parts, rows, schema, storage, sortColumns, path,
      properClose);
  if (rows == 0) {
    // Writing zero rows per part must produce an empty table.
    Assert.assertEquals(0, totalRows);
  }
  doRangeSplit(new int[] { 1, 2, parts / 2, parts, 2 * parts }, totalRows,
      projection, path);
  if (sorted) {
    doKeySplit(new int[] { 1, 2, parts / 2, parts, 2 * parts, 10 * parts },
        totalRows, projection, path);
  }
}
/**
 * Round-trips an unsorted table whose five columns are stored in two column
 * groups, reading it back with a projection that also names a column (SF_f)
 * absent from the schema.
 */
@Test
public void testMultiCGs() throws IOException, ParseException {
  Path path = new Path(rootPath, "TestBasicTableMultiCGs");
  doReadWrite(path, 2, 100, "SF_a,SF_b,SF_c,SF_d,SF_e", "[SF_a,SF_b,SF_c];[SF_d,SF_e]", null, "SF_f,SF_a,SF_c,SF_d", true, false);
}
/**
 * Round-trips empty tables: zero parts and two empty parts, each written
 * without the final close, with it, and as a sorted table.
 */
@Test
public void testCornerCases() throws IOException, ParseException {
  Path path = new Path(rootPath, "TestBasicTableCornerCases");
  doReadWrite(path, 0, 0, "a, b, c", "", null, "a, d, c, f", false, false);
  doReadWrite(path, 0, 0, "a, b, c", "", null, "a, d, c, f", true, false);
  doReadWrite(path, 0, 0, "a, b, c", "", "a", "a, d, c, f", true, true);
  doReadWrite(path, 2, 0, "a, b, c", "", null, "a, d, c, f", false, false);
  doReadWrite(path, 2, 0, "a, b, c", "", null, "a, d, c, f", true, false);
  doReadWrite(path, 2, 0, "a, b, c", "", "a", "a, d, c, f", true, true);
}
/**
 * Scans {@code scanner} to the end, counting rows, and closes it afterwards,
 * even when the scan fails.
 *
 * <p>For every row one of four actions is picked with equal probability:
 * fetch the key, fetch the value, fetch both, or fetch nothing.
 *
 * @param scanner scanner to drain; always closed by this method
 * @return number of rows visited
 * @throws IOException on scan failure
 * @throws ParseException declared for callers; propagated from the scanner API if raised
 */
static int doReadOnly(TableScanner scanner) throws IOException, ParseException {
  int total = 0;
  try {
    BytesWritable key = new BytesWritable();
    Tuple value = TypesUtils.createTuple(scanner.getSchema());
    for (; !scanner.atEnd(); scanner.advance()) {
      ++total;
      // nextInt(4) is uniform over 0..3; "nextInt() % 4" also produced negative
      // values, which all fell through to the no-op branch.
      switch (random.nextInt(4)) {
      case 0:
        scanner.getKey(key);
        break;
      case 1:
        scanner.getValue(value);
        break;
      case 2:
        scanner.getKey(key);
        scanner.getValue(value);
        break;
      default: // no-op.
      }
    }
  } finally {
    scanner.close();
  }
  return total;
}
/**
 * Verifies that a scanner obtained with a null range split, and one obtained
 * with null begin and end keys, each visit every row of a sorted table.
 */
@Test
public void testNullSplits() throws IOException, ParseException {
  Path path = new Path(rootPath, "TestBasicTableNullSplits");
  int totalRows = createBasicTable(2, 250, "a, b, c", "", "a", path, true);
  BasicTable.Reader reader = new BasicTable.Reader(path, conf);
  try {
    reader.setProjection("a,d,c,f");
    Assert.assertEquals(totalRows, doReadOnly(reader.getScanner(null, false)));
    Assert.assertEquals(totalRows, doReadOnly(reader.getScanner(null, null,
        false)));
  } finally {
    // Release the reader even when an assertion or a scan fails.
    reader.close();
  }
}
/**
 * Requests -1 range splits on a table created with an empty sort-column
 * string and checks that the resulting splits still cover every row.
 */
@Test
public void testNegativeSplits() throws IOException, ParseException {
  final Path tablePath = new Path(rootPath, "TestNegativeSplits");
  final int rowCount = createBasicTable(2, 250, "a, b, c", "", "", tablePath, true);
  rangeSplitBasicTable(-1, rowCount, "a,d,c,f", tablePath);
}
/**
 * Writes two named meta blocks into a table that was created without the
 * final close, then reads both back and checks their contents.
 */
@Test
public void testMetaBlocks() throws IOException, ParseException {
  Path path = new Path(rootPath, "TestBasicTableMetaBlocks");
  createBasicTable(3, 100, "a, b, c", "", null, path, false);
  BytesWritable meta1 = makeKey(1234);
  BytesWritable meta2 = makeKey(9876);
  BasicTable.Writer writer = new BasicTable.Writer(path, conf);
  writeMetaBlock(writer, "testMetaBlocks.meta1", meta1);
  writeMetaBlock(writer, "testMetaBlocks.meta2", meta2);
  writer.close();
  BasicTable.Reader reader = new BasicTable.Reader(path, conf);
  try {
    reader.setProjection("a,d,c,f");
    verifyMetaBlock(reader, "testMetaBlocks.meta1", meta1);
    verifyMetaBlock(reader, "testMetaBlocks.meta2", meta2);
  } finally {
    // Release the reader even when a meta block does not match.
    reader.close();
  }
}

/** Serializes {@code content} into the meta block {@code name}, always closing the block's stream. */
private static void writeMetaBlock(BasicTable.Writer writer, String name, BytesWritable content)
    throws IOException {
  DataOutputStream dos = writer.createMetaBlock(name);
  try {
    content.write(dos);
  } finally {
    dos.close();
  }
}

/** Reads the meta block {@code name} and asserts that it compares equal to {@code expected}. */
private static void verifyMetaBlock(BasicTable.Reader reader, String name, BytesWritable expected)
    throws IOException {
  BytesWritable actual = new BytesWritable();
  DataInputStream dis = reader.getMetaBlock(name);
  try {
    actual.readFields(dis);
    Assert.assertEquals("content of meta block " + name, 0, actual.compareTo(expected));
  } finally {
    dis.close();
  }
}
/**
 * Round-trips a two-part table with a base of 250 rows per part: twice as an
 * unsorted table at the same path, then once sorted on column "a".
 */
@Test
public void testNormalCases() throws IOException, ParseException {
  final String schema = "a, b, c";
  final String projection = "a, d, c, f";
  final Path tablePath = new Path(rootPath, "TestBasicTableNormal");
  // The unsorted round trip is performed twice against the same path.
  for (int round = 0; round < 2; ++round) {
    doReadWrite(tablePath, 2, 250, schema, "", null, projection, true, false);
  }
  doReadWrite(tablePath, 2, 250, schema, "", "a", projection, true, true);
}
}