/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with this * work for additional information regarding copyright ownership. The ASF * licenses this file to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package org.apache.hadoop.zebra.mapreduce; import java.io.IOException; import java.util.List; import java.util.StringTokenizer; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.zebra.io.BasicTable; import org.apache.hadoop.zebra.io.TestBasicTable; import org.apache.hadoop.zebra.mapreduce.RowTableSplit; import org.apache.hadoop.zebra.mapreduce.TableInputFormat; import org.apache.hadoop.zebra.parser.ParseException; import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; public class TestTfileSplit { private static Configuration conf; private static Path path; @BeforeClass public static void setUpOnce() throws IOException { TestBasicTable.setUpOnce(); conf = TestBasicTable.conf; path = new Path(TestBasicTable.rootPath, "TfileSplitTest"); } @AfterClass public static void tearDown() throws IOException { BasicTable.drop(path, conf); } /* In this test, we test creating input splits for the projection on non-existing column case. * The first non-deleted column group should be used for split. */ @Test public void testTfileSplit1() throws IOException, ParseException { BasicTable.drop(path, conf); TestBasicTable.createBasicTable(1, 100, "a, b, c, d, e, f", "[a, b]; [c, d]", null, path, true); TableInputFormat inputFormat = new TableInputFormat(); Job job = new Job(conf); inputFormat.setInputPaths(job, path); inputFormat.setMinSplitSize(job, 100); inputFormat.setProjection(job, "aa"); List<InputSplit> splits = inputFormat.getSplits(job); RowTableSplit split = (RowTableSplit) splits.get(0); String str = split.getSplit().toString(); StringTokenizer tokens = new StringTokenizer(str, "\n"); str = tokens.nextToken(); tokens = new StringTokenizer(str, " "); tokens.nextToken(); tokens.nextToken(); String s = tokens.nextToken(); s = s.substring(0, s.length()-1); int cgIndex = Integer.parseInt(s); Assert.assertEquals(cgIndex, 0); } /* In this test, we test creating input splits when dropped column groups are around. * Here the projection involves all columns and only one valid column group is present. * As such, that column group should be used for split.*/ @Test public void testTfileSplit2() throws IOException, ParseException { BasicTable.drop(path, conf); TestBasicTable.createBasicTable(1, 100, "a, b, c, d, e, f", "[a, b]; [c, d]", null, path, true); BasicTable.dropColumnGroup(path, conf, "CG0"); BasicTable.dropColumnGroup(path, conf, "CG2"); TableInputFormat inputFormat = new TableInputFormat(); Job job = new Job(conf); inputFormat.setInputPaths(job, path); inputFormat.setMinSplitSize(job, 100); List<InputSplit> splits = inputFormat.getSplits(job); RowTableSplit split = (RowTableSplit) splits.get( 0 ); String str = split.getSplit().toString(); StringTokenizer tokens = new StringTokenizer(str, "\n"); str = tokens.nextToken(); tokens = new StringTokenizer(str, " "); tokens.nextToken(); tokens.nextToken(); String s = tokens.nextToken(); s = s.substring(0, s.length()-1); int cgIndex = Integer.parseInt(s); Assert.assertEquals(cgIndex, 1); } /* In this test, we test creating input splits when there is no valid column group present. * Should return 0 splits. */ @Test public void testTfileSplit3() throws IOException, ParseException { BasicTable.drop(path, conf); TestBasicTable.createBasicTable(1, 100, "a, b, c, d, e, f", "[a, b]; [c, d]", null, path, true); BasicTable.dropColumnGroup(path, conf, "CG0"); BasicTable.dropColumnGroup(path, conf, "CG1"); BasicTable.dropColumnGroup(path, conf, "CG2"); TableInputFormat inputFormat = new TableInputFormat(); Job job = new Job(conf); inputFormat.setInputPaths(job, path); inputFormat.setMinSplitSize(job, 100); List<InputSplit> splits = inputFormat.getSplits(job); Assert.assertEquals(splits.size(), 0); } @Test public void testSortedSplitOrdering() throws IOException, ParseException { BasicTable.drop(path, conf); TestBasicTable.createBasicTable(1, 1000000, "a, b, c, d, e, f", "[a, e, d]", "a", path, true); TableInputFormat inputFormat = new TableInputFormat(); Job job = new Job(conf); inputFormat.setInputPaths(job, path); inputFormat.setMinSplitSize(job, 100); inputFormat.setProjection(job, "d"); inputFormat.requireSortedTable( job, null ); List<InputSplit> splits = inputFormat.getSplits(job); int index = 0; for( InputSplit is : splits ) { Assert.assertTrue( is instanceof SortedTableSplit ); SortedTableSplit split = (SortedTableSplit)is; Assert.assertEquals( index++, split.getIndex() ); } } }