TestTfileSplit.java example

Explorer
spork-streaming-master
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package org.apache.hadoop.zebra.mapreduce;

import java.io.IOException;
import java.util.List;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.zebra.io.BasicTable;
import org.apache.hadoop.zebra.io.TestBasicTable;
import org.apache.hadoop.zebra.mapreduce.RowTableSplit;
import org.apache.hadoop.zebra.mapreduce.TableInputFormat;
import org.apache.hadoop.zebra.parser.ParseException;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;

public class TestTfileSplit {
	private static Configuration conf;
	private static Path path;

	@BeforeClass
	public static void setUpOnce() throws IOException {
		TestBasicTable.setUpOnce();
		conf = TestBasicTable.conf;
		path = new Path(TestBasicTable.rootPath, "TfileSplitTest");
	}

	@AfterClass
	public static void tearDown() throws IOException {
		BasicTable.drop(path, conf);
	}

	/* In this test, we test creating input splits for the projection on non-existing column case.
	 * The first non-deleted column group should be used for split. */ 
	@Test
	public void testTfileSplit1() 
	throws IOException, ParseException {
		BasicTable.drop(path, conf);
		TestBasicTable.createBasicTable(1, 100, "a, b, c, d, e, f", "[a, b]; [c, d]", null, path, true);    

		TableInputFormat inputFormat = new TableInputFormat();
		Job job = new Job(conf);
		inputFormat.setInputPaths(job, path);
		inputFormat.setMinSplitSize(job, 100);
		inputFormat.setProjection(job, "aa");
		List<InputSplit> splits = inputFormat.getSplits(job);

		RowTableSplit split = (RowTableSplit) splits.get(0);
		String str = split.getSplit().toString();
		StringTokenizer tokens = new StringTokenizer(str, "\n");
		str = tokens.nextToken();
		tokens = new StringTokenizer(str, " ");
		tokens.nextToken();
		tokens.nextToken();
		String s = tokens.nextToken();
		s = s.substring(0, s.length()-1);
		int cgIndex = Integer.parseInt(s);
		Assert.assertEquals(cgIndex, 0); 
	}

	/* In this test, we test creating input splits when dropped column groups are around.
	 * Here the projection involves all columns and only one valid column group is present.
	 * As such, that column group should be used for split.*/
	@Test
	public void testTfileSplit2() 
	throws IOException, ParseException {    
		BasicTable.drop(path, conf);
		TestBasicTable.createBasicTable(1, 100, "a, b, c, d, e, f", "[a, b]; [c, d]", null, path, true);    
		BasicTable.dropColumnGroup(path, conf, "CG0");
		BasicTable.dropColumnGroup(path, conf, "CG2");

		TableInputFormat inputFormat = new TableInputFormat();
		Job job = new Job(conf);
		inputFormat.setInputPaths(job, path);
		inputFormat.setMinSplitSize(job, 100);
		List<InputSplit> splits = inputFormat.getSplits(job);

		RowTableSplit split = (RowTableSplit) splits.get( 0 );
		String str = split.getSplit().toString(); 
		StringTokenizer tokens = new StringTokenizer(str, "\n");
		str = tokens.nextToken();
		tokens = new StringTokenizer(str, " ");
		tokens.nextToken();
		tokens.nextToken();
		String s = tokens.nextToken();
		s = s.substring(0, s.length()-1);
		int cgIndex = Integer.parseInt(s);
		Assert.assertEquals(cgIndex, 1); 
	}

	/* In this test, we test creating input splits when there is no valid column group present.
	 * Should return 0 splits. */
	@Test
	public void testTfileSplit3() 
	throws IOException, ParseException {    
		BasicTable.drop(path, conf);
		TestBasicTable.createBasicTable(1, 100, "a, b, c, d, e, f", "[a, b]; [c, d]", null, path, true);    
		BasicTable.dropColumnGroup(path, conf, "CG0");
		BasicTable.dropColumnGroup(path, conf, "CG1");
		BasicTable.dropColumnGroup(path, conf, "CG2");

		TableInputFormat inputFormat = new TableInputFormat();
		Job job = new Job(conf);
		inputFormat.setInputPaths(job, path);
		inputFormat.setMinSplitSize(job, 100);
		List<InputSplit> splits = inputFormat.getSplits(job);

		Assert.assertEquals(splits.size(), 0);
	}
	
	@Test
	public void testSortedSplitOrdering() throws IOException, ParseException {
		BasicTable.drop(path, conf);
		TestBasicTable.createBasicTable(1, 1000000, "a, b, c, d, e, f", "[a, e, d]", "a", path, true);    

		TableInputFormat inputFormat = new TableInputFormat();
		Job job = new Job(conf);
		inputFormat.setInputPaths(job, path);
		inputFormat.setMinSplitSize(job, 100);
		inputFormat.setProjection(job, "d");
		inputFormat.requireSortedTable( job, null );
		List<InputSplit> splits = inputFormat.getSplits(job);
		
		int index = 0;
		for( InputSplit is : splits ) {
			Assert.assertTrue( is instanceof SortedTableSplit );
			SortedTableSplit split = (SortedTableSplit)is;
			Assert.assertEquals( index++, split.getIndex() );
		}
	}
	
}