// TestTfileSplit.java example
//
// Explorer
// spork-streaming-master
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package org.apache.hadoop.zebra.mapred;

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.zebra.mapred.TableInputFormat;
import org.apache.hadoop.zebra.io.BasicTable;
import org.apache.hadoop.zebra.io.TestBasicTable;
import org.apache.hadoop.zebra.parser.ParseException;
import org.junit.AfterClass;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * Tests which column group {@link TableInputFormat} uses when it creates input
 * splits, including the cases where the projection names a non-existing column
 * and where some or all column groups have been dropped.
 */
public class TestTfileSplit {
  /** Minimum split size handed to {@code setMinSplitSize} in every test. */
  private static final int MIN_SPLIT_SIZE = 100;

  /** Second argument handed to {@code getSplits} in every test. */
  private static final int REQUESTED_SPLITS = 40;

  private static Configuration conf;
  private static Path path;

  /** Reuses the configuration and root path prepared by {@link TestBasicTable}. */
  @BeforeClass
  public static void setUpOnce() throws IOException {
    TestBasicTable.setUpOnce();
    conf = TestBasicTable.conf;
    path = new Path(TestBasicTable.rootPath, "TfileSplitTest");
  }

  /** Drops the table left behind by the last test. */
  @AfterClass
  public static void tearDown() throws IOException {
    BasicTable.drop(path, conf);
  }

  /**
   * Creates input splits for a projection on a non-existing column.
   * The first non-deleted column group should be used for the split.
   */
  @Test
  public void testTfileSplit1()
          throws IOException, ParseException {
    BasicTable.drop(path, conf);
    TestBasicTable.createBasicTable(1, 100, "a, b, c, d, e, f", "[a, b]; [c, d]", null, path, true);

    InputSplit[] splits = computeSplits("aa");

    Assert.assertEquals(0, columnGroupIndexOfFirstSplit(splits));
  }

  /**
   * Creates input splits when dropped column groups are around. No projection
   * is set, and CG0 and CG2 are dropped beforehand, so the remaining column
   * group (index 1) should be used for the split.
   */
  @Test
  public void testTfileSplit2()
          throws IOException, ParseException {
    BasicTable.drop(path, conf);
    TestBasicTable.createBasicTable(1, 100, "a, b, c, d, e, f", "[a, b]; [c, d]", null, path, true);
    BasicTable.dropColumnGroup(path, conf, "CG0");
    BasicTable.dropColumnGroup(path, conf, "CG2");

    InputSplit[] splits = computeSplits(null);

    Assert.assertEquals(1, columnGroupIndexOfFirstSplit(splits));
  }

  /**
   * Creates input splits when CG0, CG1 and CG2 have all been dropped, i.e. no
   * valid column group is present. Zero splits should be returned.
   */
  @Test
  public void testTfileSplit3()
          throws IOException, ParseException {
    BasicTable.drop(path, conf);
    TestBasicTable.createBasicTable(1, 100, "a, b, c, d, e, f", "[a, b]; [c, d]", null, path, true);
    BasicTable.dropColumnGroup(path, conf, "CG0");
    BasicTable.dropColumnGroup(path, conf, "CG1");
    BasicTable.dropColumnGroup(path, conf, "CG2");

    InputSplit[] splits = computeSplits(null);

    Assert.assertEquals(0, splits.length);
  }

  /**
   * Configures a fresh {@link TableInputFormat} over the test table and asks it
   * for its splits.
   *
   * @param projection projection to set on the job, or {@code null} to leave
   *                   the projection unset
   * @return the splits returned by {@code getSplits}
   */
  private static InputSplit[] computeSplits(String projection)
          throws IOException, ParseException {
    TableInputFormat inputFormat = new TableInputFormat();
    JobConf jobConf = new JobConf(conf);
    inputFormat.setInputPaths(jobConf, path);
    inputFormat.setMinSplitSize(jobConf, MIN_SPLIT_SIZE);
    if (projection != null) {
      inputFormat.setProjection(jobConf, projection);
    }
    return inputFormat.getSplits(jobConf, REQUESTED_SPLITS);
  }

  /**
   * Extracts the column group index from the textual form of the first split.
   * Fails with an assertion error, rather than an index error, when no split
   * was produced.
   *
   * @param splits splits returned by the input format
   * @return the integer parsed from the first split's description
   */
  private static int columnGroupIndexOfFirstSplit(InputSplit[] splits)
          throws IOException, ParseException {
    Assert.assertTrue("expected at least one input split", splits.length > 0);
    RowTableSplit firstSplit = (RowTableSplit) splits[0];
    return parseColumnGroupIndex(firstSplit.getSplit().toString());
  }

  /**
   * Parses the third space-separated token on the first line of a split
   * description, after removing that token's last character.
   * NOTE(review): presumably the last character is a delimiter following the
   * index in the split's toString() output; confirm against that method.
   *
   * @param splitDescription the {@code toString()} output of a split
   * @return the parsed integer
   */
  private static int parseColumnGroupIndex(String splitDescription) {
    String firstLine = new StringTokenizer(splitDescription, "\n").nextToken();
    StringTokenizer fields = new StringTokenizer(firstLine, " ");
    fields.nextToken();
    fields.nextToken();
    String indexToken = fields.nextToken();
    return Integer.parseInt(indexToken.substring(0, indexToken.length() - 1));
  }
}