/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with this
 * work for additional information regarding copyright ownership. The ASF
 * licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */

package org.apache.hadoop.zebra.pig;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Iterator;
import java.util.StringTokenizer;

import junit.framework.Assert;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.zebra.BaseTestCase;
import org.apache.hadoop.zebra.io.BasicTable;
import org.apache.hadoop.zebra.io.TableInserter;
import org.apache.hadoop.zebra.parser.ParseException;
import org.apache.hadoop.zebra.schema.Schema;
import org.apache.hadoop.zebra.types.TypesUtils;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.backend.executionengine.ExecJob;
import org.apache.pig.data.DataByteArray;
import org.apache.pig.data.Tuple;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * Test projections on simple column types.
 */
public class TestSimpleType extends BaseTestCase {
  final static String STR_SCHEMA =
      "s1:bool, s2:int, s3:long, s4:double, s5:string, s6:bytes";
  final static String STR_STORAGE = "[s1, s2]; [s3, s4]; [s5, s6]";

  private static Path path;

  @BeforeClass
  public static void setUpOnce() throws Exception {
    init();
    path = getTableFullPath("TestSimpleType");
    removeDir(path);

    // Create the table (schema plus storage hint), then reopen it for
    // insertion.
    BasicTable.Writer writer =
        new BasicTable.Writer(path, STR_SCHEMA, STR_STORAGE, conf);
    Schema schema = writer.getSchema();

    BasicTable.Writer writer1 = new BasicTable.Writer(path, conf);
    int part = 0;
    TableInserter inserter = writer1.getInserter("part" + part, true);
    Tuple tuple = TypesUtils.createTuple(schema);
    TypesUtils.resetTuple(tuple);

    // Insert row 1.
    int row = 0;
    tuple.set(0, true); // bool
    tuple.set(1, 1); // int
    tuple.set(2, 1001L); // long
    tuple.set(3, 1.1); // double
    tuple.set(4, "hello world 1"); // string
    tuple.set(5, new DataByteArray("hello byte 1")); // bytes
    inserter.insert(new BytesWritable(
        String.format("k%d%d", part + 1, row + 1).getBytes()), tuple);

    // Insert row 2.
    row++;
    TypesUtils.resetTuple(tuple);
    tuple.set(0, false); // bool
    tuple.set(1, 2); // int
    tuple.set(2, 1002L); // long
    tuple.set(3, 3.1); // double
    tuple.set(4, "hello world 2"); // string
    tuple.set(5, new DataByteArray("hello byte 2")); // bytes
    inserter.insert(new BytesWritable(
        String.format("k%d%d", part + 1, row + 1).getBytes()), tuple);

    inserter.close();
    writer1.finish();
    writer.close();
  }

  @AfterClass
  public static void tearDownOnce() throws IOException {
    BasicTable.drop(path, conf);
  }
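
  /*
   * Illustrative sketch only (not used by the tests, and not part of the
   * original file): the stack-trace string parsing in getCurrentMethodName()
   * below predates convenient JDK APIs; on Java 5+ the same caller lookup can
   * be done with Thread.getStackTrace(). The method name
   * callerMethodNameSketch is our own invention.
   */
  @SuppressWarnings("unused")
  private static String callerMethodNameSketch() {
    // Frame 0 is getStackTrace itself, frame 1 is this method, frame 2 is
    // our caller. (Some VMs may omit frames, so this is best-effort.)
    StackTraceElement caller = Thread.currentThread().getStackTrace()[2];
    return caller.getClassName() + "." + caller.getMethodName();
  }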

  /**
   * Returns the fully qualified name of the method that called
   * getCurrentMethodName, recovered by parsing a printed stack trace.
   */
  public String getCurrentMethodName() {
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    PrintWriter pw = new PrintWriter(baos);
    (new Throwable()).printStackTrace(pw);
    pw.flush();
    String stackTrace = baos.toString();
    pw.close();

    StringTokenizer tok = new StringTokenizer(stackTrace, "\n");
    tok.nextToken(); // 'java.lang.Throwable'
    tok.nextToken(); // 'at ...getCurrentMethodName'
    String l = tok.nextToken(); // 'at ...<caller of getCurrentMethodName>'
    // Parse the third line of the stack trace.
    tok = new StringTokenizer(l.trim(), " <(");
    String t = tok.nextToken(); // 'at'
    t = tok.nextToken(); // '...<caller of getCurrentMethodName>'
    return t;
  }

  // @Test
  public void testReadSimpleStitch() throws IOException, ParseException {
    String query = "records = LOAD '" + path.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('s5,s1');";
    System.out.println(query);
    pigServer.registerQuery(query);

    Iterator<Tuple> it = pigServer.openIterator("records");
    int row = 0;
    while (it.hasNext()) {
      Tuple rowValue = it.next();
      System.out.println(rowValue);
      row++;
      if (row == 1) {
        Assert.assertEquals("hello world 1", rowValue.get(0));
        Assert.assertEquals(true, rowValue.get(1));
      }
      if (row == 2) {
        Assert.assertEquals("hello world 2", rowValue.get(0));
        Assert.assertEquals(false, rowValue.get(1));
      }
    }
  }

  // @Test
  // Test reader: project all columns, in reverse schema order.
  public void testReadSimple1() throws IOException, ParseException {
    String query = "records = LOAD '" + path.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('s6,s5,s4,s3,s2,s1');";
    System.out.println(query);
    pigServer.registerQuery(query);

    Iterator<Tuple> it = pigServer.openIterator("records");
    int row = 0;
    while (it.hasNext()) {
      Tuple rowValue = it.next();
      System.out.println(rowValue);
      row++;
      if (row == 1) {
        Assert.assertEquals(true, rowValue.get(5));
        Assert.assertEquals(1, rowValue.get(4));
        Assert.assertEquals(1001L, rowValue.get(3));
        Assert.assertEquals(1.1, rowValue.get(2));
        Assert.assertEquals("hello world 1", rowValue.get(1));
        Assert.assertEquals("hello byte 1", rowValue.get(0).toString());
      }
      if (row == 2) {
        Assert.assertEquals(false, rowValue.get(5));
        Assert.assertEquals(2, rowValue.get(4));
        Assert.assertEquals(1002L, rowValue.get(3));
        Assert.assertEquals(3.1, rowValue.get(2));
        Assert.assertEquals("hello world 2", rowValue.get(1));
        Assert.assertEquals("hello byte 2", rowValue.get(0).toString());
      }
    }
  }
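
  /*
   * Note on projection order: TableLoader's string argument is a
   * comma-separated projection, and the fields of each loaded tuple follow
   * the projection order rather than the table schema order. That is why
   * testReadSimple1 above, which projects 's6,s5,s4,s3,s2,s1', reads s6 at
   * index 0 and s1 at index 5.
   */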

  // @Test
  // Test reader, negative: project a field that does not exist in the table.
  public void testRead2() throws IOException, ParseException {
    try {
      String query = "records = LOAD '" + path.toString()
          + "' USING org.apache.hadoop.zebra.pig.TableLoader('s7');";
      // The invalid projection ('s7' is not in the schema) should be
      // rejected when the query is registered.
      pigServer.registerQuery(query);
      Assert.fail("Projection should not accept non-existent fields");
    } catch (Exception e) {
      System.out.println(e);
    }
  }

  @Test
  // Store the same table.
  public void testStorer() throws ExecException, IOException {
    //
    // Use Pig LOAD to load testing data for the store.
    //
    String query = "records = LOAD '" + path.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader() as (s1,s2,s3,s4,s5,s6);";
    pigServer.registerQuery(query);
    Iterator<Tuple> it = pigServer.openIterator("records");
    while (it.hasNext()) {
      Tuple rowValue = it.next();
      System.out.println(rowValue);
    }

    //
    // Use Pig STORE to store the testing data.
    //
    Path newPath = new Path(getCurrentMethodName());
    pigServer.store("records", new Path(newPath, "store").toString(),
        TableStorer.class.getCanonicalName() + "('[s1, s2]; [s3, s4]')");
  }

  @Test
  // Store different records: only the second row of the previous table.
  public void testStorer2() throws ExecException, IOException {
    // Load the original table.
    String query = "records = LOAD '" + path.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader() as (s1,s2,s3,s4,s5,s6);";
    System.out.println(query);
    pigServer.registerQuery(query);

    // Filter the original table.
    String query2 = "newRecord = FILTER records BY (s2 >= 2);";
    pigServer.registerQuery(query2);

    // Store the new records to a new table.
    Path newPath = new Path(getCurrentMethodName());
    pigServer.store("newRecord", newPath.toString(),
        TableStorer.class.getCanonicalName() + "('[s1, s2]; [s3, s4]')");

    // Check the new table's content.
    String query3 = "newRecords = LOAD '" + newPath.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('s6,s5,s4,s3,s2,s1');";
    System.out.println(query3);
    pigServer.registerQuery(query3);

    Iterator<Tuple> it3 = pigServer.openIterator("newRecords");
    int row = 0;
    Tuple rowValue2 = null;
    while (it3.hasNext()) {
      rowValue2 = it3.next();
      row++;
      if (row == 1) {
        Assert.assertEquals(false, rowValue2.get(5));
        Assert.assertEquals(2, rowValue2.get(4));
        Assert.assertEquals(1002L, rowValue2.get(3));
        Assert.assertEquals(3.1, rowValue2.get(2));
        Assert.assertEquals("hello world 2", rowValue2.get(1));
        Assert.assertEquals("hello byte 2", rowValue2.get(0).toString());
      }
    }
    Assert.assertEquals(1, row);
  }
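
  /*
   * Note on storage hints: the single string argument to TableStorer (e.g.
   * '[s1, s2]; [s3, s4]') partitions columns into column groups, one
   * bracketed list per group. Judging from the tests below, an empty hint
   * ('', testStorer3) and an empty group ('[]', testStorer4) are both
   * accepted; columns not named in any group (s5, s6 here) are presumably
   * placed in a default column group.
   */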

  @Test
  // Store different records, with an empty storage hint.
  public void testStorer3() throws ExecException, IOException {
    // Load the original table.
    String query = "records = LOAD '" + path.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader() as (s1,s2,s3,s4,s5,s6);";
    System.out.println(query);
    pigServer.registerQuery(query);

    // Filter the original table.
    String query2 = "newRecord = FILTER records BY (s2 >= 2);";
    pigServer.registerQuery(query2);

    // Store the new records to a new table.
    Path newPath = new Path(getCurrentMethodName());
    pigServer.store("newRecord", newPath.toString(),
        TableStorer.class.getCanonicalName() + "('')");

    // Check the new table's content.
    String query3 = "newRecords = LOAD '" + newPath.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('s6,s5,s4,s3,s2,s1');";
    System.out.println(query3);
    pigServer.registerQuery(query3);

    Iterator<Tuple> it3 = pigServer.openIterator("newRecords");
    int row = 0;
    Tuple rowValue2 = null;
    while (it3.hasNext()) {
      rowValue2 = it3.next();
      row++;
      if (row == 1) {
        Assert.assertEquals(false, rowValue2.get(5));
        Assert.assertEquals(2, rowValue2.get(4));
        Assert.assertEquals(1002L, rowValue2.get(3));
        Assert.assertEquals(3.1, rowValue2.get(2));
        Assert.assertEquals("hello world 2", rowValue2.get(1));
        Assert.assertEquals("hello byte 2", rowValue2.get(0).toString());
      }
    }
    Assert.assertEquals(1, row);
  }

  @Test
  // Store different records, with an empty column group.
  public void testStorer4() throws ExecException, IOException {
    // Load the original table.
    String query = "records = LOAD '" + path.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader() as (s1,s2,s3,s4,s5,s6);";
    System.out.println(query);
    pigServer.registerQuery(query);

    // Filter the original table.
    String query2 = "newRecord = FILTER records BY (s2 >= 2);";
    pigServer.registerQuery(query2);

    // Store the new records to a new table.
    Path newPath = new Path(getCurrentMethodName());
    pigServer.store("newRecord", newPath.toString(),
        TableStorer.class.getCanonicalName() + "('[]')");

    // Check the new table's content.
    String query3 = "newRecords = LOAD '" + newPath.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader('s6,s5,s4,s3,s2,s1');";
    System.out.println(query3);
    pigServer.registerQuery(query3);

    Iterator<Tuple> it3 = pigServer.openIterator("newRecords");
    int row = 0;
    Tuple rowValue2 = null;
    while (it3.hasNext()) {
      rowValue2 = it3.next();
      row++;
      if (row == 1) {
        Assert.assertEquals(false, rowValue2.get(5));
        Assert.assertEquals(2, rowValue2.get(4));
        Assert.assertEquals(1002L, rowValue2.get(3));
        Assert.assertEquals(3.1, rowValue2.get(2));
        Assert.assertEquals("hello world 2", rowValue2.get(1));
        Assert.assertEquals("hello byte 2", rowValue2.get(0).toString());
      }
    }
    Assert.assertEquals(1, row);
  }

  @Test
  // Negative: the storage hint names a column (s7) that does not exist in
  // the input schema.
  public void testStorerNegative1() throws ExecException, IOException {
    String query = "records = LOAD '" + path.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
    pigServer.registerQuery(query);
    Iterator<Tuple> it = pigServer.openIterator("records");
    while (it.hasNext()) {
      Tuple rowValue = it.next();
      System.out.println(rowValue);
    }

    try {
      Path newPath = new Path(getCurrentMethodName());
      pigServer.store("records", new Path(newPath, "store").toString(),
          TableStorer.class.getCanonicalName() + "('[s7, s2]; [s3, s4]')");
    } catch (Exception e) {
      System.out.println(e);
      return;
    }
    Assert.fail("Exception expected");
  }
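
  /*
   * Note on failure checking: the negative store tests use two styles.
   * testStorerNegative1 above and testStorerNegative2/3 and testStorer5
   * below expect pigServer.store(...) to throw, so they catch the exception
   * and return, reaching Assert.fail() only if the store succeeded.
   * testStorerNegative4 instead inspects the returned ExecJob via
   * getException(), which also covers failures reported by the job itself
   * rather than thrown at submission time.
   */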

  @Test
  // Negative: the storage hint lists the same column (s1) in two column
  // groups.
  public void testStorerNegative2() throws ExecException, IOException {
    String query = "records = LOAD '" + path.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
    pigServer.registerQuery(query);
    Iterator<Tuple> it = pigServer.openIterator("records");
    while (it.hasNext()) {
      Tuple rowValue = it.next();
      System.out.println(rowValue);
    }

    try {
      Path newPath = new Path(getCurrentMethodName());
      pigServer.store("records", new Path(newPath, "store").toString(),
          TableStorer.class.getCanonicalName() + "('[s1, s2]; [s1, s4]')");
    } catch (Exception e) {
      System.out.println(e);
      return;
    }
    Assert.fail("Exception expected");
  }

  @Test
  // Negative: the storage hint contains duplicate column groups.
  public void testStorerNegative3() throws ExecException, IOException {
    String query = "records = LOAD '" + path.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
    pigServer.registerQuery(query);
    Iterator<Tuple> it = pigServer.openIterator("records");
    while (it.hasNext()) {
      Tuple rowValue = it.next();
      System.out.println(rowValue);
    }

    try {
      Path newPath = new Path(getCurrentMethodName());
      pigServer.store("records", new Path(newPath, "store").toString(),
          TableStorer.class.getCanonicalName() + "('[s1]; [s1]')");
    } catch (Exception e) {
      System.out.println(e);
      return;
    }
    Assert.fail("Exception expected");
  }

  // @Test
  // Negative: the schema description differs from the input tuple (different
  // data types for columns).
  public void testStorerNegative4() throws ExecException, IOException {
    String query = "records = LOAD '" + path.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader();";
    pigServer.registerQuery(query);
    Iterator<Tuple> it = pigServer.openIterator("records");
    while (it.hasNext()) {
      Tuple rowValue = it.next();
      System.out.println(rowValue);
    }

    Path newPath = new Path(getCurrentMethodName());
    ExecJob pigJob = pigServer.store("records",
        new Path(newPath, "store").toString(),
        TableStorer.class.getCanonicalName() + "('[s1, s2]; [s3, s4]')");
    Assert.assertNotNull(pigJob.getException());
    System.out.println(pigJob.getException());
  }

  @Test
  // Negative: store to the same path as the source table; the store should
  // fail.
  public void testStorer5() throws ExecException, IOException {
    //
    // Use Pig LOAD to load testing data for the store.
    //
    String query = "records = LOAD '" + path.toString()
        + "' USING org.apache.hadoop.zebra.pig.TableLoader() as (s1,s2,s3,s4,s5,s6);";
    pigServer.registerQuery(query);

    //
    // Use Pig STORE to store the testing data back to the source path.
    //
    System.out.println("path = " + path);
    try {
      pigServer.store("records", path.toString(),
          TableStorer.class.getCanonicalName() + "('[s1, s2]; [s3, s4]')");
    } catch (Exception e) {
      System.out.println(e);
      return;
    }
    Assert.fail("Exception expected");
  }
}