/** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with this * work for additional information regarding copyright ownership. The ASF * licenses this file to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package org.apache.hadoop.zebra.io; import java.io.IOException; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.zebra.parser.ParseException; import org.apache.hadoop.zebra.io.BasicTable.Reader.RangeSplit; import org.apache.hadoop.zebra.schema.Schema; import org.apache.hadoop.zebra.types.TypesUtils; import org.apache.pig.data.DataBag; import org.apache.pig.data.DataByteArray; import org.apache.pig.data.Tuple; import org.junit.AfterClass; import org.junit.Assert; import org.junit.BeforeClass; import org.junit.Test; public class TestDropColumnGroup { Log LOG = LogFactory.getLog(TestDropColumnGroup.class); private static Path path; private static Configuration conf; private static FileSystem fs; @BeforeClass public static void setUpOnce() throws IOException { TestBasicTable.setUpOnce(); path = new Path(TestBasicTable.rootPath, "DropCGTest"); conf = TestBasicTable.conf; Log LOG = LogFactory.getLog(TestDropColumnGroup.class); fs = path.getFileSystem(conf); } @AfterClass public static void tearDown() throws IOException { BasicTable.drop(path, conf); } /** * Utitility function to open a table with a given projection and verify that * certain fields in the returned tuple are null and certain fields are not. */ void verifyScanner(Path path, Configuration conf, String projection, boolean isNullExpected[], int numRowsToRead) throws IOException, ParseException { BasicTable.Reader reader = new BasicTable.Reader(path, conf); reader.setProjection(projection); TableScanner scanner = reader.getScanner(null, true); Tuple row = TypesUtils.createTuple(reader.getSchema()); for (int i = 0; i < numRowsToRead; i++) { scanner.getValue(row); for (int f = 0; f < isNullExpected.length; f++) { if (isNullExpected[f] ^ row.get(f) == null) { throw new IOException("Verification failure at field " + f + " row " + i + " : expected " + (isNullExpected[f] ? "NULL" : "nonNULL") + " but got opposite."); } } scanner.advance(); } scanner.close(); } int countRows(Path path, Configuration conf, String projection) throws IOException, ParseException { BasicTable.Reader reader = new BasicTable.Reader(path, conf); reader.setProjection(projection); TableScanner scanner = reader.getScanner(null, true); int count = 0; while (!scanner.atEnd()) { count++; scanner.advance(); } scanner.close(); return count; } @Test public void testDropColumnGroup() throws IOException, ParseException { /* * Tests basic drop columns feature. Also tests that fields in dropped * columns can be read the value returned is null. */ if (fs.exists(path)) { BasicTable.drop(path, conf); } int numRows = TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f", "[a, b]; [c, d]", null, path, true); int rowsToRead = Math.min(10, numRows); // normal table. verifyScanner(path, conf, "a, c, x", new boolean[] { false, false, true }, rowsToRead); // Now delete ([c, d) BasicTable.dropColumnGroup(path, conf, "CG1"); // check various read cases. verifyScanner(path, conf, "c, a", new boolean[] { true, false }, rowsToRead); verifyScanner(path, conf, "c, a", new boolean[] { true, false }, rowsToRead); verifyScanner(path, conf, "c, a, b, f, d, e", new boolean[] { true, false, false, false, true, false }, rowsToRead); BasicTable.dumpInfo(path.toString(), System.err, conf); // Drop CG0 ([a, b]) BasicTable.dropColumnGroup(path, conf, "CG0"); verifyScanner(path, conf, "a, b", new boolean[] { true, true }, rowsToRead); // Drop remaining CG2 BasicTable.dropColumnGroup(path, conf, "CG2"); verifyScanner(path, conf, "a, b, c, d, e, f", new boolean[] { true, true, true, true, true, true }, rowsToRead); // Now make sure the reader reports zero rows. Assert.assertTrue(countRows(path, conf, "c, e, b") == 0); // delete the table BasicTable.drop(path, conf); /* * Try similar tests with range splits. */ // 5 splits and 50 rows numRows = TestBasicTable.createBasicTable(5, 50, "a, b, c, d, e, f", "[a, b]; [c, d]; [e] as myCG", null, path, true); BasicTable.dropColumnGroup(path, conf, "myCG"); verifyScanner(path, conf, "e, c, g, b", new boolean[] { true, false, true, false }, numRows); TestBasicTable.doRangeSplit(new int[] { 4, 0, 2 }, numRows, "a, b, c, e, f, x", path); // Remove another CG. BasicTable.dropColumnGroup(path, conf, "CG0"); TestBasicTable.doRangeSplit(new int[] { 4, 0, 2, 3, 1 }, numRows, "a, y, c, e, f, x", path); BasicTable.drop(path, conf); } @Test public void test2() throws IOException, ParseException { /* * Tests concurrent drop CGs */ if (fs.exists(path)) { BasicTable.drop(path, conf); } int numRows = TestBasicTable.createBasicTable(1, 10, "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10," + "f11,f12,f13,f14,f15,f16,f17,f18,f19,f20," + "f21,f22,f23,f24,f25,f26,f27,f28,f29,f30," + "f31,f32,f33,f34,f35,f36,f37,f38,f39,f40," + "f41,f42,f43,f44,f45,f46,f47,f48,f49,f50", "[f1];[f2];[f3];[f4];[f5];[f6];[f7];[f8];[f9];[f10];" + "[f11];[f12];[f13];[f14];[f15];[f16];[f17];[f18];[f19];[f20];" + "[f21];[f22];[f23];[f24];[f25];[f26];[f27];[f28];[f29];[f30];" + "[f31];[f32];[f33];[f34];[f35];[f36];[f37];[f38];[f39];[f40];" + "[f41];[f42];[f43];[f44];[f45];[f46];[f47];[f48];[f49];[f50]", null, path, true); System.out.println("First dump:"); BasicTable.dumpInfo(path.toString(), System.out, conf); int rowsToRead = Math.min(10, numRows); // normal table. verifyScanner(path, conf, "f1, f3, xx", new boolean[] { false, false, true }, rowsToRead); // create a thread for each dropCG DropThread[] threads = new DropThread[50]; for (int i = 0; i < threads.length; i++) { threads[i] = new DropThread(i, 50); } // start the threads for (int j = 0; j < threads.length; j++) { threads[j].start(); } for (Thread thr : threads) { try { thr.join(); } catch (InterruptedException e) { e.printStackTrace(); } } // check various read cases. verifyScanner(path, conf, "f3, f1, f2, f6, f4, f5", new boolean[] { true, true, true, true, true, true }, rowsToRead); System.out.println("second dump"); BasicTable.dumpInfo(path.toString(), System.out, conf); // Now make sure the reader reports zero rows. Assert.assertTrue(countRows(path, conf, "f3, f5, f2") == 0); // delete the table BasicTable.drop(path, conf); } @Test public void test3() throws IOException, ParseException { /* * Tests concurrrent drop CGs while one fails */ if (fs.exists(path)) { BasicTable.drop(path, conf); } int numRows = TestBasicTable.createBasicTable(1, 10, "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10," + "f11,f12,f13,f14,f15,f16,f17,f18,f19,f20," + "f21,f22,f23,f24,f25,f26,f27,f28,f29,f30," + "f31,f32,f33,f34,f35,f36,f37,f38,f39,f40," + "f41,f42,f43,f44,f45,f46,f47,f48,f49,f50", "[f1];[f2];[f3];[f4];[f5];[f6];[f7];[f8];[f9];[f10];" + "[f11];[f12];[f13];[f14];[f15];[f16];[f17];[f18];[f19];[f20];" + "[f21];[f22];[f23];[f24];[f25];[f26];[f27];[f28];[f29];[f30];" + "[f31];[f32];[f33];[f34];[f35];[f36];[f37];[f38];[f39];[f40];" + "[f41];[f42];[f43];[f44];[f45];[f46];[f47];[f48];[f49];[f50]", null, path, true); System.out.println("First dump:"); BasicTable.dumpInfo(path.toString(), System.out, conf); int rowsToRead = Math.min(10, numRows); // normal table. verifyScanner(path, conf, "f1, f3, xx", new boolean[] { false, false, true }, rowsToRead); // create a thread for each dropCG DropThread[] threads = new DropThread[60]; for (int i = 0; i < threads.length; i++) { threads[i] = new DropThread(i, 50); } // start the threads for (int j = 0; j < threads.length; j++) { threads[j].start(); } for (Thread thr : threads) { try { thr.join(); } catch (InterruptedException e) { e.printStackTrace(); } } // check various read cases. verifyScanner(path, conf, "f3, f1, f2, f6, f4, f5", new boolean[] { true, true, true, true, true, true }, rowsToRead); System.out.println("second dump"); BasicTable.dumpInfo(path.toString(), System.out, conf); // Now make sure the reader reports zero rows. Assert.assertTrue(countRows(path, conf, "f3, f5, f2") == 0); // delete the table BasicTable.drop(path, conf); } @Test public void test5() throws IOException, ParseException { /* * Tests drop CGs while reading the same CGs */ System.out.println("######int test 5"); if (fs.exists(path)) { BasicTable.drop(path, conf); } int numRows = TestBasicTable.createBasicTable(1, 100000, "a, b, c, d, e, f, g, h, i, j, k, l, m, n", "[a, b]; [c, d]; [e]; [f]; [g]; [h]; [i]; [j]; [k]; [l]; [m]; [n]", null, path, true); System.out.println("in test5 , dump infor 1"); BasicTable.dumpInfo(path.toString(), System.out, conf); int minRowsToRead = 10000; int numOfReadThreads = 20; int rowsToRead = Math.min(minRowsToRead, numRows); // normal table. verifyScanner(path, conf, "a, c, x", new boolean[] { false, false, true }, rowsToRead); // create a thread for each dropCG DropThread[] dropThreads = new DropThread[12]; for (int i = 0; i < dropThreads.length; i++) { dropThreads[i] = new DropThread(i, 12); } // start the threads for (int j = 0; j < dropThreads.length; j++) { dropThreads[j].start(); } // create read threads ReadThread[] readThreads = new ReadThread[numOfReadThreads]; for (int i = 0; i < readThreads.length; i++) { readThreads[i] = new ReadThread(i, "a, b, c, d, e, f", 1000); } // start the threads for (int j = 0; j < readThreads.length; j++) { readThreads[j].start(); } for (Thread thr : dropThreads) { try { thr.join(); } catch (InterruptedException e) { e.printStackTrace(); } } for (Thread thr : readThreads) { try { thr.join(); } catch (InterruptedException e) { e.printStackTrace(); } } verifyScanner(path, conf, "c, a, b, f, d, e", new boolean[] { true, true, true, true, true, true }, rowsToRead); System.out.println("second dump"); BasicTable.dumpInfo(path.toString(), System.out, conf); // Now make sure the reader reports zero rows. Assert.assertTrue(countRows(path, conf, "c, e, b") == 0); // delete the table BasicTable.drop(path, conf); } @Test public void test11() throws IOException, ParseException { /* * Tests test open non-existing table. */ try { new BasicTable.Reader(new Path(path.toString(), "non-existing"), conf); Assert.fail("read none existing table should fail"); } catch (Exception e) { } } @Test public void test12() throws IOException, ParseException { /* * Tests API, path is wrong */ if (fs.exists(path)) { BasicTable.drop(path, conf); } TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f", "[a];[b];[c];[d];[e];[f]", null, path, true); Path wrongPath = new Path(path.toString() + "non-existing"); try { BasicTable.dropColumnGroup(wrongPath, conf, "CG0"); Assert.fail("should throw excepiton"); } catch (Exception e) { } BasicTable.drop(path, conf); } @Test public void test13() throws IOException, ParseException { /* * Tests API, conf is null */ Path path1 = new Path(path.toString() + "13"); TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f", "[a];[b];[c];[d];[e];[f]", null, path1, true); try { BasicTable.dropColumnGroup(path1, null, "CG0"); Assert.fail("should throw excepiton"); } catch (Exception e) { } BasicTable.drop(path1, conf); } @Test public void test14() throws IOException, ParseException { /* * Tests API, CG name is empty string */ Path path1 = new Path(path.toString() + "14"); TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f", "[a];[b];[c];[d];[e];[f]", null, path1, true); try { BasicTable.dropColumnGroup(path1, conf, ""); Assert.fail("should throw excepiton"); } catch (Exception e) { } BasicTable.drop(path1, conf); } @Test public void test15() throws IOException, ParseException { /* * Tests API, CG name is null */ Path path1 = new Path(path.toString() + "15"); TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f", "[a];[b];[c];[d];[e];[f]", null, path1, true); try { BasicTable.dropColumnGroup(path1, conf, null); Assert.fail("should throw excepiton"); } catch (Exception e) { } BasicTable.drop(path1, conf); } @Test public void test16() throws IOException, ParseException { /* * Tests delete same CG multiple times */ Path path1 = new Path(path.toString() + "16"); int numRows = TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f", "[a, b]; [c, d]", null, path1, true); int rowsToRead = Math.min(10, numRows); // normal table. verifyScanner(path1, conf, "a, c, x", new boolean[] { false, false, true }, rowsToRead); // Now delete ([c, d) BasicTable.dropColumnGroup(path1, conf, "CG1"); // check various read cases. verifyScanner(path1, conf, "c, a", new boolean[] { true, false }, rowsToRead); // Now delete ([c, d)again BasicTable.dropColumnGroup(path1, conf, "CG1"); verifyScanner(path1, conf, "c, a", new boolean[] { true, false }, rowsToRead); BasicTable.drop(path1, conf); } @Test public void test17() throws IOException, ParseException { /* * test rangesplit */ System.out.println("test 17"); Path path1 = new Path(path.toString() + "17"); TestBasicTable.createBasicTable(1, 10, "a, b, c, d, e, f", "[a,b,c,d,e,f]", null, path1, true); BasicTable.dropColumnGroup(path1, conf, "CG0"); BasicTable.Reader reader = new BasicTable.Reader(path1, conf); reader.setProjection("a, b, c, d, e, f"); List<RangeSplit> splits = reader.rangeSplit(1); TableScanner scanner = null; try { scanner = reader.getScanner(splits.get(0), true); } catch (Exception e) { System.out.println("in test 17, getScanner"); e.printStackTrace(); } Tuple RowValue = TypesUtils.createTuple(scanner.getSchema()); scanner.getValue(RowValue); Assert.assertEquals(null, RowValue.get(0)); Assert.assertFalse(scanner.advance()); scanner.getValue(RowValue); Assert.assertEquals(null, RowValue.get(0)); BasicTable.drop(path1, conf); } /** * A thread that performs a DropColumnGroup. */ class DropThread extends Thread { private int id; private int cntCGs; public DropThread(int id, int cntCGs) { this.id = id; this.cntCGs = cntCGs; } /** * Executes DropColumnGroup. */ public void run() { try { int total = cntCGs; int digits = 1; while (total >= 10) { ++ digits; total /= 10; } String formatString = "%0" + digits + "d"; String str = "CG" + String.format(formatString, id); System.out.println(id + ": Droping CG: " + str); BasicTable.dropColumnGroup(path, conf, str); } catch (Exception e) { System.out.println(id + " - error: " + e); e.printStackTrace(); } } } /** * A thread that performs a ReadColumnGroup. */ class ReadThread extends Thread { private int id; private String projection; private int numRowsToRead; public ReadThread(int id, String projection, int numRowsToRead) { this.id = id; this.projection = projection; this.numRowsToRead = numRowsToRead; } /** * Executes DropColumnGroup. */ public void run() { BasicTable.Reader reader = null; try { reader = new BasicTable.Reader(path, conf); reader.setProjection(projection); TableScanner scanner = reader.getScanner(null, true); Tuple row = TypesUtils.createTuple(reader.getSchema()); for (int i = 0; i < numRowsToRead; i++) { scanner.getValue(row); } scanner.advance(); scanner.close(); } catch (Exception e) { e.printStackTrace(); } } } }