/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.drill.exec.store.avro; import com.google.common.collect.Lists; import org.apache.drill.BaseTestQuery; import org.apache.drill.TestBuilder; import org.apache.drill.common.exceptions.UserException; import org.apache.drill.common.exceptions.UserRemoteException; import org.apache.drill.exec.util.JsonStringHashMap; import org.junit.Assert; import org.junit.Test; import java.util.List; import java.util.Map; import static org.apache.drill.TestBuilder.listOf; /** * Unit tests for Avro record reader. */ public class AvroFormatTest extends BaseTestQuery { // XXX // 1. Need to test nested field names with same name as top-level names for conflict. // 2. Avro supports recursive types? Can we test this? @Test public void testBatchCutoff() throws Exception { final AvroTestUtil.AvroTestRecordWriter testSetup = AvroTestUtil.generateSimplePrimitiveSchema_NoNullValues(5000); final String file = testSetup.getFilePath(); final String sql = "select a_string, b_int, c_long, d_float, e_double, f_bytes, h_boolean, g_null " + "from dfs_test.`" + file + "`"; test(sql); testBuilder() .sqlQuery(sql) .unOrdered() .expectsNumBatches(2) .baselineRecords(testSetup.getExpectedRecords()) .go(); } /** * Previously a bug in the Avro table metadata would cause wrong results * for some queries on varchar types, as a length was not provided during metadata * population. In some cases casts were being added with the default length * of 1 and truncating values. * * @throws Exception */ @Test public void testFiltersOnVarchar() throws Exception { final AvroTestUtil.AvroTestRecordWriter testSetup = AvroTestUtil.generateSimplePrimitiveSchema_NoNullValues(5000); final String file = testSetup.getFilePath(); final String sql = "select a_string " + "from dfs_test.`" + file + "` where a_string = 'a_1'"; testBuilder() .sqlQuery(sql) .unOrdered() .baselineColumns("a_string") .baselineValues("a_1") .go(); final String sql2 = "select a_string " + "from dfs_test.`" + file + "` where a_string IN ('a_1')"; testBuilder() .sqlQuery(sql2) .unOrdered() .baselineColumns("a_string") .baselineValues("a_1") .go(); } @Test public void testFiltersOnVarBinary() throws Exception { final AvroTestUtil.AvroTestRecordWriter testSetup = AvroTestUtil.generateSimplePrimitiveSchema_NoNullValues(5000); final String file = testSetup.getFilePath(); final String sql = "select f_bytes " + "from dfs_test.`" + file + "` where f_bytes = BINARY_STRING('\\x61\\x31')"; TestBuilder testBuilder = testBuilder() .sqlQuery(sql) .unOrdered() .baselineColumns("f_bytes"); for (int i = 0; i < 500; i++) { testBuilder.baselineValues(new byte[] {'a', '1'}); } testBuilder.go(); final String sql2 = "select f_bytes " + "from dfs_test.`" + file + "` where f_bytes IN (BINARY_STRING('\\x61\\x31'))"; testBuilder = testBuilder() .sqlQuery(sql2) .unOrdered() .baselineColumns("f_bytes"); for (int i = 0; i < 500; i++) { testBuilder.baselineValues(new byte[] {'a', '1'}); } testBuilder.go(); } @Test public void testSimplePrimitiveSchema_NoNullValues() throws Exception { final AvroTestUtil.AvroTestRecordWriter testSetup = AvroTestUtil.generateSimplePrimitiveSchema_NoNullValues(); final String file = testSetup.getFilePath(); final String sql = "select a_string, b_int, c_long, d_float, e_double, f_bytes, h_boolean, g_null " + "from dfs_test.`" + file + "`"; test(sql); testBuilder() .sqlQuery(sql) .unOrdered() .baselineRecords(testSetup.getExpectedRecords()) .go(); } @Test public void testSimplePrimitiveSchema_StarQuery() throws Exception { simpleAvroTestHelper(AvroTestUtil.generateSimplePrimitiveSchema_NoNullValues(), "select * from dfs_test.`%s`"); } private List<Map<String, Object>> project( List<Map<String,Object>> incomingRecords, List<String> projectCols) { List<Map<String,Object>> output = Lists.newArrayList(); for (Map<String, Object> incomingRecord : incomingRecords) { final JsonStringHashMap<String, Object> newRecord = new JsonStringHashMap<>(); for (String s : incomingRecord.keySet()) { if (projectCols.contains(s)) { newRecord.put(s, incomingRecord.get(s)); } } output.add(newRecord); } return output; } @Test public void testSimplePrimitiveSchema_SelectColumnSubset() throws Exception { final AvroTestUtil.AvroTestRecordWriter testSetup = AvroTestUtil.generateSimplePrimitiveSchema_NoNullValues(); final String file = testSetup.getFilePath(); final String sql = "select h_boolean, e_double from dfs_test.`" + file + "`"; List<String> projectList = Lists.newArrayList("`h_boolean`", "`e_double`"); testBuilder() .sqlQuery(sql) .unOrdered() .baselineRecords(project(testSetup.getExpectedRecords(), projectList)) .go(); } @Test public void testSimplePrimitiveSchema_NoColumnsExistInTheSchema() throws Exception { final String file = AvroTestUtil.generateSimplePrimitiveSchema_NoNullValues().getFilePath(); final String sql = "select h_dummy1, e_dummy2 from dfs_test.`" + file + "`"; try { test(sql); Assert.fail("Test should fail as h_dummy1 and e_dummy2 does not exist."); } catch(UserException ue) { Assert.assertTrue("Test should fail as h_dummy1 and e_dummy2 does not exist.", ue.getMessage().contains("Column 'h_dummy1' not found in any table")); } } @Test public void testSimplePrimitiveSchema_OneExistAndOneDoesNotExistInTheSchema() throws Exception { final String file = AvroTestUtil.generateSimplePrimitiveSchema_NoNullValues().getFilePath(); final String sql = "select h_boolean, e_dummy2 from dfs_test.`" + file + "`"; try { test(sql); Assert.fail("Test should fail as e_dummy2 does not exist."); } catch(UserException ue) { Assert.assertTrue("Test should fail as e_dummy2 does not exist.", true); } } @Test public void testSimpleArraySchema_NoNullValues() throws Exception { final String file = AvroTestUtil.generateSimpleArraySchema_NoNullValues().getFilePath(); final String sql = "select a_string, c_string_array[0], e_float_array[2] " + "from dfs_test.`" + file + "`"; test(sql); } @Test public void testSimpleArraySchema_StarQuery() throws Exception { simpleAvroTestHelper(AvroTestUtil.generateSimpleArraySchema_NoNullValues(), "select * from dfs_test.`%s`"); } @Test public void testDoubleNestedSchema_NoNullValues_NotAllColumnsProjected() throws Exception { final String file = AvroTestUtil.generateDoubleNestedSchema_NoNullValues().getFilePath(); final String sql = "select t.c_record.nested_1_int, " + "t.c_record.nested_1_record.double_nested_1_int " + "from dfs_test.`" + file + "` t"; test(sql); } @Test public void testSimpleNestedSchema_NoNullValues() throws Exception { final AvroTestUtil.AvroTestRecordWriter testSetup = AvroTestUtil.generateSimpleNestedSchema_NoNullValues(); final String file = testSetup.getFilePath(); final String sql = "select a_string, b_int, t.c_record.nested_1_string, t.c_record.nested_1_int " + "from dfs_test.`" + file + "` t"; test(sql); } @Test public void testSimpleNestedSchema_StarQuery() throws Exception { final AvroTestUtil.AvroTestRecordWriter testSetup = AvroTestUtil.generateSimpleNestedSchema_NoNullValues(); final String file = testSetup.getFilePath(); final String sql = "select * from dfs_test.`" + file + "`"; testBuilder() .sqlQuery(sql) .unOrdered() .baselineRecords(testSetup.getExpectedRecords()) .go(); } @Test public void testDoubleNestedSchema_NoNullValues() throws Exception { final String file = AvroTestUtil.generateDoubleNestedSchema_NoNullValues().getFilePath(); final String sql = "select a_string, b_int, t.c_record.nested_1_string, t.c_record.nested_1_int, " + "t.c_record.nested_1_record.double_nested_1_string, " + "t.c_record.nested_1_record.double_nested_1_int " + "from dfs_test.`" + file + "` t"; test(sql); final String sql2 = "select t.c_record.nested_1_string " + "from dfs_test.`" + file + "` t limit 1"; TestBuilder testBuilder = testBuilder() .sqlQuery(sql2) .unOrdered() .baselineColumns("EXPR$0"); for (int i = 0; i < 1; i++) { testBuilder .baselineValues("nested_1_string_" + i); } testBuilder.go(); } @Test public void testDoubleNestedSchema_StarQuery() throws Exception { simpleAvroTestHelper(AvroTestUtil.generateDoubleNestedSchema_NoNullValues(), "select * from dfs_test.`%s`"); } private static void simpleAvroTestHelper(AvroTestUtil.AvroTestRecordWriter testSetup, final String sql) throws Exception { final String file = testSetup.getFilePath(); final String sqlWithTable = String.format(sql, file); testBuilder() .sqlQuery(sqlWithTable) .unOrdered() .baselineRecords(testSetup.getExpectedRecords()) .go(); } @Test public void testSimpleEnumSchema_NoNullValues() throws Exception { final AvroTestUtil.AvroTestRecordWriter testSetup = AvroTestUtil.generateSimpleEnumSchema_NoNullValues(); final String file = testSetup.getFilePath(); final String sql = "select a_string, b_enum from dfs_test.`" + file + "`"; List<String> projectList = Lists.newArrayList("`a_string`", "`b_enum`"); testBuilder() .sqlQuery(sql) .unOrdered() .baselineRecords(project(testSetup.getExpectedRecords(), projectList)) .go(); } @Test public void testSimpleEnumSchema_StarQuery() throws Exception { simpleAvroTestHelper(AvroTestUtil.generateSimpleEnumSchema_NoNullValues(), "select * from dfs_test.`%s`"); } @Test public void testSimpleUnionSchema_StarQuery() throws Exception { simpleAvroTestHelper(AvroTestUtil.generateUnionSchema_WithNullValues(), "select * from dfs_test.`%s`"); } @Test public void testShouldFailSimpleUnionNonNullSchema_StarQuery() throws Exception { final String file = AvroTestUtil.generateUnionSchema_WithNonNullValues().getFilePath(); final String sql = "select * from dfs_test.`" + file + "`"; try { test(sql); Assert.fail("Test should fail as union is only supported for optional fields"); } catch(UserRemoteException e) { String message = e.getMessage(); Assert.assertTrue(message.contains("Avro union type must be of the format : [\"null\", \"some-type\"]")); } } @Test public void testNestedUnionSchema_withNullValues() throws Exception { final String file = AvroTestUtil.generateUnionNestedSchema_withNullValues().getFilePath(); final String sql = "select t.c_record.nested_1_string,t.c_record.nested_1_int from dfs_test.`" + file + "` t"; test(sql); } /** * See <a href="https://issues.apache.org/jira/browse/DRILL-4574"></a> * */ @Test public void testFlattenPrimitiveArray() throws Exception { final String file = AvroTestUtil.generateSimpleArraySchema_NoNullValues().getFilePath(); final String sql = "select a_string, flatten(c_string_array) as array_item " + "from dfs_test.`" + file + "` t"; TestBuilder testBuilder = testBuilder().sqlQuery(sql).unOrdered() .baselineColumns("a_string", "array_item"); for (int i = 0; i < AvroTestUtil.RECORD_COUNT; i++) { for (int j = 0; j < AvroTestUtil.ARRAY_SIZE; j++) { testBuilder.baselineValues("a_" + i, "c_string_array_" + i + "_" + j); } } testBuilder.go(); } private TestBuilder nestedArrayQueryTestBuilder(String file) { final String sql = "select rec_nr, array_item['nested_1_int'] as array_item_nested_int from " + "(select a_int as rec_nr, flatten(t.b_array) as array_item " + "from dfs_test.`" + file + "` t) a"; TestBuilder testBuilder = testBuilder().sqlQuery(sql).unOrdered().baselineColumns("rec_nr", "array_item_nested_int"); return testBuilder; } /** * See <a href="https://issues.apache.org/jira/browse/DRILL-4574"></a> */ @Test public void testFlattenComplexArray() throws Exception { final String file = AvroTestUtil.generateNestedArraySchema().getFilePath(); TestBuilder testBuilder = nestedArrayQueryTestBuilder(file); for (int i = 0; i < AvroTestUtil.RECORD_COUNT; i++) { for (int j = 0; j < AvroTestUtil.ARRAY_SIZE; j++) { testBuilder.baselineValues(i, j); } } testBuilder.go(); } /** * See <a href="https://issues.apache.org/jira/browse/DRILL-4574"></a> */ @Test public void testFlattenEmptyComplexArrayMustYieldNoResults() throws Exception { final String file = AvroTestUtil.generateNestedArraySchema(AvroTestUtil.RECORD_COUNT, 0).getFilePath(); TestBuilder testBuilder = nestedArrayQueryTestBuilder(file); testBuilder.expectsEmptyResultSet(); } @Test public void testNestedUnionArraySchema_withNullValues() throws Exception { final String file = AvroTestUtil.generateUnionNestedArraySchema_withNullValues().getFilePath(); final String sql = "select t.c_array[0].nested_1_string,t.c_array[0].nested_1_int from dfs_test.`" + file + "` t"; test(sql); } @Test public void testMapSchema_withNullValues() throws Exception { final String file = AvroTestUtil.generateMapSchema_withNullValues().getFilePath(); final String sql = "select c_map['key1'],c_map['key2'] from dfs_test.`" + file + "`"; test(sql); } @Test public void testMapSchemaComplex_withNullValues() throws Exception { final String file = AvroTestUtil.generateMapSchemaComplex_withNullValues().getFilePath(); final String sql = "select d_map['key1'] nested_key1, d_map['key2'] nested_key2 from dfs_test.`" + file + "`"; TestBuilder testBuilder = testBuilder() .sqlQuery(sql) .unOrdered() .baselineColumns("nested_key1", "nested_key2"); final List<Object> expectedList = Lists.newArrayList(); for (int i = 0; i < AvroTestUtil.ARRAY_SIZE; i++) { expectedList.add((double)i); } final List<Object> emptyList = listOf(); for (int i = 0; i < AvroTestUtil.RECORD_COUNT; i += 2) { testBuilder.baselineValues(expectedList, expectedList); testBuilder.baselineValues(emptyList, emptyList); } testBuilder.go(); } @Test public void testStringAndUtf8Data() throws Exception { simpleAvroTestHelper(AvroTestUtil.generateStringAndUtf8Data(), "select * from dfs_test.`%s`"); } @Test public void testLinkedList() throws Exception { final String file = AvroTestUtil.generateLinkedList(); final String sql = "select * from dfs_test.`" + file + "`"; test(sql); } @Test public void testCountStar() throws Exception { final String file = AvroTestUtil.generateStringAndUtf8Data().getFilePath(); final String sql = "select count(*) as row_count from dfs_test.`" + file + "`"; testBuilder() .sqlQuery(sql) .ordered() .baselineColumns("row_count") .baselineValues((long)AvroTestUtil.RECORD_COUNT) .go(); } }