/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hive.hcatalog.mapreduce;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hive.hcatalog.common.ErrorType;
import org.apache.hive.hcatalog.common.HCatException;
import org.apache.hive.hcatalog.data.DefaultHCatRecord;
import org.apache.hive.hcatalog.data.HCatRecord;
import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.data.schema.HCatSchemaUtils;
import org.junit.Test;
import static junit.framework.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertFalse;
/**
 * Writes to and reads from a partitioned table through the {@code runMRCreate} and
 * {@code runMRRead} helpers inherited from {@link HCatMapReduceTest}.
 *
 * <p>The table is declared with two data columns ({@code c1} int, {@code c2} string) and two
 * partition keys ({@code PaRT1} string, {@code part0} int). The single test method publishes
 * several partitions, checks the expected failure modes (duplicate publish, unknown or
 * incomplete partition keys, column type change, column reordering), verifies that a new
 * column is appended to the table schema, and finally reads everything back through both
 * {@code runMRRead} and a Hive query.
 *
 * <p>Expected row counts depend on {@code isTableImmutable()}: on a mutable table the duplicate
 * publish is expected to succeed and contributes 20 extra rows.
 */
public class TestHCatPartitioned extends HCatMapReduceTest {

  /** Records handed to the next write job; replaced whenever the column layout under test changes. */
  private List<HCatRecord> writeRecords;

  /**
   * Column schema passed to {@code runMRCreate} together with the records. Despite the name it
   * holds the data columns ({@code c1}, {@code c2}, ...), not the partition keys.
   */
  private List<HCatFieldSchema> partitionColumns;

  /**
   * Sets the table name for the given storage format and prepares the initial 20 two-column
   * records plus the matching {@code c1}/{@code c2} column schema.
   *
   * @param formatName        storage format name; appended to the table name
   * @param serdeClass        forwarded to the superclass constructor
   * @param inputFormatClass  forwarded to the superclass constructor
   * @param outputFormatClass forwarded to the superclass constructor
   * @throws Exception if the superclass constructor or the schema conversion fails
   */
  public TestHCatPartitioned(String formatName, String serdeClass, String inputFormatClass,
      String outputFormatClass) throws Exception {
    super(formatName, serdeClass, inputFormatClass, outputFormatClass);
    tableName = "testHCatPartitionedTable_" + formatName;
    writeRecords = recordsWithPrefixes(20, "strvalue");
    partitionColumns = new ArrayList<HCatFieldSchema>();
    partitionColumns.add(hcatColumn("c1", serdeConstants.INT_TYPE_NAME));
    partitionColumns.add(hcatColumn("c2", serdeConstants.STRING_TYPE_NAME));
  }

  /**
   * Returns the partition keys of the test table.
   *
   * @return {@code PaRT1} (string) followed by {@code part0} (int)
   */
  @Override
  protected List<FieldSchema> getPartitionKeys() {
    List<FieldSchema> fields = new ArrayList<FieldSchema>();
    // Partition names are deliberately declared in unsorted order and with mixed case.
    fields.add(new FieldSchema("PaRT1", serdeConstants.STRING_TYPE_NAME, ""));
    fields.add(new FieldSchema("part0", serdeConstants.INT_TYPE_NAME, ""));
    return fields;
  }

  /**
   * Returns the data columns of the test table.
   *
   * @return {@code c1} (int) followed by {@code c2} (string)
   */
  @Override
  protected List<FieldSchema> getTableColumns() {
    List<FieldSchema> fields = new ArrayList<FieldSchema>();
    fields.add(new FieldSchema("c1", serdeConstants.INT_TYPE_NAME, ""));
    fields.add(new FieldSchema("c2", serdeConstants.STRING_TYPE_NAME, ""));
    return fields;
  }

  /**
   * End-to-end scenario: publishes partitions, checks the rejected writes, reads the data back
   * with and without partition filters, then runs the schema, column-order and Hive-read
   * sub-tests. The sub-tests depend on the table state left behind by the earlier steps, so
   * the order of the calls matters.
   *
   * @throws Exception if any job or assertion fails unexpectedly
   */
  @Test
  public void testHCatPartitionedTable() throws Exception {
    Map<String, String> partitionMap = partitionSpec("part1", "p1value1", "part0", "501");
    runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);

    // Same keys spelled in upper case; the write is still expected to succeed.
    partitionMap = partitionSpec("PART1", "p1value2", "PART0", "502");
    runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true);

    // Duplicate publish: on an immutable table this either throws at job creation time or
    // fails the job at runtime; on a mutable table it is expected to succeed.
    IOException exc = null;
    try {
      Job j = runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true);
      assertEquals(!isTableImmutable(), j.isSuccessful());
    } catch (IOException e) {
      exc = e;
      assertTrue(exc instanceof HCatException);
      assertEquals(ErrorType.ERROR_DUPLICATE_PARTITION, ((HCatException) exc).getErrorType());
    }
    if (!isTableImmutable()) {
      assertNull(exc);
    }

    // Publish with invalid partition key names: either the job fails or an HCatException
    // with ERROR_MISSING_PARTITION_KEY is thrown.
    partitionMap = partitionSpec("px1", "p1value2", "px0", "502");
    try {
      Job j = runMRCreate(partitionMap, partitionColumns, writeRecords, 20, true);
      assertFalse(j.isSuccessful());
    } catch (IOException e) {
      assertTrue(e instanceof HCatException);
      assertEquals(ErrorType.ERROR_MISSING_PARTITION_KEY, ((HCatException) e).getErrorType());
    }

    // Publish with missing partition key values must be rejected.
    assertWriteFails(ErrorType.ERROR_INVALID_PARTITION_VALUES, partitionSpec("px", "512"),
        partitionColumns, writeRecords, 20);

    // A null partition value map is not an error: with dynamic partitioning the key values
    // do not have to be specified up front. Any IOException propagates and fails the test.
    runMRCreate(null, partitionColumns, writeRecords, 20, false);

    // Read should get 10 + 20 rows if immutable, 50 (10 + 20 + 20) if mutable.
    if (isTableImmutable()) {
      runMRRead(30);
    } else {
      runMRRead(50);
    }

    // Read with partition filter.
    runMRRead(10, "part1 = \"p1value1\"");
    runMRRead(10, "part0 = \"501\"");
    if (isTableImmutable()) {
      runMRRead(20, "part1 = \"p1value2\"");
      runMRRead(30, "part1 = \"p1value1\" or part1 = \"p1value2\"");
      runMRRead(20, "part0 = \"502\"");
      runMRRead(30, "part0 = \"501\" or part0 = \"502\"");
    } else {
      runMRRead(40, "part1 = \"p1value2\"");
      runMRRead(50, "part1 = \"p1value1\" or part1 = \"p1value2\"");
      runMRRead(40, "part0 = \"502\"");
      runMRRead(50, "part0 = \"501\" or part0 = \"502\"");
    }

    tableSchemaTest();
    columnOrderChangeTest();
    hiveReadTest();
  }

  /**
   * Checks that a column added by a write shows up in the table schema, that changing a
   * column's data type is rejected, and that records which also carry a partition key column
   * can be written and read back.
   */
  private void tableSchemaTest() throws Exception {
    HCatSchema tableSchema = getTableSchema();
    assertEquals(4, tableSchema.getFields().size());

    // Extend the column schema to three fields and write matching three-column records.
    partitionColumns.add(hcatColumn("c3", serdeConstants.STRING_TYPE_NAME));
    writeRecords = recordsWithPrefixes(20, "strvalue", "str2value");
    Map<String, String> partitionMap = partitionSpec("part1", "p1value5", "part0", "505");
    runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);

    // c3 must have been added to the table schema, ahead of the partition keys.
    tableSchema = getTableSchema();
    assertEquals(5, tableSchema.getFields().size());
    assertEquals("c1", tableSchema.getFields().get(0).getName());
    assertEquals("c2", tableSchema.getFields().get(1).getName());
    assertEquals("c3", tableSchema.getFields().get(2).getName());
    assertEquals("part1", tableSchema.getFields().get(3).getName());
    assertEquals("part0", tableSchema.getFields().get(4).getName());

    // Changing a column's data type (c2: string -> int) must fail.
    partitionMap = partitionSpec("part1", "p1value6", "part0", "506");
    partitionColumns = new ArrayList<HCatFieldSchema>();
    partitionColumns.add(hcatColumn("c1", serdeConstants.INT_TYPE_NAME));
    partitionColumns.add(hcatColumn("c2", serdeConstants.INT_TYPE_NAME));
    assertWriteFails(ErrorType.ERROR_SCHEMA_TYPE_MISMATCH, partitionMap, partitionColumns,
        writeRecords, 20);

    // Records that also carry the part1 partition key as a data column: the write is expected
    // to succeed, as the reads below rely on these exact values being present.
    partitionColumns = new ArrayList<HCatFieldSchema>();
    partitionColumns.add(hcatColumn("c1", serdeConstants.INT_TYPE_NAME));
    partitionColumns.add(hcatColumn("c2", serdeConstants.STRING_TYPE_NAME));
    partitionColumns.add(hcatColumn("c3", serdeConstants.STRING_TYPE_NAME));
    partitionColumns.add(hcatColumn("part1", serdeConstants.STRING_TYPE_NAME));
    List<HCatRecord> recordsContainingPartitionCols = new ArrayList<HCatRecord>(20);
    for (int i = 0; i < 20; i++) {
      List<Object> objList = new ArrayList<Object>();
      objList.add(i);
      objList.add("c2value" + i);
      objList.add("c3value" + i);
      objList.add("p1value6");
      recordsContainingPartitionCols.add(new DefaultHCatRecord(objList));
    }
    // No try/catch here: a swallowed IOException would only resurface later as a confusing
    // row-count mismatch, so let it fail the test directly.
    runMRCreate(partitionMap, partitionColumns, recordsContainingPartitionCols, 20, true);

    List<HCatRecord> records = runMRRead(20, "part1 = \"p1value6\"");
    assertEquals(20, records.size());
    records = runMRRead(20, "part0 = \"506\"");
    assertEquals(20, records.size());

    // The records are expected back in write order: data columns first, then part1, part0.
    int index = 0;
    for (HCatRecord rec : records) {
      assertEquals(5, rec.size());
      assertEquals(Integer.valueOf(index), rec.get(0));
      assertEquals("c2value" + index, rec.get(1));
      assertEquals("c3value" + index, rec.get(2));
      assertEquals("p1value6", rec.get(3));
      assertEquals(Integer.valueOf(506), rec.get(4));
      index++;
    }
  }

  /**
   * Checks that writing with reordered columns (c1, c3, c2) is rejected, while writing with a
   * leading subset of the columns (c1, c2) succeeds.
   */
  private void columnOrderChangeTest() throws Exception {
    HCatSchema tableSchema = getTableSchema();
    assertEquals(5, tableSchema.getFields().size());

    partitionColumns = new ArrayList<HCatFieldSchema>();
    partitionColumns.add(hcatColumn("c1", serdeConstants.INT_TYPE_NAME));
    partitionColumns.add(hcatColumn("c3", serdeConstants.STRING_TYPE_NAME));
    partitionColumns.add(hcatColumn("c2", serdeConstants.STRING_TYPE_NAME));
    writeRecords = recordsWithPrefixes(10, "co strvalue", "co str2value");
    Map<String, String> partitionMap = partitionSpec("part1", "p1value8", "part0", "508");
    assertWriteFails(ErrorType.ERROR_SCHEMA_COLUMN_MISMATCH, partitionMap, partitionColumns,
        writeRecords, 10);

    partitionColumns = new ArrayList<HCatFieldSchema>();
    partitionColumns.add(hcatColumn("c1", serdeConstants.INT_TYPE_NAME));
    partitionColumns.add(hcatColumn("c2", serdeConstants.STRING_TYPE_NAME));
    writeRecords = recordsWithPrefixes(10, "co strvalue");
    runMRCreate(partitionMap, partitionColumns, writeRecords, 10, true);

    if (isTableImmutable()) {
      // Read should get 10 + 20 + 10 + 10 + 20 rows.
      runMRRead(70);
    } else {
      runMRRead(90); // +20 from the duplicate publish
    }
  }

  /** Checks that the data written by the earlier steps is readable through a Hive query. */
  private void hiveReadTest() throws Exception {
    String query = "select * from " + tableName;
    int retCode = driver.run(query).getResponseCode();
    if (retCode != 0) {
      throw new Exception("Error " + retCode + " running query " + query);
    }

    ArrayList<String> res = new ArrayList<String>();
    driver.getResults(res);
    if (isTableImmutable()) {
      // Read should get 10 + 20 + 10 + 10 + 20 rows.
      assertEquals(70, res.size());
    } else {
      assertEquals(90, res.size()); // +20 from the duplicate publish
    }
  }

  /** Runs a write that must throw an {@link HCatException} carrying the given error type. */
  private void assertWriteFails(ErrorType expected, Map<String, String> partitionValues,
      List<HCatFieldSchema> columns, List<HCatRecord> records, int writeCount) throws Exception {
    IOException failure = null;
    try {
      runMRCreate(partitionValues, columns, records, writeCount, true);
    } catch (IOException e) {
      failure = e;
    }
    assertNotNull(failure);
    assertTrue(failure instanceof HCatException);
    assertEquals(expected, ((HCatException) failure).getErrorType());
  }

  /** Builds {@code count} records of the form (i, prefix1 + i, prefix2 + i, ...). */
  private static List<HCatRecord> recordsWithPrefixes(int count, String... stringPrefixes) {
    List<HCatRecord> records = new ArrayList<HCatRecord>(count);
    for (int i = 0; i < count; i++) {
      List<Object> values = new ArrayList<Object>();
      values.add(i);
      for (String prefix : stringPrefixes) {
        values.add(prefix + i);
      }
      records.add(new DefaultHCatRecord(values));
    }
    return records;
  }

  /** Converts a (name, type) pair with an empty comment into an HCatalog field schema. */
  private static HCatFieldSchema hcatColumn(String name, String typeName) throws HCatException {
    return HCatSchemaUtils.getHCatFieldSchema(new FieldSchema(name, typeName, ""));
  }

  /** Builds a fresh partition value map from alternating key/value arguments. */
  private static Map<String, String> partitionSpec(String... keyValuePairs) {
    Map<String, String> spec = new HashMap<String, String>();
    for (int i = 0; i < keyValuePairs.length; i += 2) {
      spec.put(keyValuePairs[i], keyValuePairs[i + 1]);
    }
    return spec;
  }
}