/*******************************************************************************
 * Copyright 2017 Capital One Services, LLC and Bitwise, Inc.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License
 *******************************************************************************/
package hydrograph.engine.cascading.scheme.hive.parquet;

import cascading.tap.hive.HiveTableDescriptor;
import cascading.tuple.Fields;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.hive.ql.io.parquet.convert.HiveSchemaConverter;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;

/**
 * Helper methods for deriving the Parquet schema and table properties either
 * from a Hive table descriptor or from Cascading sink fields.
 */
public class HiveParquetSchemeHelper {

	private static final String COLUMNS = "columns";
	private static final String COLUMNS_TYPES = "columns.types";

	/** Returns the column names of the Hive table as a list. */
	public static List<String> getTableColumns(
			HiveTableDescriptor hiveTableDescriptor) {
		return Arrays.asList(hiveTableDescriptor.getColumnNames());
	}

	/** Parses the colon-separated Hive column type string into TypeInfo objects. */
	public static List<TypeInfo> getColumnsDataTypes(
			HiveTableDescriptor hiveTableDescriptor) {
		String dataTypes = StringUtils.join(
				hiveTableDescriptor.getColumnTypes(), ":");
		return TypeInfoUtils.getTypeInfosFromTypeString(dataTypes);
	}

	/** Converts the Hive table schema into its Parquet message type string. */
	public static String getParquetSchemeMessage(
			HiveTableDescriptor hiveTableDescriptor) {
		return HiveSchemaConverter.convert(
				getTableColumns(hiveTableDescriptor),
				getColumnsDataTypes(hiveTableDescriptor)).toString();
	}

	/**
	 * Builds the "columns" and "columns.types" table properties expected by
	 * the Parquet SerDe from the Hive table descriptor.
	 */
	public static Properties getTableProperties(
			HiveTableDescriptor hiveTableDescriptor) {
		Properties properties = new Properties();
		String columns = StringUtils.join(hiveTableDescriptor.getColumnNames(),
				",");
		String columnTypes = StringUtils.join(
				hiveTableDescriptor.getColumnTypes(), ":");
		properties.put(COLUMNS, columns);
		properties.put(COLUMNS_TYPES, columnTypes);
		return properties;
	}

	/**
	 * Converts Cascading sink fields and their Hive column types into a
	 * Parquet message type string.
	 */
	public static String getParquetSchemeMessage(Fields sinkFields,
			String[] columnTypes) {
		List<String> columnName = new ArrayList<String>();
		for (int i = 0; i < sinkFields.size(); i++) {
			columnName.add(sinkFields.get(i).toString());
		}
		String dataTypes = StringUtils.join(columnTypes, ":");
		return HiveSchemaConverter.convert(columnName,
				TypeInfoUtils.getTypeInfosFromTypeString(dataTypes)).toString();
	}
}