/*******************************************************************************
* Copyright 2017 Capital One Services, LLC and Bitwise, Inc.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*******************************************************************************/
package hydrograph.engine.cascading.assembly;
import cascading.tap.hive.HivePartitionTap;
import cascading.tap.hive.HiveTableDescriptor;
import cascading.tap.hive.HiveTableDescriptor.Factory;
import cascading.tap.hive.HiveTap;
import cascading.tuple.Fields;
import hydrograph.engine.cascading.assembly.base.InputFileHiveBase;
import hydrograph.engine.cascading.assembly.infra.ComponentParameters;
import hydrograph.engine.cascading.filters.PartitionFilter;
import hydrograph.engine.cascading.scheme.hive.parquet.HiveParquetScheme;
import hydrograph.engine.cascading.scheme.hive.parquet.HiveParquetTableDescriptor;
import hydrograph.engine.core.component.entity.InputFileHiveParquetEntity;
import hydrograph.engine.core.component.entity.base.HiveEntityBase;
import hydrograph.engine.utilities.HiveConfigurationMapping;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class InputFileHiveParquetAssembly extends InputFileHiveBase {
/**
*
*/
private static final long serialVersionUID = 2775585858902811182L;
private static Logger LOG = LoggerFactory.getLogger(InputFileHiveParquetAssembly.class);
private HiveParquetTableDescriptor tableDesc;
private InputFileHiveParquetEntity inputFileHiveParquetEntity;
public InputFileHiveParquetAssembly(InputFileHiveParquetEntity baseComponentEntity,
ComponentParameters componentParameters) {
super(baseComponentEntity, componentParameters);
}
@Override
protected void prepareScheme() {
LOG.debug("Applying HiveParquetScheme to read data from Hive");
// HiveParquetTableDescriptor is developed specifically for handling
// Parquet File format with Hive. Hence, the object of table descriptor
// is created in its respective assembly and not in its base class.
Configuration conf = new Configuration();
conf.addResource(new Path(HiveConfigurationMapping.getHiveConf("path_to_hive_site_xml")));
Factory factory = new Factory(conf);
HiveTableDescriptor tb = factory.newInstance(inputFileHiveParquetEntity.getDatabaseName(),
inputFileHiveParquetEntity.getTableName());
tableDesc = new HiveParquetTableDescriptor(tb.getDatabaseName(), tb.getTableName(), tb.getColumnNames(),
tb.getColumnTypes(), tb.getPartitionKeys(), getHiveExternalTableLocationPath());
scheme = new HiveParquetScheme(tableDesc);
scheme.setSourceFields(tableDesc.toFields());
scheme.setSinkFields(tableDesc.toFields());
}
/*
* (non-Javadoc)
*
* @see hydrograph.engine.cascading.assembly.base.InputFileHiveBase#
* initializeHiveTap ()
*/
@Override
protected void initializeHiveTap() {
LOG.debug("Initializing Hive Tap using HiveParquetTableDescriptor");
hiveTap = new HiveTap(tableDesc, scheme);
if (inputFileHiveParquetEntity.getPartitionKeys() != null
&& inputFileHiveParquetEntity.getPartitionKeys().length > 0) {
hiveTap = new HivePartitionTap((HiveTap) hiveTap);
if (isPartitionFilterEnabled())
addPartitionFilter(((HivePartitionTap) hiveTap));
}
}
private boolean isPartitionFilterEnabled() {
if (inputFileHiveParquetEntity.getPartitionFilterList().size() > 0)
return false;
else
return true;
}
private void addPartitionFilter(HivePartitionTap hivePartitionTap) {
hivePartitionTap.addSourcePartitionFilter(
new Fields(convertLowerCase(inputFileHiveParquetEntity.getPartitionKeys())),
new PartitionFilter(inputFileHiveParquetEntity.getPartitionFilterList()));
}
/*
* (non-Javadoc)
*
* @see hydrograph.engine.cascading.assembly.base.InputFileHiveBase#
* castHiveEntityFromBase
* (hydrograph.engine.assembly.entity.base.HiveEntityBase) /* cast the
* hiveEntityBase to InputFileHiveParquetEntity
*/
@Override
public void castHiveEntityFromBase(HiveEntityBase hiveEntityBase) {
inputFileHiveParquetEntity = (InputFileHiveParquetEntity) hiveEntityBase;
}
}