/******************************************************************************* * Copyright 2017 Capital One Services, LLC and Bitwise, Inc. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * http://www.apache.org/licenses/LICENSE-2.0 * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License *******************************************************************************/ package hydrograph.engine.core.component.generator; import hydrograph.engine.core.component.entity.InputFileHiveParquetEntity; import hydrograph.engine.core.component.entity.utils.InputEntityUtils; import hydrograph.engine.core.component.generator.base.InputComponentGeneratorBase; import hydrograph.engine.jaxb.commontypes.TypeBaseComponent; import hydrograph.engine.jaxb.ihiveparquet.HivePartitionFieldsType; import hydrograph.engine.jaxb.ihiveparquet.HivePartitionFilterType; import hydrograph.engine.jaxb.ihiveparquet.PartitionColumn; import hydrograph.engine.jaxb.ihiveparquet.PartitionFieldBasicType; import hydrograph.engine.jaxb.inputtypes.ParquetHiveFile; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.ArrayList; import java.util.HashMap; import java.util.List; /** * The Class InputFileHiveParquetEntityGenerator. * * @author Bitwise * */ public class InputFileHiveParquetEntityGenerator extends InputComponentGeneratorBase { private ParquetHiveFile jaxbInputFileHiveParquetFile; private InputFileHiveParquetEntity inputFileHiveParquetEntity; private static Logger LOG = LoggerFactory .getLogger(InputFileHiveParquetEntityGenerator.class); public InputFileHiveParquetEntityGenerator(TypeBaseComponent baseComponent) { super(baseComponent); } @Override public void castComponentFromBase(TypeBaseComponent baseComponent) { jaxbInputFileHiveParquetFile = (ParquetHiveFile) baseComponent; } @Override public void createEntity() { inputFileHiveParquetEntity = new InputFileHiveParquetEntity(); } @Override public void initializeEntity() { LOG.trace("Initializing input file hive parquet entity for component: " + jaxbInputFileHiveParquetFile.getId()); inputFileHiveParquetEntity.setComponentId(jaxbInputFileHiveParquetFile .getId()); inputFileHiveParquetEntity.setFieldsList(InputEntityUtils .extractInputFields(jaxbInputFileHiveParquetFile.getOutSocket() .get(0).getSchema() .getFieldOrRecordOrIncludeExternalSchema())); inputFileHiveParquetEntity.setRuntimeProperties(InputEntityUtils .extractRuntimeProperties(jaxbInputFileHiveParquetFile .getRuntimeProperties())); inputFileHiveParquetEntity.setOutSocketList(InputEntityUtils .extractOutSocket(jaxbInputFileHiveParquetFile.getOutSocket())); inputFileHiveParquetEntity.setDatabaseName(jaxbInputFileHiveParquetFile .getDatabaseName().getValue()); inputFileHiveParquetEntity.setTableName(jaxbInputFileHiveParquetFile .getTableName().getValue()); inputFileHiveParquetEntity .setPartitionKeys(extractPartitionFields(jaxbInputFileHiveParquetFile .getPartitionKeys())); inputFileHiveParquetEntity .setExternalTablePathUri(jaxbInputFileHiveParquetFile .getExternalTablePath() == null ? null : jaxbInputFileHiveParquetFile.getExternalTablePath() .getUri()); inputFileHiveParquetEntity .setPartitionFilterList(populatePartitionFilterList(jaxbInputFileHiveParquetFile.getPartitionFilter())); inputFileHiveParquetEntity.setListOfPartitionKeyValueMap(populatePartitionKeyValueMap(jaxbInputFileHiveParquetFile.getPartitionFilter())); } private ArrayList<HashMap<String, String>> populatePartitionKeyValueMap(HivePartitionFilterType partitionFilter) { ArrayList<HashMap<String, String>> partitionKeyValueMap = new ArrayList<>(); if (partitionFilter != null && partitionFilter.getPartitionColumn() != null) { for (PartitionColumn column : partitionFilter.getPartitionColumn()) { HashMap<String, String> map = new HashMap<>(); map.put(column.getName(), column.getValue()); if (column.getPartitionColumn() != null) fillPartitionKeyValueMap(map, column.getPartitionColumn()); partitionKeyValueMap.add(map); } } return partitionKeyValueMap; } private void fillPartitionKeyValueMap(HashMap<String, String> partitionKeyValue, PartitionColumn partitionColumn) { partitionKeyValue.put(partitionColumn.getName(),partitionColumn.getValue()); if(partitionColumn.getPartitionColumn()!=null) fillPartitionKeyValueMap(partitionKeyValue,partitionColumn.getPartitionColumn()); } private ArrayList<ArrayList<String>> populatePartitionFilterList(HivePartitionFilterType hivePartitionFilterType) { ArrayList<ArrayList<String>> listOfPartitionColumn = new ArrayList<ArrayList<String>>(); if (hivePartitionFilterType != null && hivePartitionFilterType.getPartitionColumn() != null) { for (PartitionColumn partitionColumn : hivePartitionFilterType.getPartitionColumn()) { ArrayList<String> arrayList = new ArrayList<String>(); arrayList = fillArrayList(partitionColumn, arrayList); listOfPartitionColumn.add(arrayList); } } return listOfPartitionColumn; } private ArrayList<String> fillArrayList(PartitionColumn partitionColumn, ArrayList<String> listOfPartitionColumn) { listOfPartitionColumn.add(partitionColumn.getValue()); if (partitionColumn.getPartitionColumn() != null) { listOfPartitionColumn = fillArrayList(partitionColumn.getPartitionColumn(), listOfPartitionColumn); } return listOfPartitionColumn; } private String createPartitionFilterRegex( HivePartitionFilterType hivePartitionFilterType) { if (hivePartitionFilterType != null && hivePartitionFilterType.getPartitionColumn() != null) { String partitionRegex = ""; String regex = ""; int numberOfPartitionKeys = inputFileHiveParquetEntity .getPartitionKeys().length; for (PartitionColumn partitionColumn : hivePartitionFilterType .getPartitionColumn()) { if (partitionRegex != "") { partitionRegex = partitionRegex + "|"; } regex = ""; regex = buildRegex(partitionColumn, regex); if (!(regex.split("\t").length == numberOfPartitionKeys)) { regex = regex + "\t.*"; } else { regex = regex + "\\b"; } partitionRegex = partitionRegex + regex; } return partitionRegex; } else { return ""; } } private String buildRegex(PartitionColumn partitionColumn, String partitionRegex) { partitionRegex = partitionRegex + partitionColumn.getValue(); if (partitionColumn.getPartitionColumn() != null) { partitionRegex = partitionRegex + "\t"; partitionRegex = buildRegex(partitionColumn.getPartitionColumn(), partitionRegex); } return partitionRegex; } /** * This method extracts partition keys from {@link HivePartitionFieldsType} * hivePartitionFieldsType which is passed as a parameter. * * If hivePartitionFieldsType object is null then string array of size of 0 * will be returned. * * @param hivePartitionFieldsType * @return String[] */ private String[] extractPartitionFields( HivePartitionFieldsType hivePartitionFieldsType) { String[] partitionKeys; List<String> partitionFieldsList = new ArrayList<String>(); if (hivePartitionFieldsType != null && hivePartitionFieldsType.getField() != null) { partitionFieldsList = getPartitionFieldsList( hivePartitionFieldsType.getField(), partitionFieldsList); partitionKeys = partitionFieldsList .toArray(new String[partitionFieldsList.size()]); return partitionKeys; } else { return new String[0]; } } private List<String> getPartitionFieldsList( PartitionFieldBasicType partitionFieldBasicType, List<String> partitionFieldsList) { partitionFieldsList.add(partitionFieldBasicType.getName()); if (partitionFieldBasicType.getField() != null) { getPartitionFieldsList(partitionFieldBasicType.getField(), partitionFieldsList); } return partitionFieldsList; } @Override public InputFileHiveParquetEntity getEntity() { return inputFileHiveParquetEntity; } }