/******************************************************************************* * Copyright 2017 Capital One Services, LLC and Bitwise, Inc. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * http://www.apache.org/licenses/LICENSE-2.0 * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License *******************************************************************************/ package hydrograph.engine.cascading.assembly; import cascading.flow.FlowDef; import cascading.pipe.Pipe; import cascading.scheme.Scheme; import cascading.tap.Tap; import cascading.tap.hadoop.Hfs; import cascading.tuple.Fields; import hydrograph.engine.cascading.assembly.base.BaseComponent; import hydrograph.engine.cascading.assembly.infra.ComponentParameters; import hydrograph.engine.cascading.scheme.avro.AvroDescriptor; import hydrograph.engine.cascading.scheme.avro.CustomAvroScheme; import hydrograph.engine.core.component.entity.InputFileAvroEntity; import hydrograph.engine.core.component.entity.elements.OutSocket; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; public class InputFileAvroAssembly extends BaseComponent<InputFileAvroEntity> { private static final long serialVersionUID = -2946197683137950707L; private InputFileAvroEntity inputFileAvroEntity; @SuppressWarnings("rawtypes") private Scheme scheme; private Pipe pipe; @SuppressWarnings("rawtypes") private Tap tap; private FlowDef flowDef; private Fields inputFields; private String[] fieldDataTypes; private int[] fieldScale; private int[] fieldPrecision; private String[] fieldFormat; private String[] fieldScaleType; private static final Logger LOG = LoggerFactory .getLogger(InputFileAvroAssembly.class); public InputFileAvroAssembly(InputFileAvroEntity baseComponentEntity, ComponentParameters componentParameters) { super(baseComponentEntity, componentParameters); } @Override protected void createAssembly() { try { getSchemaFieldType(); generateTapsAndPipes(); flowDef = flowDef.addSource(pipe, tap); if (LOG.isTraceEnabled()) { LOG.trace(inputFileAvroEntity.toString()); } for (OutSocket outSocket : inputFileAvroEntity.getOutSocketList()) { LOG.trace("Creating input file Avro assembly for '" + inputFileAvroEntity.getComponentId() + "' for socket: '" + outSocket.getSocketId() + "' of type: '" + outSocket.getSocketType() + "'"); setOutLink(outSocket.getSocketType(), outSocket.getSocketId(), inputFileAvroEntity.getComponentId(), pipe, scheme.getSourceFields()); } } catch (Exception e) { LOG.error( "Error in creating assembly for component '" + inputFileAvroEntity.getComponentId() + "', Error: " + e.getMessage(), e); throw new RuntimeException(e); } } @SuppressWarnings("unchecked") public void generateTapsAndPipes() throws IOException { prepareScheme(); flowDef = componentParameters.getFlowDef(); tap = new Hfs(scheme, inputFileAvroEntity.getPath()); pipe = new Pipe(inputFileAvroEntity.getComponentId()+inputFileAvroEntity.getOutSocketList().get(0).getSocketId()); setHadoopProperties(tap.getStepConfigDef()); setHadoopProperties(pipe.getStepConfigDef()); } public void getSchemaFieldType() { inputFields = new Fields(); fieldDataTypes = new String[inputFileAvroEntity.getFieldsList() .size()]; fieldScale = new int[inputFileAvroEntity.getFieldsList().size()]; fieldPrecision = new int[inputFileAvroEntity.getFieldsList() .size()]; fieldFormat = new String[inputFileAvroEntity.getFieldsList() .size()]; fieldScaleType = new String[inputFileAvroEntity.getFieldsList() .size()]; for (int i = 0; i < inputFileAvroEntity.getFieldsList().size(); i++) { inputFields = inputFields.append(new Fields(inputFileAvroEntity .getFieldsList().get(i).getFieldName())); fieldDataTypes[i] = inputFileAvroEntity.getFieldsList() .get(i).getFieldDataType(); fieldFormat[i] = inputFileAvroEntity.getFieldsList().get(i) .getFieldFormat() != null ? inputFileAvroEntity .getFieldsList().get(i).getFieldFormat() : ""; fieldScale[i] = inputFileAvroEntity.getFieldsList().get(i) .getFieldScale(); fieldPrecision[i] = inputFileAvroEntity.getFieldsList() .get(i).getFieldPrecision(); fieldScaleType[i] = inputFileAvroEntity.getFieldsList() .get(i).getFieldScaleType(); } } protected void prepareScheme() { LOG.debug("Applying CustomAvroScheme to read data from avro file"); AvroDescriptor avroDescriptor = new AvroDescriptor(inputFields, dataTypeMapping(fieldDataTypes), fieldPrecision, fieldScale); scheme = new CustomAvroScheme(avroDescriptor); } private Class<?>[] dataTypeMapping(String[] fieldDataTypes) { Class<?>[] types = new Class<?>[fieldDataTypes.length]; for (int i = 0; i < fieldDataTypes.length; i++) { try { types[i] = Class.forName(fieldDataTypes[i]); } catch (ClassNotFoundException e) { throw new RuntimeException( "'" + fieldDataTypes[i] + "' class not found while applying datatypes for component '" + inputFileAvroEntity.getComponentId() + "' ", e); } } return types; } @Override public void initializeEntity(InputFileAvroEntity assemblyEntityBase) { this.inputFileAvroEntity=assemblyEntityBase; } }