/******************************************************************************* * Copyright 2017 Capital One Services, LLC and Bitwise, Inc. * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * http://www.apache.org/licenses/LICENSE-2.0 * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License *******************************************************************************/ package hydrograph.engine.cascading.assembly; import cascading.flow.FlowDef; import cascading.jdbc.JDBCScheme; import cascading.jdbc.JDBCTap; import cascading.jdbc.TableDesc; import cascading.jdbc.db.DBInputFormat; import cascading.pipe.Pipe; import cascading.tap.SinkMode; import cascading.tuple.Fields; import hydrograph.engine.cascading.assembly.base.BaseComponent; import hydrograph.engine.cascading.assembly.infra.ComponentParameters; import hydrograph.engine.cascading.assembly.utils.InputOutputFieldsAndTypesCreator; import hydrograph.engine.core.component.entity.InputRDBMSEntity; import hydrograph.engine.core.component.entity.elements.OutSocket; import hydrograph.engine.core.component.entity.elements.SchemaField; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.Arrays; import java.util.List; public abstract class InputRDBMSAssembly extends BaseComponent<InputRDBMSEntity> { /** * RDBMS Input Component - read records from RDBMS Table. * */ private static final long serialVersionUID = -2946197683137950707L; protected FlowDef flowDef; protected Pipe pipe; protected List<SchemaField> schemaFieldList; protected InputRDBMSEntity inputRDBMSEntity; protected String[] fieldsDataType; @SuppressWarnings("rawtypes") Class<? extends DBInputFormat> inputFormatClass; protected JDBCScheme scheme; private TableDesc tableDesc; protected String driverName; protected String jdbcURL; protected String[] columnDefs = {}; protected String[] primaryKeys = null; protected Fields fields; protected String[] columnNames; protected JDBCTap rdbmsTap; protected final static String TIME_STAMP = "HH:mm:ss"; protected InputOutputFieldsAndTypesCreator<InputRDBMSEntity> fieldsCreator; private static Logger LOG = LoggerFactory.getLogger(InputRDBMSAssembly.class); public InputRDBMSAssembly(InputRDBMSEntity baseComponentEntity, ComponentParameters componentParameters) { super(baseComponentEntity, componentParameters); } @Override public void initializeEntity(InputRDBMSEntity assemblyEntityBase) { inputRDBMSEntity = assemblyEntityBase; } public abstract void intializeRdbmsSpecificDrivers(); /* * (non-Javadoc) * * @see * com.bitwiseglobal.cascading.assembly.base.BaseComponent#createAssembly() * This method call the generate Taps and Pipes and setOutlinks */ protected void createAssembly() { fieldsCreator = new InputOutputFieldsAndTypesCreator<InputRDBMSEntity>(inputRDBMSEntity); intializeRdbmsSpecificDrivers(); generateTapsAndPipes(); // exception handled separately within try { flowDef = flowDef.addSource(pipe, rdbmsTap); if (LOG.isTraceEnabled()) { LOG.trace(inputRDBMSEntity.toString()); } for (OutSocket outSocket : inputRDBMSEntity.getOutSocketList()) { String[] fieldsArray = new String[inputRDBMSEntity.getFieldsList().size()]; int i = 0; for (SchemaField Fields : inputRDBMSEntity.getFieldsList()) { fieldsArray[i++] = Fields.getFieldName(); } LOG.trace("Creating input " + inputRDBMSEntity.getDatabaseType() + " assembly for '" + inputRDBMSEntity.getComponentId() + "' for socket: '" + outSocket.getSocketId() + "' of type: '" + outSocket.getSocketType() + "'"); setOutLink(outSocket.getSocketType(), outSocket.getSocketId(), inputRDBMSEntity.getComponentId(), pipe, new Fields(fieldsArray)); } } catch (Exception e) { LOG.error("Error in creating assembly for component '" + inputRDBMSEntity.getComponentId() + "', Error: " + e.getMessage(), e); throw new RuntimeException(e); } } /** * This method will create the table descriptor and scheme to read the data * from RDBMS Table. In this method, table descriptor and scheme will be * created for specific file format like TextDelimited for Text file, and so * on for other file format like parquet, etc. */ protected void prepareScheme() { fields = fieldsCreator.makeFieldsWithTypes(); columnNames = fieldsCreator.getFieldNames(); LOG.debug("Applying " + inputRDBMSEntity.getDatabaseType() + " schema to read data from RDBMS"); createTableDescAndScheme(); } protected void createTableDescAndScheme() { // For sql query if (inputRDBMSEntity.getSelectQuery() != null && inputRDBMSEntity.getSelectQuery() != "") { String selectgSql = inputRDBMSEntity.getSelectQuery(); String countSql = inputRDBMSEntity.getCountQuery(); scheme = new JDBCScheme(inputFormatClass, fields, columnNames, selectgSql, countSql, -1); } else { tableDesc = new TableDesc(inputRDBMSEntity.getTableName(), fieldsCreator.getFieldNames(), columnDefs, primaryKeys); scheme = new JDBCScheme(inputFormatClass, null, fields, columnNames, null, null, -1, null, null, true); } } protected void initializeRdbmsTap() { LOG.debug("Initializing RDBMS Tap."); if (inputRDBMSEntity.getSelectQuery() == null || inputRDBMSEntity.getSelectQuery() == "") { rdbmsTap = new JDBCTap(jdbcURL, inputRDBMSEntity.getUsername(), inputRDBMSEntity.getPassword(), driverName, tableDesc, scheme, SinkMode.REPLACE); } else { rdbmsTap = new JDBCTap(jdbcURL, inputRDBMSEntity.getUsername(), inputRDBMSEntity.getPassword(), driverName, scheme); } } public void generateTapsAndPipes() { // initializing each pipe and tap LOG.debug(inputRDBMSEntity.getDatabaseType() + " Input Component '" + inputRDBMSEntity.getComponentId() + "': [ Database Name: " + inputRDBMSEntity.getDatabaseName() + (inputRDBMSEntity.getTableName() == null ? (", Select Query: " + inputRDBMSEntity.getSelectQuery() + ", Count Query: " + inputRDBMSEntity.getCountQuery()) : (", Table Name: " + inputRDBMSEntity.getTableName())) + ", Column Names: " + Arrays.toString(fieldsCreator.getFieldNames()) + "]"); // scheme and tap to be initialized in its specific assembly try { schemaFieldList = inputRDBMSEntity.getFieldsList(); fieldsDataType = new String[schemaFieldList.size()]; int i = 0; for (SchemaField eachSchemaField : schemaFieldList) { fieldsDataType[i++] = eachSchemaField.getFieldDataType(); } prepareScheme(); } catch (Exception e) { LOG.error("Error in preparing scheme for component '" + inputRDBMSEntity.getComponentId() + "': " + e.getMessage()); throw new RuntimeException(e); } flowDef = componentParameters.getFlowDef(); initializeRdbmsTap(); pipe = new Pipe(inputRDBMSEntity.getComponentId()); setHadoopProperties(rdbmsTap.getStepConfigDef()); setHadoopProperties(pipe.getStepConfigDef()); } }