/*******************************************************************************
* Copyright 2017 Capital One Services, LLC and Bitwise, Inc.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*******************************************************************************/
package hydrograph.engine.cascading.assembly;
import cascading.flow.FlowDef;
import cascading.pipe.Pipe;
import cascading.scheme.Scheme;
import cascading.scheme.hadoop.TextDelimited;
import cascading.tap.Tap;
import cascading.tap.hadoop.Hfs;
import cascading.tuple.Fields;
import hydrograph.engine.cascading.assembly.base.BaseComponent;
import hydrograph.engine.cascading.assembly.infra.ComponentParameters;
import hydrograph.engine.cascading.assembly.utils.InputOutputFieldsAndTypesCreator;
import hydrograph.engine.cascading.scheme.HydrographDelimitedParser;
import hydrograph.engine.core.component.entity.InputFileDelimitedEntity;
import hydrograph.engine.core.component.entity.elements.OutSocket;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Arrays;
public class InputFileDelimitedAssembly extends BaseComponent<InputFileDelimitedEntity> {
private static final long serialVersionUID = -2946197683137950707L;
private Pipe pipe;
@SuppressWarnings("rawtypes")
private Tap tap;
@SuppressWarnings("rawtypes")
private Scheme scheme;
private FlowDef flowDef;
private InputFileDelimitedEntity inputFileDelimitedEntity;
private static Logger LOG = LoggerFactory.getLogger(InputFileDelimitedAssembly.class);
private InputOutputFieldsAndTypesCreator<InputFileDelimitedEntity> fieldsCreator;
public InputFileDelimitedAssembly(InputFileDelimitedEntity baseComponentEntity, ComponentParameters componentParameters) {
super(baseComponentEntity, componentParameters);
}
@Override
protected void createAssembly() {
try {
fieldsCreator = new InputOutputFieldsAndTypesCreator<InputFileDelimitedEntity>(inputFileDelimitedEntity);
LOG.debug("InputFile Delimited Component: [ Fields List : " + Arrays.toString(fieldsCreator.getFieldNames()) + ", Field Types : "
+ Arrays.toString(fieldsCreator.getFieldDataTypes()) + ", Delimiter : '" + inputFileDelimitedEntity.getDelimiter()
+ "' , Path : " + inputFileDelimitedEntity.getPath() + ", Batch : "
+ inputFileDelimitedEntity.getBatch() + "]");
generateTapsAndPipes();
flowDef = flowDef.addSource(pipe, tap);
if (LOG.isTraceEnabled()) {
LOG.trace(inputFileDelimitedEntity.toString());
}
for (OutSocket outSocket : inputFileDelimitedEntity.getOutSocketList()) {
LOG.trace("Creating input file delimited assembly for '" + inputFileDelimitedEntity.getComponentId()
+ "' for socket: '" + outSocket.getSocketId() + "' of type: '" + outSocket.getSocketType()
+ "'");
setOutLink(outSocket.getSocketType(), outSocket.getSocketId(),
inputFileDelimitedEntity.getComponentId(), pipe, scheme.getSourceFields());
}
} catch (Exception e) {
LOG.error(e.getMessage(), e);
throw new RuntimeException(e.getMessage());
}
}
@SuppressWarnings("unchecked")
public void generateTapsAndPipes() throws IOException {
try {
prepareScheme();
} catch (Exception e) {
LOG.error("Error in preparing scheme for component '" + inputFileDelimitedEntity.getComponentId() + "': "
+ e.getMessage());
throw new RuntimeException(e);
}
flowDef = componentParameters.getFlowDef();
// initializing each pipe and tap
tap = new Hfs(scheme, inputFileDelimitedEntity.getPath());
pipe = new Pipe(inputFileDelimitedEntity.getComponentId()+inputFileDelimitedEntity.getOutSocketList().get(0).getSocketId());
setHadoopProperties(pipe.getStepConfigDef());
setHadoopProperties(tap.getStepConfigDef());
}
protected void prepareScheme() {
Fields inputFields = fieldsCreator.makeFieldsWithTypes();
HydrographDelimitedParser delimitedParser = new HydrographDelimitedParser(inputFileDelimitedEntity.getDelimiter(),
inputFileDelimitedEntity.getQuote(), null, inputFileDelimitedEntity.isStrict(),
inputFileDelimitedEntity.isSafe());
scheme = new TextDelimited(inputFields, null, inputFileDelimitedEntity.isHasHeader(), false,
inputFileDelimitedEntity.getCharset(), delimitedParser);
}
@Override
public void initializeEntity(InputFileDelimitedEntity assemblyEntityBase) {
this.inputFileDelimitedEntity=assemblyEntityBase;
}
}