/*******************************************************************************
* Copyright 2017 Capital One Services, LLC and Bitwise, Inc.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*******************************************************************************/
package hydrograph.engine.cascading.assembly;
import cascading.pipe.HashJoin;
import cascading.pipe.Pipe;
import cascading.pipe.assembly.Rename;
import cascading.pipe.assembly.Retain;
import cascading.pipe.joiner.Joiner;
import cascading.tuple.Fields;
import hydrograph.engine.cascading.assembly.base.BaseComponent;
import hydrograph.engine.cascading.assembly.infra.ComponentParameters;
import hydrograph.engine.cascading.assembly.utils.JoinHelper;
import hydrograph.engine.cascading.joiners.HashJoinJoiner;
import hydrograph.engine.cascading.joiners.HashJoinJoiner.Option;
import hydrograph.engine.core.component.entity.LookupEntity;
import hydrograph.engine.core.component.entity.elements.JoinKeyFields;
import hydrograph.engine.core.component.entity.elements.OutSocket;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* HashJoin Component for joining two or more files.
*
* @author ganesha
*
*/
public class LookupAssembly extends BaseComponent<LookupEntity> {

	private static final long serialVersionUID = 1L;
	private static final Logger LOG = LoggerFactory.getLogger(LookupAssembly.class);

	// Assigned through initializeEntity(); intentionally declared without an
	// initializer.
	private LookupEntity lookupEntity;

	// State rebuilt by prepare() for every out socket. All three arrays are
	// indexed by "file index": 0 for the input whose socket type is "driver",
	// 1..n for the remaining inputs in the order they appear.
	private Fields[] uniqInputFields;
	private Fields[] uniqKeyFields;
	private Pipe[] inputPipes;
	private Joiner joiner;
	private JoinHelper joinHelper;

	public LookupAssembly(LookupEntity lookupEntity,
			ComponentParameters componentParameters) {
		super(lookupEntity, componentParameters);
	}

	/**
	 * Builds one {@link HashJoin} per out socket of the lookup entity and
	 * registers the renamed/retained result as the out link of that socket.
	 *
	 * @throws RuntimeException
	 *             wrapping (with the original as cause) any exception raised
	 *             while the assembly is being built
	 */
	@Override
	protected void createAssembly() {
		try {
			if (LOG.isTraceEnabled()) {
				LOG.trace(lookupEntity.toString());
			}
			for (OutSocket outSocket : lookupEntity.getOutSocketList()) {
				// Guarded so the message is only concatenated when it will
				// actually be written.
				if (LOG.isTraceEnabled()) {
					LOG.trace("Creating hash join assembly for '"
							+ lookupEntity.getComponentId() + "' for socket: '"
							+ outSocket.getSocketId() + "' of type: '"
							+ outSocket.getSocketType() + "'");
				}
				prepare(outSocket);
				Pipe join = new HashJoin(lookupEntity.getComponentId()
						+ outSocket.getSocketId(), inputPipes, uniqKeyFields,
						getJoinOutputFields(), joiner);
				setHadoopProperties(join.getStepConfigDef());
				setOutLink(join, outSocket);
			}
		} catch (Exception e) {
			LOG.error(e.getMessage(), e);
			// Keep the original exception as the cause so its stack trace is
			// not lost to callers.
			throw new RuntimeException(e.getMessage(), e);
		}
	}

	/**
	 * Concatenates the index-prefixed fields of every input, in file-index
	 * order, to form the declared fields of the join.
	 *
	 * @return all entries of {@code uniqInputFields} appended together
	 */
	private Fields getJoinOutputFields() {
		Fields joinOutputFields = new Fields();
		for (Fields fields : uniqInputFields) {
			joinOutputFields = joinOutputFields.append(fields);
		}
		return joinOutputFields;
	}

	/**
	 * Applies the output schema of the given socket to the join result and
	 * registers it as the out link of this component.
	 *
	 * @param joinResult
	 *            the pipe produced by the join
	 * @param outSocket
	 *            the out socket the link is created for
	 */
	private void setOutLink(Pipe joinResult, OutSocket outSocket) {
		setOutLink(outSocket.getSocketType(), outSocket.getSocketId(),
				lookupEntity.getComponentId(),
				applyOutputSchema(joinResult, outSocket),
				joinHelper.getMapTargetFields(outSocket));
	}

	/**
	 * Renames the index-prefixed source fields of the join result to the
	 * mapped target fields of the socket and then retains only those target
	 * fields.
	 *
	 * @param joinResult
	 *            the pipe produced by the join
	 * @param outSocket
	 *            the out socket whose field mapping is applied
	 * @return the pipe carrying only the target fields
	 */
	private Pipe applyOutputSchema(Pipe joinResult, OutSocket outSocket) {
		Pipe outPort = new Rename(joinResult,
				getAllSourceFieldsWithFileIndexPrefix(outSocket),
				joinHelper.getMapTargetFields(outSocket));
		outPort = new Retain(outPort, joinHelper.getMapTargetFields(outSocket));
		return outPort;
	}

	/**
	 * Collects, for every input that has mapped source fields for the given
	 * socket, those source fields prefixed with the input's file index.
	 * <p>
	 * The file index is taken from {@link #assignFileIndexes()}, i.e. the same
	 * numbering {@link #prepare(OutSocket)} uses, so an input without mapped
	 * fields no longer shifts the prefix of the inputs that follow it.
	 *
	 * @param outSocket
	 *            the out socket whose mapping is inspected
	 * @return the prefixed source fields of all inputs, appended in input order
	 */
	private Fields getAllSourceFieldsWithFileIndexPrefix(OutSocket outSocket) {
		Fields combinedSourceFields = new Fields();
		int[] fileIndexes = assignFileIndexes();
		for (int i = 0; i < fileIndexes.length; i++) {
			Fields sourceFields = joinHelper.getMapSourceFields(
					componentParameters.getinSocketId().get(i), outSocket);
			if (sourceFields == null) {
				// Nothing mapped from this input; its file index stays
				// reserved for it.
				continue;
			}
			combinedSourceFields = combinedSourceFields
					.append(prefixWithFileIndex(sourceFields, fileIndexes[i]));
		}
		return combinedSourceFields;
	}

	/**
	 * Translates the textual match setting into a {@link Option}.
	 *
	 * @param match
	 *            one of "first", "last" or "all"
	 * @return the corresponding option, or {@code null} when the value is not
	 *         one of the three recognised strings
	 */
	private Option getMatchOption(String match) {
		// NOTE(review): an unrecognised value yields null, which is handed to
		// HashJoinJoiner as-is -- confirm the joiner tolerates that.
		Option option = null;
		if (match.equals("first")) {
			option = Option.first;
		} else if (match.equals("last")) {
			option = Option.last;
		} else if (match.equals("all")) {
			option = Option.all;
		}
		return option;
	}

	/**
	 * prepares the {@link HashJoinJoiner} for Join and initializes the input
	 * pipes, input fields and key fields
	 *
	 * @param outSocket
	 *            the out socket being assembled (not read by this method)
	 */
	private void prepare(OutSocket outSocket) {
		joinHelper = new JoinHelper(componentParameters);
		uniqInputFields = new Fields[componentParameters.getInputFieldsList()
				.size()];
		// NOTE(review): sized by the entity's key field count but indexed by
		// file index below -- confirm both counts always agree.
		uniqKeyFields = new Fields[lookupEntity.getAllKeyFieldSize()];
		inputPipes = new Pipe[componentParameters.getInputPipes().size()];
		joiner = new HashJoinJoiner(getMatchOption(lookupEntity.getMatch()));

		// NOTE(review): declared outside the loop, so an input with no
		// matching key-field entry reuses the previous input's key fields.
		Fields keyFields = new Fields();
		int[] fileIndexes = assignFileIndexes();

		for (int i = 0; i < fileIndexes.length; i++) {
			int index = fileIndexes[i];
			Fields originalFields = componentParameters.getInputFieldsList()
					.get(i);
			String inSocketId = componentParameters.getinSocketId().get(i);

			for (JoinKeyFields keyFieldsEntity : lookupEntity.getKeyFields()) {
				if (keyFieldsEntity.getInSocketId().equalsIgnoreCase(inSocketId)) {
					keyFields = new Fields(keyFieldsEntity.getFields());
				}
			}

			// rename fields. prefix with file index
			Fields inputFields = prefixWithFileIndex(originalFields, index);
			uniqInputFields[index] = inputFields;

			// rename key fields. prefix with file index
			keyFields = prefixWithFileIndex(keyFields, index);
			uniqKeyFields[index] = keyFields;

			Pipe inputLink = componentParameters.getInputPipes().get(i);
			inputLink = new Rename(inputLink, originalFields, inputFields);
			inputLink = new Pipe("link_" + index, inputLink);
			// retain only mapped fields and key fields
			// to be done
			inputPipes[index] = inputLink;
		}
	}

	/**
	 * Computes the file index of every input: 0 for an input whose socket type
	 * is "driver", and 1, 2, ... for the other inputs in the order they
	 * appear.
	 *
	 * @return an array with one file index per input pipe
	 */
	private int[] assignFileIndexes() {
		int inputCount = componentParameters.getInputPipes().size();
		int[] fileIndexes = new int[inputCount];
		int nextLookupIndex = 1;
		for (int i = 0; i < inputCount; i++) {
			String inSocketType = componentParameters.getinSocketType().get(i);
			if (inSocketType.equals("driver")) {
				fileIndexes[i] = 0;
			} else {
				fileIndexes[i] = nextLookupIndex;
				nextLookupIndex++;
			}
		}
		return fileIndexes;
	}

	/**
	 * Renames every field of {@code fields} from {@code name} to
	 * {@code <index>.<name>}.
	 *
	 * @param fields
	 *            the fields to rename
	 * @param index
	 *            the file index used as prefix
	 * @return the renamed fields
	 */
	private static Fields prefixWithFileIndex(Fields fields, int index) {
		Fields prefixed = fields;
		for (int pos : fields.getPos()) {
			String name = prefixed.get(pos).toString();
			prefixed = prefixed.rename(new Fields(name), new Fields(index + "."
					+ name));
		}
		return prefixed;
	}

	/**
	 * Stores the entity this assembly is built from.
	 *
	 * @param assemblyEntityBase
	 *            the lookup entity describing this component
	 */
	@Override
	public void initializeEntity(LookupEntity assemblyEntityBase) {
		this.lookupEntity = assemblyEntityBase;
	}
}