/*******************************************************************************
* Copyright 2017 Capital One Services, LLC and Bitwise, Inc.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*******************************************************************************/
package hydrograph.engine.cascading.assembly;
import cascading.pipe.Each;
import cascading.pipe.Every;
import cascading.pipe.GroupBy;
import cascading.pipe.Pipe;
import cascading.pipe.assembly.Retain;
import cascading.tuple.Fields;
import hydrograph.engine.cascading.assembly.base.BaseComponent;
import hydrograph.engine.cascading.assembly.handlers.RemoveDupsHandler;
import hydrograph.engine.cascading.assembly.infra.ComponentParameters;
import hydrograph.engine.cascading.filters.RemoveDupsOutLinkFilter;
import hydrograph.engine.cascading.filters.RemoveDupsUnusedLinkFilter;
import hydrograph.engine.core.component.entity.RemoveDupsEntity;
import hydrograph.engine.core.component.entity.elements.KeyField;
import hydrograph.engine.core.component.entity.elements.OutSocket;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Collections;
/**
 * Cascading assembly for the Remove Dups component.
 *
 * <p>
 * For every out socket of type "out" the input pipe is grouped on the
 * entity's key fields (sorted on its secondary key fields), passed through a
 * {@link RemoveDupsHandler} and then filtered on the "keep" field by a
 * {@link RemoveDupsOutLinkFilter}. An optional socket of type "unused" reuses
 * the same handler output and filters it with a
 * {@link RemoveDupsUnusedLinkFilter} instead.
 */
public class RemoveDupsAssembly extends BaseComponent<RemoveDupsEntity> {

    private static final long serialVersionUID = 8050470302089972525L;

    private static final Logger LOG = LoggerFactory.getLogger(RemoveDupsAssembly.class);

    private RemoveDupsEntity removeDupsEntity;
    private GroupBy sortDedupPipe;
    /** Output of the handler; stays {@code null} until an "out" socket has been processed. */
    private Pipe everyPipe;

    public RemoveDupsAssembly(RemoveDupsEntity entity, ComponentParameters componentParameters) {
        super(entity, componentParameters);
    }

    /**
     * Builds the pipes for all out sockets of the entity.
     *
     * <p>
     * Sockets of type "out" are wired as they are encountered. A socket of
     * type "unused" is only remembered during the iteration and wired
     * afterwards, because it depends on the pipe created for the "out"
     * socket. If several "unused" sockets are present, only the last one is
     * wired.
     *
     * @throws RuntimeException
     *             wrapping any exception raised while building the assembly;
     *             the original exception is preserved as the cause
     */
    @Override
    protected void createAssembly() {
        try {
            if (LOG.isTraceEnabled()) {
                LOG.trace(removeDupsEntity.toString());
            }
            OutSocket unusedSocket = null;
            for (OutSocket outSocket : removeDupsEntity.getOutSocketList()) {
                String socketType = outSocket.getSocketType();
                if (socketType.equalsIgnoreCase("out")) {
                    LOG.trace("Creating remove dups assembly for '{}' for socket: '{}' of type: '{}'",
                            removeDupsEntity.getComponentId(), outSocket.getSocketId(), socketType);
                    createAssemblyForOutSocket(outSocket);
                } else if (socketType.equalsIgnoreCase("unused")) {
                    // Deferred: the unused link needs everyPipe, which the "out" socket creates.
                    unusedSocket = outSocket;
                }
            }
            if (unusedSocket != null) {
                LOG.trace("Creating remove dups assembly for '{}' for socket: '{}' of type: '{}'",
                        removeDupsEntity.getComponentId(), unusedSocket.getSocketId(),
                        unusedSocket.getSocketType());
                createAssemblyForUnusedSocket(unusedSocket);
            }
        } catch (Exception e) {
            LOG.error(e.getMessage(), e);
            // Keep the original exception as the cause so its stack trace is not lost.
            throw new RuntimeException(e.getMessage(), e);
        }
    }

    /**
     * Wires the "unused" socket onto the handler output created for the "out"
     * socket.
     *
     * @param outSocket
     *            the socket of type "unused"
     * @throws NullOutSocketException
     *             if no "out" socket has been processed, i.e. {@code everyPipe}
     *             is still {@code null}
     */
    private void createAssemblyForUnusedSocket(OutSocket outSocket) {
        if (everyPipe == null) {
            throw new NullOutSocketException(removeDupsEntity.getComponentId());
        }
        Pipe filterUnusedPipe = createFilterPipe("unused", outSocket.getSocketId(), everyPipe);
        setOutLink(outSocket.getSocketType(), outSocket.getSocketId(), removeDupsEntity.getComponentId(),
                filterUnusedPipe, componentParameters.getInputFields());
    }

    /**
     * Groups the input pipe on the key fields, applies the remove dups handler
     * and registers the filtered result as the out link of the given socket.
     *
     * @param outSocket
     *            the socket of type "out"
     */
    private void createAssemblyForOutSocket(OutSocket outSocket) {
        Fields keyFields = getFieldsFromKeyFields(removeDupsEntity.getKeyFields());
        Fields secondaryKeyFields = getFieldsFromKeyFields(removeDupsEntity.getSecondaryKeyFields());
        sortDedupPipe = new GroupBy(componentParameters.getInputPipe(), keyFields, secondaryKeyFields);
        everyPipe = createEveryPipe("out", outSocket.getSocketId(), sortDedupPipe);
        Pipe filterOutPipe = createFilterPipe("out", outSocket.getSocketId(), everyPipe);
        setOutLink(outSocket.getSocketType(), outSocket.getSocketId(), removeDupsEntity.getComponentId(),
                filterOutPipe, componentParameters.getInputFields());
    }

    /**
     * Creates a named pipe on top of the handler output, filters it on the
     * "keep" field and retains only the component's input fields.
     *
     * @param linkType
     *            "unused" selects {@link RemoveDupsUnusedLinkFilter}; any other
     *            value selects {@link RemoveDupsOutLinkFilter}
     * @param outSocketId
     *            id of the socket, used as suffix of the pipe name
     * @param everyPipe
     *            the pipe carrying the handler output
     * @return the filtered pipe restricted to the input fields
     */
    private Pipe createFilterPipe(String linkType, String outSocketId, Pipe everyPipe) {
        Pipe filterPipe = new Pipe(removeDupsEntity.getComponentId() + "_RemoveDupsFilter_" + outSocketId, everyPipe);
        setHadoopProperties(everyPipe.getStepConfigDef());
        Fields keepField = new Fields("keep");
        // Constant-first comparison so a null linkType falls through to the out-link filter.
        if ("unused".equals(linkType)) {
            filterPipe = new Each(filterPipe, keepField, new RemoveDupsUnusedLinkFilter());
        } else {
            filterPipe = new Each(filterPipe, keepField, new RemoveDupsOutLinkFilter());
        }
        return new Retain(filterPipe, componentParameters.getInputFields());
    }

    /**
     * Wraps the grouped pipe in a named pipe and applies a
     * {@link RemoveDupsHandler} to every group.
     *
     * @param linkType
     *            link type passed on to the handler
     * @param outSocketId
     *            id of the socket, used as suffix of the pipe name
     * @param groupByPipe
     *            the grouped (and sorted) pipe
     * @return the {@link Every} pipe emitting the handler results
     */
    private Pipe createEveryPipe(String linkType, String outSocketId, Pipe groupByPipe) {
        Pipe namedGroupPipe = new Pipe(removeDupsEntity.getComponentId() + "_" + outSocketId, groupByPipe);
        RemoveDupsHandler handler = new RemoveDupsHandler(linkType, removeDupsEntity.getKeep(),
                componentParameters.getInputFields());
        setHadoopProperties(namedGroupPipe.getStepConfigDef());
        return new Every(namedGroupPipe, handler.getInputFields(), handler, Fields.RESULTS);
    }

    /**
     * Creates an object of type {@link Fields} from array of {@link KeyField}
     *
     * @param keyFields
     *            an array of {@link KeyField} containing the field name and
     *            sort order; may be {@code null}
     * @return an object of type {@link Fields}; {@link Fields#NONE} when
     *         {@code keyFields} is {@code null}. Fields whose sort order is
     *         "desc" (case-insensitive) get a reverse-order comparator.
     */
    private Fields getFieldsFromKeyFields(KeyField[] keyFields) {
        if (keyFields == null) {
            return Fields.NONE;
        }
        String[] fieldNames = new String[keyFields.length];
        for (int i = 0; i < keyFields.length; i++) {
            fieldNames[i] = keyFields[i].getName();
        }
        Fields fields = new Fields(fieldNames);
        for (KeyField eachField : keyFields) {
            // Constant-first comparison: a missing sort order is treated as ascending.
            if ("desc".equalsIgnoreCase(eachField.getSortOrder())) {
                fields.setComparator(eachField.getName(), Collections.reverseOrder());
            }
        }
        return fields;
    }

    /**
     * Thrown when an "unused" socket is configured but no "out" socket has
     * been processed for the component. The error is logged on construction.
     */
    private static class NullOutSocketException extends RuntimeException {

        private static final long serialVersionUID = 1L;

        public NullOutSocketException(String componentId) {
            super("Out socket cannot be null for Remove Dups component '" + componentId + "'");
            Logger logger = LoggerFactory.getLogger(NullOutSocketException.class);
            logger.error(this.getMessage(), this);
        }
    }

    /**
     * Stores the entity this assembly is built from.
     *
     * @param assemblyEntityBase
     *            the remove dups entity
     */
    @Override
    public void initializeEntity(RemoveDupsEntity assemblyEntityBase) {
        this.removeDupsEntity = assemblyEntityBase;
    }
}