/*******************************************************************************
 * Copyright 2017 Capital One Services, LLC and Bitwise, Inc.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License
 *******************************************************************************/
package hydrograph.engine.cascading.assembly.handlers;

import cascading.flow.FlowProcess;
import cascading.operation.BaseOperation;
import cascading.operation.Buffer;
import cascading.operation.BufferCall;
import cascading.operation.OperationCall;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;
import hydrograph.engine.cascading.assembly.context.RemoveDupsHandlerContext;
import hydrograph.engine.cascading.assembly.infra.AssemblyCreationException;
import hydrograph.engine.core.constants.Keep;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Iterator;

/**
 * Cascading {@link Buffer} that re-emits every record it receives with one
 * extra trailing boolean column named {@code keep}.
 *
 * <p>Within a single {@code operate} call the flag is set according to the
 * configured {@link Keep} option:
 * <ul>
 * <li>{@code Keep.first} - {@code true} on the first record only;</li>
 * <li>{@code Keep.last} - {@code true} on the last record only;</li>
 * <li>{@code Keep.unique} - {@code true} only when the call delivers exactly
 * one record.</li>
 * </ul>
 * No record is dropped by this class; it only tags records.
 */
public class RemoveDupsHandler extends BaseOperation<RemoveDupsHandlerContext>
		implements Buffer<RemoveDupsHandlerContext> {

	private static final long serialVersionUID = 3236218434040424251L;
	private static final Logger LOG = LoggerFactory
			.getLogger(RemoveDupsHandler.class);

	/** Name of the boolean marker column appended to the declared fields. */
	private static final String KEEP_FIELD_NAME = "keep";

	private final String linkType;
	private final Fields inputFields;
	private final Keep keep;

	/**
	 * Creates the handler. The operation is declared with
	 * {@code inputFields.size()} arguments and with {@code inputFields} plus
	 * the {@code keep} column as its field declaration.
	 *
	 * @param socketType
	 *            value later returned by {@link #getOutputLinkType()}
	 * @param keep
	 *            which record(s) receive a {@code true} flag; validated in
	 *            {@link #prepare(FlowProcess, OperationCall)}, not here
	 * @param inputFields
	 *            fields of the incoming records
	 */
	public RemoveDupsHandler(String socketType, Keep keep, Fields inputFields) {
		super(inputFields.size(), inputFields.append(new Fields(
				KEEP_FIELD_NAME)));
		this.linkType = socketType;
		this.inputFields = inputFields;
		this.keep = keep;
		LOG.trace("RemoveDupsHandler object created");
	}

	/**
	 * @return the socket type passed to the constructor
	 */
	public String getOutputLinkType() {
		return linkType;
	}

	/**
	 * @return the input fields passed to the constructor
	 */
	public Fields getInputFields() {
		return inputFields;
	}

	/**
	 * @return the same fields as {@link #getInputFields()}; the appended
	 *         {@code keep} column is not part of the returned value
	 */
	public Fields getOutputFields() {
		return inputFields;
	}

	/**
	 * Chooses the flagging strategy for the configured {@link Keep} option and
	 * stores it in a new {@link RemoveDupsHandlerContext} on the call.
	 *
	 * @throws AssemblyCreationException
	 *             if the keep option is none of {@code first}, {@code last} or
	 *             {@code unique} (this includes {@code null})
	 */
	@SuppressWarnings("rawtypes")
	@Override
	public void prepare(FlowProcess flowProcess,
			OperationCall<RemoveDupsHandlerContext> call) {
		// An if-chain (rather than a switch) keeps the null case flowing into
		// the descriptive exception below.
		RemoveDupsKeep keepImplementation;
		if (keep == Keep.first) {
			keepImplementation = new KeepFirst();
		} else if (keep == Keep.last) {
			keepImplementation = new KeepLast();
		} else if (keep == Keep.unique) {
			keepImplementation = new KeepUnique();
		} else {
			throw new AssemblyCreationException(
					"Unable to create RemoveDupsHandler for unknown keep option "
							+ keep);
		}

		call.setContext(new RemoveDupsHandlerContext(keepImplementation));
	}

	/**
	 * Delegates to the strategy stored in the call's context by
	 * {@link #prepare(FlowProcess, OperationCall)}.
	 */
	@SuppressWarnings("rawtypes")
	@Override
	public void operate(FlowProcess arg0,
			BufferCall<RemoveDupsHandlerContext> call) {
		call.getContext().getKeepImplementation().operate(call);
	}

	/**
	 * Appends the keep flag as the last element of {@code record} and sends the
	 * record to the call's output collector.
	 */
	private static void emit(BufferCall<RemoveDupsHandlerContext> call,
			Tuple record, boolean keepFlag) {
		record.addBoolean(keepFlag);
		call.getOutputCollector().add(record);
	}

	/** Strategy that tags the records of one buffer call with a keep flag. */
	public interface RemoveDupsKeep {

		/**
		 * Reads every record from the call's arguments iterator and emits each
		 * one with a trailing boolean keep flag.
		 *
		 * @param call
		 *            the buffer call supplying the records and the collector
		 */
		void operate(BufferCall<RemoveDupsHandlerContext> call);
	}

	/** Flags the first record {@code true} and every later record {@code false}. */
	class KeepFirst implements RemoveDupsKeep {

		@Override
		public void operate(BufferCall<RemoveDupsHandlerContext> call) {
			Iterator<TupleEntry> records = call.getArgumentsIterator();
			boolean isFirst = true;
			while (records.hasNext()) {
				emit(call, records.next().getTupleCopy(), isFirst);
				isFirst = false;
			}
		}
	}

	/** Flags the last record {@code true} and every earlier record {@code false}. */
	class KeepLast implements RemoveDupsKeep {

		@Override
		public void operate(BufferCall<RemoveDupsHandlerContext> call) {
			Iterator<TupleEntry> records = call.getArgumentsIterator();
			while (records.hasNext()) {
				// The copy is taken before probing hasNext() again.
				// NOTE(review): presumably the iterator may reuse its entry
				// once advanced - keep this ordering.
				Tuple record = records.next().getTupleCopy();
				boolean isLast = !records.hasNext();
				emit(call, record, isLast);
			}
		}
	}

	/**
	 * Flags a record {@code true} only when it is the sole record of the call;
	 * when there are two or more records, all of them are flagged
	 * {@code false}.
	 */
	class KeepUnique implements RemoveDupsKeep {

		@Override
		public void operate(BufferCall<RemoveDupsHandlerContext> call) {
			Iterator<TupleEntry> records = call.getArgumentsIterator();
			if (!records.hasNext()) {
				return;
			}

			Tuple firstRecord = records.next().getTupleCopy();
			// If more records follow, every record (including this one) is
			// rejected.
			boolean isOnlyRecord = !records.hasNext();
			emit(call, firstRecord, isOnlyRecord);

			while (records.hasNext()) {
				emit(call, records.next().getTupleCopy(), false);
			}
		}
	}
}