/*******************************************************************************
* Copyright 2017 Capital One Services, LLC and Bitwise, Inc.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License
*******************************************************************************/
package hydrograph.engine.cascading.joiners;
import cascading.pipe.joiner.Joiner;
import cascading.pipe.joiner.JoinerClosure;
import cascading.pipe.joiner.LeftJoin;
import cascading.tuple.Fields;
import cascading.tuple.Tuple;
import cascading.tuple.Tuples;
import cascading.tuple.util.TupleViews;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Arrays;
import java.util.Iterator;
public class HashJoinJoiner implements Joiner {
/**
*
*/
Fields fieldDeclaration;
Option chosenOption;
Joiner allJoiner;
private static final long serialVersionUID = 2662174573324013434L;
/** Field LOG */
private static final Logger LOG = LoggerFactory
.getLogger(HashJoinJoiner.class);
public HashJoinJoiner() {
this.chosenOption = Option.first;
}
public HashJoinJoiner(Option option) {
this.chosenOption = option;
//cascading left joiner will perform all option (Cartesian product)
if (chosenOption == Option.all) {
allJoiner = new LeftJoin();
}
}
public Iterator<Tuple> getIterator(JoinerClosure closure) {
if (chosenOption == Option.all) {
return allJoiner.getIterator(closure);
} else {
//only for first and last option
return new JoinIterator(closure, chosenOption);
}
}
public int numJoins() {
return -1;
}
public static enum Option {
first, last, all
}
public static class JoinIterator implements Iterator<Tuple> {
final JoinerClosure closure;
Iterator[] iterators;
Tuple[] tupleValues;
final int MAIN_INPUT_INDEX = 0;
final Option chosenOption;
TupleBuilder resultBuilder;
Tuple result = new Tuple(); // will be replaced
public JoinIterator(JoinerClosure closure, Option option) {
this.closure = closure;
this.chosenOption = option;
LOG.debug("cogrouped size: {}", closure.size());
init();
}
private void setupForFirst() {
// set the very first tuples from all the iterators except main
for (int i = 0; i < iterators.length; i++) {
if (i == MAIN_INPUT_INDEX) {
continue;// very imp to skip this
}
// if there is nothing in an iterator then set it to all
// nulls(empty tuple)
tupleValues[i] = iterators[i].hasNext() ? (Tuple) iterators[i]
.next() : Tuple
.size(closure.getValueFields()[i].size());
}
}
private void setupForLast() {
// set the very first tuples from all the iterators except main
for (int i = 0; i < iterators.length; i++) {
if (i == MAIN_INPUT_INDEX) {
continue;// very imp to skip this
}
Tuple lastTuple = getLastTuple(iterators[i]);
// if there is nothing in an iterator then set it to all
// nulls(empty tuple)
tupleValues[i] = lastTuple == null ? Tuple.size(closure
.getValueFields()[i].size()) : lastTuple;
}
}
private Tuple getLastTuple(Iterator<Tuple> iterator) {
Tuple lastTuple = null;
while (iterator.hasNext()) {
lastTuple = iterator.next();
}
return lastTuple;
}
protected void init() {
iterators = new Iterator[closure.size()];
tupleValues = new Tuple[iterators.length];
for (int i = 0; i < closure.size(); i++)
iterators[i] = getIterator(i);
if (chosenOption == Option.first) {
setupForFirst();
} else {
setupForLast();
}
boolean isUnknown = false;
for (Fields fields : closure.getValueFields())
isUnknown |= fields.isUnknown();
if (isUnknown)
resultBuilder = new TupleBuilder() {
Tuple result = new Tuple(); // is re-used
@Override
public Tuple makeResult(Tuple[] tuples) {
result.clear();
// flatten the results into one Tuple
for (Tuple lastValue : tuples)
result.addAll(lastValue);
return result;
}
};
else
resultBuilder = new TupleBuilder() {
Tuple result;
{
// handle self join.
Fields[] fields = closure.getValueFields();
if (closure.isSelfJoin()) {
fields = new Fields[closure.size()];
Arrays.fill(fields, closure.getValueFields()[0]);
}
result = TupleViews.createComposite(fields);
}
@Override
public Tuple makeResult(Tuple[] tuples) {
return TupleViews.reset(result, tuples);
}
};
}
protected Iterator getIterator(int i) {
return closure.getIterator(i);
}
public final boolean hasNext() {
return iterators[MAIN_INPUT_INDEX].hasNext();
}
public Tuple next() {
tupleValues[MAIN_INPUT_INDEX] = (Tuple) iterators[MAIN_INPUT_INDEX]
.next();
return makeResult(tupleValues);
}
private Tuple makeResult(Tuple[] lastValues) {
Tuples.asModifiable(result);
result = resultBuilder.makeResult(lastValues);
if (LOG.isTraceEnabled())
LOG.trace("tuple: {}", result.print());
return result;
}
public void remove() {
// unsupported
}
}
static interface TupleBuilder {
Tuple makeResult(Tuple[] tuples);
}
}