package storm.cookbook.tfidf.functions;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import storm.trident.tuple.TridentTuple;
import backtype.storm.tuple.Values;
/**
 * Accumulates the tuples received from two streams and pairs them up.
 *
 * <p>Tuples are grouped by the id of the stream they arrived on; at most two
 * distinct stream ids are accepted. {@link #join()} emits one row for every
 * (left tuple, right tuple) combination, where the "left" side is the stream
 * that has contributed fewer tuples.
 *
 * <p>Note: no rows are produced until both streams have contributed at least
 * one tuple; tuples from a lone side are not emitted on their own.
 */
public class OuterJoinState {

    // One entry per stream id (at most two); each entry holds the field
    // arrays of the tuples received from that stream, in arrival order.
    private final HashMap<Integer, List<Object[]>> bothSides = new HashMap<Integer, List<Object[]>>();

    /**
     * Records a tuple as belonging to the given stream.
     *
     * @param streamId id of the stream the tuple arrived on
     * @param input    tuple whose values are stored (copied via {@code toArray()})
     * @throws IllegalArgumentException if {@code streamId} would be a third
     *                                  distinct stream
     */
    public void addValues(int streamId, TridentTuple input) {
        List<Object[]> side = bothSides.get(streamId);
        if (side == null) {
            if (bothSides.size() >= 2) {
                throw new IllegalArgumentException("Outer join can only be performed between 2 streams");
            }
            side = new ArrayList<Object[]>();
            bothSides.put(streamId, side);
        }
        side.add(input.toArray());
    }

    /**
     * Picks the stream id with the fewest stored tuples to act as the LHS.
     * On a tie the first id in the map's iteration order wins. Returns 0 when
     * nothing has been stored, so callers must check for that case first.
     */
    private int getLHS() {
        int shortest = Integer.MAX_VALUE;
        int index = 0;
        for (int id : bothSides.keySet()) {
            int size = bothSides.get(id).size();
            if (size < shortest) {
                shortest = size;
                index = id;
            }
        }
        return index;
    }

    /**
     * Returns the stream id that is not {@code lhs}.
     *
     * @throws IllegalArgumentException if no other stream id is present
     */
    private int getRhs(int lhs) {
        for (int candidate : bothSides.keySet()) {
            if (candidate != lhs) {
                return candidate;
            }
        }
        throw new IllegalArgumentException("Can't find RHS!");
    }

    /**
     * Combines every tuple of the shorter side with every tuple of the other
     * side. Each resulting row holds the LHS values followed by the RHS values.
     *
     * @return a new, mutable list of joined rows; empty when fewer than two
     *         streams have contributed tuples
     */
    public List<Values> join() {
        List<Values> ret = new ArrayList<Values>();
        // Explicit guard instead of relying on a swallowed exception: with
        // fewer than two sides there is nothing to pair up.
        if (bothSides.size() < 2) {
            return ret;
        }
        int lhsId = getLHS();
        int rhsId = getRhs(lhsId);
        List<Object[]> lhsRows = bothSides.get(lhsId);
        List<Object[]> rhsRows = bothSides.get(rhsId);
        for (Object[] lhs : lhsRows) {
            for (Object[] rhs : rhsRows) {
                // Values takes varargs, so the LHS array is spread into
                // individual fields before the RHS fields are appended.
                Values vals = new Values(lhs);
                vals.addAll(Arrays.asList(rhs));
                ret.add(vals);
            }
        }
        return ret;
    }
}