package hip.ch6.joins.contribjoin;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.contrib.utils.join.*;
import org.apache.hadoop.io.Text;
public class Reduce extends DataJoinReducerBase {
/**
* Perform an inner join
*
* @param tags
* a list of source tags
* @param values
* a value per source
* @return combined value derived from values of the sources
*/
private TextTaggedMapOutput output = new TextTaggedMapOutput();
private Text textOutput = new Text();
protected TaggedMapOutput combine(Object[] tags, Object[] values) {
// an inner join requires that both sides contain an entry for the
// join key
//
if (tags.length < 2)
return null;
StringBuilder joinedStr = new StringBuilder();
for (int i = 0; i < tags.length; i++) {
if (i > 0) {
joinedStr.append("\t");
}
// strip first column as it is the key on which we joined
String line = ((((TaggedMapOutput) values[i]).getData())).toString();
String[] tokens = StringUtils.split(line, "\t", 2);
joinedStr.append(tokens[1]);
}
textOutput.set(joinedStr.toString());
output.setData(textOutput);
//output.setTag((Text) tags[0]);
return output;
}
}