package hip.ch6.joins.contribjoin; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.contrib.utils.join.*; import org.apache.hadoop.io.Text; public class Map extends DataJoinMapperBase { protected Text generateInputTag(String inputFile) { // tag the row with input file name (data source) return new Text(inputFile); } protected Text generateGroupKey(TaggedMapOutput output) { // first column in the input tab separated files becomes the key (to perform the JOIN) String line = (output.getData()).toString(); String[] tokens = StringUtils.split(line, "\t", 2); String groupKey = tokens[0]; return new Text(groupKey); } protected TaggedMapOutput generateTaggedMapOutput(Object value) { TaggedMapOutput output = new TextTaggedMapOutput((Text) value); output.setTag(new Text(this.inputTag)); return output; } }