package com.skp.experiment.common.mapreduce;

import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;

import com.skp.experiment.common.OptionParseUtil;

/**
 * Mapper that re-keys each line of a text input by a subset of its columns,
 * so that a downstream reducer receives the lines grouped by those columns.
 *
 * <p>For every input line the mapper splits the line into tokens with
 * {@link TasteHadoopUtils#splitPrefTokens}, builds the output key from the
 * configured columns via {@link OptionParseUtil#encode}, and emits the
 * original line unchanged as the output value.
 *
 * <p>The columns are read from the job configuration entry named by
 * {@link #KEY_COLUMN_INDEX}.
 *
 * @author skplanet
 */
public class GroupByColumnInValueMapper extends Mapper<LongWritable, Text, Text, Text> {

  /** Configuration property name holding the encoded list of key column indexes. */
  public static final String KEY_COLUMN_INDEX =
      GroupByColumnInValueMapper.class.getName() + ".keyColumnIndex";

  /** Delimiter passed to {@link OptionParseUtil} when decoding the indexes and encoding the key. */
  public static final String DELIMETER = ",";

  /** Column indexes that make up the output key; populated in {@link #setup}. */
  private List<Integer> keyColumnIndexes;

  /**
   * Reusable output key. Kept per instance (not static) so that mapper
   * instances running concurrently in the same JVM do not overwrite each
   * other's key between {@code set} and {@code write}.
   */
  private final Text outKey = new Text();

  /**
   * Loads the key column indexes from the job configuration.
   *
   * <p>The raw value of {@link #KEY_COLUMN_INDEX} is handed to
   * {@link OptionParseUtil#decode} together with {@link #DELIMETER}.
   * NOTE(review): presumably the value is a delimiter-separated list of
   * column positions; how a missing property is handled depends on
   * {@code OptionParseUtil.decode} -- confirm there.
   *
   * @param context task context supplying the job configuration
   */
  @Override
  protected void setup(Context context) throws IOException, InterruptedException {
    String encodedIndexes = context.getConfiguration().get(KEY_COLUMN_INDEX);
    keyColumnIndexes = OptionParseUtil.decode(encodedIndexes, DELIMETER);
  }

  /**
   * Emits the input line keyed by its configured columns.
   *
   * @param key input key supplied by the input format; not used
   * @param value one input line; written out unchanged as the output value
   * @param context task context used to emit the (key, line) pair
   */
  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    String[] tokens = TasteHadoopUtils.splitPrefTokens(value.toString());
    outKey.set(OptionParseUtil.encode(tokens, keyColumnIndexes, DELIMETER));
    context.write(outKey, value);
  }
}