package com.skp.experiment.common.mapreduce;

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;

import com.skp.experiment.common.OptionParseUtil;

/**
 * Selects columns from each input line, in the order given by the
 * {@link #COLUMN_INDEXS} job parameter, and emits them joined by a comma.
 *
 * <p>Optionally, the column whose index equals {@link #TRANSFORM_INDEX} is
 * replaced by {@code log(value + 1) + 1} before being written.
 *
 * @author 1000668
 */
public class ColumnSelectMapper extends Mapper<LongWritable, Text, NullWritable, Text> {

  /** Configuration key holding the comma-separated list of column indexes to select. */
  public static final String COLUMN_INDEXS =
      ColumnSelectMapper.class.getName() + ".columnIndexs";

  /** Configuration key holding the index of the column to log-transform (-1 disables it). */
  public static final String TRANSFORM_INDEX =
      ColumnSelectMapper.class.getName() + ".transformIndex";

  /** Separator used both to decode the index list and to join the output columns. */
  private static final String DELIMETER = ",";

  // Per-instance state: static mutable fields would be shared between mapper
  // instances running in the same JVM and could be overwritten by another task.
  private List<Integer> columnIndexs;
  private final Text outValue = new Text();
  private int transformIndex = -1;

  /**
   * Reads the column selection and the optional transform index from the job configuration.
   *
   * @param context task context providing the job configuration
   */
  @Override
  protected void setup(Context context) throws IOException, InterruptedException {
    columnIndexs = OptionParseUtil.decode(
        context.getConfiguration().get(COLUMN_INDEXS), DELIMETER);
    transformIndex = context.getConfiguration().getInt(TRANSFORM_INDEX, -1);
  }

  /**
   * Tokenizes the input line, picks the configured columns in the configured order and
   * writes them as a single comma-joined record with a {@link NullWritable} key.
   *
   * <p>Indexes that fall outside the token array of the current line are skipped.
   *
   * @param key     input key supplied by the framework (unused)
   * @param value   one line of input text
   * @param context task context used to emit the projected line
   * @throws NumberFormatException if the column selected for transformation is not numeric
   */
  @Override
  protected void map(LongWritable key, Text value, Context context)
      throws IOException, InterruptedException {
    String[] tokens = TasteHadoopUtils.splitPrefTokens(value.toString());
    StringBuilder sb = new StringBuilder();
    // Track whether a column has actually been written: keying the delimiter on the
    // loop index would emit a leading delimiter whenever the first index is skipped.
    boolean wroteColumn = false;
    for (int idx : columnIndexs) {
      if (idx < 0 || idx >= tokens.length) {
        continue;
      }
      if (wroteColumn) {
        sb.append(DELIMETER);
      }
      if (idx == transformIndex) {
        sb.append(Math.log(Float.parseFloat(tokens[idx]) + 1.0) + 1.0);
      } else {
        sb.append(tokens[idx]);
      }
      wroteColumn = true;
    }
    outValue.set(sb.toString());
    context.write(NullWritable.get(), outValue);
  }
}