package com.skp.experiment.common; import java.util.Map; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.util.ToolRunner; import org.apache.mahout.common.AbstractJob; import com.skp.experiment.common.mapreduce.ColumnSelectMapper; public class SelectColumnsJob extends AbstractJob { public static void main(String[] args) throws Exception { ToolRunner.run(new SelectColumnsJob(), args); } @Override public int run(String[] args) throws Exception { addInputOption(); addOutputOption(); addOption("columnIndexs", "cidxs", "column indexs seperated by ,"); addOption("transformIndex", "tidx", "log transform column index", "-1"); Map<String, String> parsedArgs = parseArguments(args); if (parsedArgs == null) { return -1; } Job selectJob = prepareJob(getInputPath(), getOutputPath(), TextInputFormat.class, ColumnSelectMapper.class, NullWritable.class, Text.class, TextOutputFormat.class); selectJob.getConfiguration().set(ColumnSelectMapper.COLUMN_INDEXS, getOption("columnIndexs")); selectJob.getConfiguration().setInt(ColumnSelectMapper.TRANSFORM_INDEX, Integer.parseInt(getOption("transformIndex"))); selectJob.waitForCompletion(true); return 0; } }