package org.openflamingo.mapreduce.etl.groupby;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
/**
 * GroupBy ETL mapper that splits each input line into columns and emits the
 * configured key column paired with every other column of the same row.
 *
 * <p>For a row with columns {@code c0, c1, ..., cn} and key position {@code k},
 * one {@code (ck, ci)} pair is written for every {@code i != k}. Collecting the
 * values of a key into a single output row is presumably done by the job's
 * reducer, which is not part of this class.
 *
 * <p>Configuration properties read in {@link #setup(Context)}:
 * <ul>
 * <li>{@code inputDelimiter} - column delimiter; it is passed to
 * {@link String#split(String)} and is therefore interpreted as a regular
 * expression.</li>
 * <li>{@code groupByKey} - zero-based position of the key column.</li>
 * </ul>
 *
 * @author Edward KIM
 * @author Seo Ji Hye
 * @since 0.1
 */
public class GroupByMapper extends Mapper<LongWritable, Text, Text, Text> {

    /**
     * Delimiter of the input file (used as a regular expression by {@code split}).
     */
    private String inputDelimiter;

    /**
     * Zero-based position of the column to group by.
     */
    private int keyColumn;

    /**
     * Reusable output key; avoids allocating a new {@link Text} per emitted pair.
     */
    private final Text outputKey = new Text();

    /**
     * Reusable output value; avoids allocating a new {@link Text} per emitted pair.
     */
    private final Text outputValue = new Text();

    /**
     * Reads and validates the mapper configuration once per task.
     *
     * @param context task context supplying the job configuration
     * @throws IllegalArgumentException if {@code inputDelimiter} is not set or
     *                                  {@code groupByKey} is missing or negative
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        Configuration configuration = context.getConfiguration();

        inputDelimiter = configuration.get("inputDelimiter");
        if (inputDelimiter == null) {
            // Fail fast here instead of with an opaque NullPointerException on the first record.
            throw new IllegalArgumentException("You must specify 'inputDelimiter' for Group By");
        }

        keyColumn = configuration.getInt("groupByKey", -1);
        // Any negative index is unusable, not only the -1 "not configured" sentinel.
        if (keyColumn < 0) {
            throw new IllegalArgumentException("You must specify 'groupByKey' for Group By");
        }
    }

    /**
     * Splits one input line and writes a {@code (key column, other column)} pair
     * for every column except the key column itself.
     *
     * <p>Rows that do not contain the key column (for example blank lines or
     * truncated records) are skipped and counted in the
     * {@code GroupByMapper / MALFORMED_ROWS} counter instead of failing the task
     * with an {@link ArrayIndexOutOfBoundsException}.
     *
     * @param key     input key supplied by the input format (not used)
     * @param value   one line of delimited input
     * @param context task context used to emit output pairs and update counters
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        // Note: String.split(regex) drops trailing empty columns.
        String[] columns = value.toString().split(inputDelimiter);

        if (keyColumn >= columns.length) {
            context.getCounter("GroupByMapper", "MALFORMED_ROWS").increment(1);
            return;
        }

        // The key is identical for every pair of this row, so set it once.
        outputKey.set(columns[keyColumn]);
        for (int index = 0; index < columns.length; index++) {
            if (index == keyColumn) {
                continue;
            }
            outputValue.set(columns[index]);
            context.write(outputKey, outputValue);
        }
    }
}