package org.openflamingo.mapreduce.etl.aggregate;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Mapper;
import org.openflamingo.mapreduce.util.CounterUtils;
import org.openflamingo.mapreduce.util.HdfsUtils;
import java.io.IOException;
/**
* 하나 이상의 입력 파일을 받아서 합치는 Aggregation ETL Mapper.
* 이 Mapper는 입력을 그대로 다시 출력한다.
*
* @author Edward KIM
* @author Seo Ji Hye
* @since 0.1
*/
public class AggregateMapper extends Mapper<LongWritable, Text, NullWritable, Text> {
/**
* Input Split의 file name
*/
private String filename;
/**
* 개별 파일당 라인의 개수를 수집할지 여부
*/
private boolean lineCountPerFile;
@Override
protected void setup(Context context) throws IOException, InterruptedException {
Configuration configuration = context.getConfiguration();
lineCountPerFile = configuration.getBoolean("lineCountPerFile", false);
if (lineCountPerFile) {
InputSplit inputSplit = context.getInputSplit();
try {
filename = HdfsUtils.getFilename(inputSplit);
} catch (Exception ex) {
CounterUtils.writerMapperCounter(this, "Cannot get a file name from input split", context);
}
}
}
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
if (lineCountPerFile) CounterUtils.writerMapperCounter(this, filename, context);
context.write(NullWritable.get(), value);
}
}