package org.openflamingo.mapreduce.etl.grep; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; import org.openflamingo.mapreduce.util.CounterUtils; import java.io.IOException; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * 정규 표현식을 이용하여 로우를 Grep하는 Grep ETL 매퍼 * * @author Edward KIM * @author Seo Ji Hye * @since 0.1 */ public class GrepRowMapper extends Mapper<LongWritable, Text, NullWritable, Text> { /** * 정규 표현식 */ private String regEx; /** * 정규 표현식 패턴 */ private Pattern pattern; @Override protected void setup(Context context) throws IOException, InterruptedException { Configuration configuration = context.getConfiguration(); regEx = configuration.get("regEx", null); pattern = Pattern.compile(regEx); } @Override protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { String column = value.toString(); Matcher matcher = pattern.matcher(column); if (matcher.find()) { CounterUtils.writerMapperCounter(this, "YES", context); context.write(NullWritable.get(), new Text(value)); } else { CounterUtils.writerMapperCounter(this, "NO", context); } } }