package com.github.minyk.morphlinesmr.mapper;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import com.github.minyk.morphlinesmr.MorphlinesMRConfig;
import com.github.minyk.morphlinesmr.partitioner.ExceptionPartitioner;
import org.apache.commons.io.FilenameUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.MRJobConfig;
import org.apache.hadoop.mapreduce.Mapper;
import org.kitesdk.morphline.api.Command;
import org.kitesdk.morphline.api.MorphlineContext;
import org.kitesdk.morphline.api.Record;
import org.kitesdk.morphline.base.Fields;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class MorphlinesMapper extends Mapper<LongWritable, Text, Text, Text> {
private static final Logger LOGGER = LoggerFactory.getLogger(MorphlinesMapper.class);
public static final String EXCEPTION_KEY_FIELD = "exceptionkey";
private final Record record = new Record();
private Command morphline;
boolean useReducers;
File morphLineFile;
@Override
protected void setup(Context context) throws IOException, InterruptedException {
boolean test = context.getConfiguration().getBoolean(MorphlinesMRConfig.MORPHLINE_FILE_TEST, false);
String confName;
if(test) {
confName = context.getConfiguration().get(MorphlinesMRConfig.MORPHLINE_FILE); //for test
} else {
confName = FilenameUtils.getName(context.getConfiguration().get(MorphlinesMRConfig.MORPHLINE_FILE));
}
morphLineFile = new File(confName);
String morphLineId = context.getConfiguration().get(MorphlinesMRConfig.MORPHLINE_ID);
MapperRecordEmitter recordEmitter = new MapperRecordEmitter(context);
MorphlineContext morphlineContext = new MorphlineContext.Builder().build();
morphline = new org.kitesdk.morphline.base.Compiler()
.compile(morphLineFile, morphLineId, morphlineContext, recordEmitter);
if(context.getConfiguration().getInt(MRJobConfig.NUM_REDUCES,0) == 0) {
useReducers = false;
} else {
useReducers = true;
}
}
public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
record.put(Fields.ATTACHMENT_BODY, new ByteArrayInputStream(value.toString().getBytes()));
if(LOGGER.isDebugEnabled()) {
LOGGER.debug("Value: " + value.toString());
}
if(useReducers) {
record.put(EXCEPTION_KEY_FIELD, ExceptionPartitioner.EXCEPTION_KEY_VALUE);
}
if (!morphline.process(record)) {
LOGGER.info("Morphline failed to process record: {}", record);
}
//record.removeAll(Fields.ATTACHMENT_BODY);
record.getFields().clear();
}
}