package eu.dnetlib.iis.wf.collapsers; import java.io.IOException; import org.apache.avro.generic.IndexedRecord; import org.apache.avro.mapred.AvroKey; import org.apache.avro.mapred.AvroValue; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapreduce.Mapper; /** * @author Dominika Tkaczyk */ public class GroupByFieldMapper extends Mapper<AvroKey<IndexedRecord>, NullWritable, AvroKey<String>, AvroValue<IndexedRecord>> { public static final String BLOCKING_FIELD = "blocking_field"; private String blockingField; @Override protected void setup(Context context) throws IOException, InterruptedException { blockingField = context.getConfiguration().get(BLOCKING_FIELD); } @Override protected void map(AvroKey<IndexedRecord> key, NullWritable ignore, Context context) throws IOException, InterruptedException { String id = (String) CollapserUtils.getNestedFieldValue(key.datum(), blockingField); context.write( new AvroKey<String>(id), new AvroValue<IndexedRecord>(key.datum())); } }