package eu.dnetlib.iis.wf.collapsers; import java.io.IOException; import java.lang.reflect.Constructor; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import org.apache.avro.Schema; import org.apache.avro.generic.IndexedRecord; import org.apache.avro.mapred.AvroKey; import org.apache.avro.mapred.AvroValue; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.mapreduce.Reducer; import eu.dnetlib.iis.common.utils.AvroUtils; /** * @author Dominika Tkaczyk */ public class CollapserReducer extends Reducer<AvroKey<String>, AvroValue<IndexedRecord>, AvroKey<IndexedRecord>, NullWritable> { public static final String RECORD_COLLAPSER = "record_collapser"; public static final String INPUT_SCHEMA = "collapser.reducer.schema.class"; private RecordCollapser<IndexedRecord, IndexedRecord> recordCollapser; private Class<IndexedRecord> inputSchemaClass; private Schema inputSchema; @SuppressWarnings("unchecked") @Override protected void setup(Context context) throws IOException, InterruptedException { try { recordCollapser = (RecordCollapser<IndexedRecord, IndexedRecord>) getCollapserInstance(context, RECORD_COLLAPSER); recordCollapser.setup(context); String inputSchemaPath = context.getConfiguration().get(INPUT_SCHEMA); inputSchema = AvroUtils.toSchema(inputSchemaPath); inputSchemaClass = (Class<IndexedRecord>) Class.forName(inputSchemaPath); } catch (Exception ex) { throw new IOException("Cannot set up collapser reducer!", ex); } } private static Object getCollapserInstance(Context context, String parameter) throws Exception { Class<?> collapserClass = Class.forName(context.getConfiguration().get(parameter)); Constructor<?> collapserConstructor = collapserClass.getConstructor(); return collapserConstructor.newInstance(); } @Override public void reduce(AvroKey<String> key, Iterable<AvroValue<IndexedRecord>> values, Context context) throws IOException, InterruptedException { Iterator<AvroValue<IndexedRecord>> iterator = values.iterator(); List<IndexedRecord> objects = new ArrayList<IndexedRecord>(); while (iterator.hasNext()) { AvroValue<IndexedRecord> value = iterator.next(); objects.add((IndexedRecord) AvroUtils.getCopy(value.datum(), inputSchema, inputSchemaClass)); } List<IndexedRecord> collapsedList = recordCollapser.collapse(objects); for (IndexedRecord collapsed : collapsedList) { context.write(new AvroKey<IndexedRecord>(collapsed), NullWritable.get()); } } }