package eu.dnetlib.iis.wf.export.actionmanager.sequencefile; import static eu.dnetlib.iis.wf.export.actionmanager.ExportWorkflowRuntimeParameters.EXPORT_ACTION_BUILDER_FACTORY_CLASSNAME; import static eu.dnetlib.iis.wf.export.actionmanager.ExportWorkflowRuntimeParameters.EXPORT_ACTION_SETID; import static eu.dnetlib.iis.wf.export.actionmanager.ExportWorkflowRuntimeParameters.EXPORT_ALGORITHM_PROPERTY_SEPARATOR; import java.io.IOException; import java.lang.reflect.Constructor; import java.security.InvalidParameterException; import java.util.Collections; import java.util.List; import org.apache.avro.mapred.AvroKey; import org.apache.avro.specific.SpecificRecordBase; import org.apache.commons.lang.StringUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.NullWritable; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Mapper; import eu.dnetlib.actionmanager.actions.AtomicAction; import eu.dnetlib.iis.common.WorkflowRuntimeParameters; import eu.dnetlib.iis.wf.export.actionmanager.cfg.StaticConfigurationProvider; import eu.dnetlib.iis.wf.export.actionmanager.module.ActionBuilderFactory; import eu.dnetlib.iis.wf.export.actionmanager.module.ActionBuilderModule; import eu.dnetlib.iis.wf.export.actionmanager.module.AlgorithmName; import eu.dnetlib.iis.wf.export.actionmanager.module.MappingNotDefinedException; import eu.dnetlib.iis.wf.export.actionmanager.module.TrustLevelThresholdExceededException; /** * ActionManager service based exporter mapper. * * @author mhorst * */ public class SequenceFileExporterMapper extends Mapper<AvroKey<? extends SpecificRecordBase>, NullWritable, Text, Text> { private ActionBuilderModule<SpecificRecordBase> actionBuilder; // ----------------------- LOGIC -------------------------------- /** This is the place you can access map-reduce workflow node parameters */ @SuppressWarnings("unchecked") @Override protected void setup(Context context) throws IOException, InterruptedException { String moduleClassName = context.getConfiguration().get(EXPORT_ACTION_BUILDER_FACTORY_CLASSNAME); if (StringUtils.isNotBlank(moduleClassName)) { try { Class<?> clazz = Class.forName(moduleClassName); Constructor<?> constructor = clazz.getConstructor(); ActionBuilderFactory<SpecificRecordBase> actionBuilderFactory = (ActionBuilderFactory<SpecificRecordBase>) constructor.newInstance(); actionBuilder = actionBuilderFactory.instantiate(context.getConfiguration(), StaticConfigurationProvider.AGENT_DEFAULT, getActionSetId(actionBuilderFactory.getAlgorithName(), context.getConfiguration())); } catch (Exception e) { throw new RuntimeException( "unexpected exception ocurred when instantiating " + "builder module: " + moduleClassName, e); } } else { throw new InvalidParameterException("unknown action builder module instance, " + "no " + EXPORT_ACTION_BUILDER_FACTORY_CLASSNAME + " parameter provided!"); } } @Override protected void map(AvroKey<? extends SpecificRecordBase> key, NullWritable ignore, Context context) throws IOException, InterruptedException { List<AtomicAction> actions = createActions(key.datum()); if (actions != null) { for (AtomicAction action : actions) { Text keyOut = new Text(); Text valueOut = new Text(); keyOut.set(action.getRowKey()); valueOut.set(action.toString()); context.write(keyOut, valueOut); } } } // ----------------------- PRIVATE -------------------------------- /** * Provides action set identifier extracted from job configuration. * Checks whether action set was defined for particular algorithm or picks default value if specified. * @param algorithmName inference algorithm name * @param cfg job configuration * @throws MappingNotDefinedException thrown when action set identifier not specified in configuration */ private static String getActionSetId(AlgorithmName algorithmName, Configuration cfg) throws MappingNotDefinedException { String actionSetId = WorkflowRuntimeParameters.getParamValue( EXPORT_ACTION_SETID + EXPORT_ALGORITHM_PROPERTY_SEPARATOR + algorithmName.name(), EXPORT_ACTION_SETID, cfg); if (actionSetId!=null) { return actionSetId; } else { throw new MappingNotDefinedException( "no action set identifier defined " + "for algorithm: " + algorithmName.name()); } } /** * Creates list of actions for given avro object. * * @param datum source avro object */ private List<AtomicAction> createActions(SpecificRecordBase datum) { try { return actionBuilder.build(datum); } catch (TrustLevelThresholdExceededException e) { return Collections.emptyList(); } } }