package eu.dnetlib.iis.common.report; import java.util.Collections; import java.util.List; import java.util.Map; import java.util.stream.Collectors; import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import eu.dnetlib.iis.common.counter.PigCounters; import eu.dnetlib.iis.common.counter.PigCountersParser; import eu.dnetlib.iis.common.java.PortBindings; import eu.dnetlib.iis.common.java.Process; import eu.dnetlib.iis.common.java.io.DataStore; import eu.dnetlib.iis.common.java.io.FileSystemPath; import eu.dnetlib.iis.common.java.porttype.AvroPortType; import eu.dnetlib.iis.common.java.porttype.PortType; import eu.dnetlib.iis.common.schemas.ReportEntry; /** * Java workflow node process for building report from pig counters.<br/> * <br/> * It writes report properties into avro datastore of {@link ReportEntry}s * with location specified in output port.<br/> * Report property key must start with <code>report.</code> to * be included in output datastore.<br/> * <br/> * Report property values can contain placeholders for easier evaluation of * pig counters. Placeholders are resolved using {@link PigCounterValueResolver}.<br/> * <br/> * Process needs <code>pigCounters</code> property that contains json representation * of pig counters for working.<br/> * * @author madryk */ public class PigCountersReportGenerator implements Process { private static final String REPORT_PORT_OUT_NAME = "report"; private static final String REPORT_PROPERTY_PREFIX = "report."; private static final String PIG_COUNTERS_PROPERTY = "pigCounters"; private PigCountersParser pigCountersParser = new PigCountersParser(); private ReportPigCounterMappingParser reportPigCounterMappingParser = new ReportPigCounterMappingParser(); private ReportPigCountersResolver reportPigCountersResolver = new ReportPigCountersResolver(); //------------------------ LOGIC -------------------------- @Override public Map<String, PortType> getInputPorts() { return Collections.emptyMap(); } @Override public Map<String, PortType> getOutputPorts() { return Collections.singletonMap(REPORT_PORT_OUT_NAME, new AvroPortType(ReportEntry.SCHEMA$)); } @Override public void run(PortBindings portBindings, Configuration conf, Map<String, String> parameters) throws Exception { String pigCountersJson = parameters.get(PIG_COUNTERS_PROPERTY); PigCounters pigCounters = pigCountersParser.parse(pigCountersJson); List<ReportPigCounterMapping> reportCountersMapping = collectReportCountersMapping(parameters); List<ReportEntry> reportCounters = reportPigCountersResolver.resolveReportCounters(pigCounters, reportCountersMapping); FileSystem fs = FileSystem.get(conf); Path reportPath = portBindings.getOutput().get(REPORT_PORT_OUT_NAME); DataStore.create(reportCounters, new FileSystemPath(fs, reportPath)); } //------------------------ PRIVATE -------------------------- private List<ReportPigCounterMapping> collectReportCountersMapping(Map<String, String> parameters) { return parameters.entrySet().stream() .filter(property -> property.getKey().startsWith(REPORT_PROPERTY_PREFIX)) .map(x -> Pair.of(x.getKey().substring(REPORT_PROPERTY_PREFIX.length()), x.getValue())) .map(x -> reportPigCounterMappingParser.parse(x.getKey(), x.getValue())) .collect(Collectors.toList()); } }