package eu.dnetlib.iis.wf.report;
import static eu.dnetlib.iis.common.WorkflowRuntimeParameters.OOZIE_ACTION_OUTPUT_FILENAME;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.commons.collections.CollectionUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.google.common.collect.Lists;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.stream.JsonWriter;
import eu.dnetlib.iis.common.java.PortBindings;
import eu.dnetlib.iis.common.java.Process;
import eu.dnetlib.iis.common.java.io.DataStore;
import eu.dnetlib.iis.common.java.io.FileSystemPath;
import eu.dnetlib.iis.common.java.porttype.AnyPortType;
import eu.dnetlib.iis.common.java.porttype.PortType;
import eu.dnetlib.iis.common.schemas.ReportEntry;
import eu.dnetlib.iis.common.schemas.ReportEntryType;
/**
 * Java workflow node responsible for merging partial reports into a single json file.<br>
 * It reads input partial report datastores ({@link ReportEntry}s) located under
 * a single parent location. The process assumes that partial report datastores are
 * located in direct subdirectories of the provided input path.<br>
 * As a result the process writes the merged report into a json file. Additionally,
 * all entries of {@link ReportEntryType#COUNTER} type are stored as properties in
 * the file pointed to by the {@code OOZIE_ACTION_OUTPUT_FILENAME} system property.
 *
 * @author madryk
 */
public class ReportMerger implements Process {

    protected static final String PARTIAL_REPORTS_PORT_IN_NAME = "partial_reports";

    protected static final String REPORT_PORT_OUT_NAME = "report";

    private final ReportEntryJsonAppender reportEntryAppender = new ReportEntryJsonAppender();


    //------------------------ LOGIC --------------------------

    @Override
    public Map<String, PortType> getInputPorts() {
        return Collections.singletonMap(PARTIAL_REPORTS_PORT_IN_NAME, new AnyPortType());
    }

    @Override
    public Map<String, PortType> getOutputPorts() {
        return Collections.singletonMap(REPORT_PORT_OUT_NAME, new AnyPortType());
    }

    /**
     * Reads all partial reports found under the {@value #PARTIAL_REPORTS_PORT_IN_NAME} input port,
     * writes counter entries as action data and stores the merged json report
     * under the {@value #REPORT_PORT_OUT_NAME} output port.
     *
     * @param portBindings bindings providing the input and output paths
     * @param conf hadoop configuration used to obtain the {@link FileSystem}
     * @param parameters process parameters (not used by this process)
     */
    @Override
    public void run(PortBindings portBindings, Configuration conf, Map<String, String> parameters) throws Exception {

        FileSystem fs = FileSystem.get(conf);

        List<ReportEntry> allReportEntries = readAllPartialReports(fs, portBindings.getInput().get(PARTIAL_REPORTS_PORT_IN_NAME));

        writeActionData(allReportEntries);

        JsonObject jsonReport = buildJsonReport(allReportEntries);

        writeJsonReport(jsonReport, fs, portBindings.getOutput().get(REPORT_PORT_OUT_NAME));
    }


    //------------------------ PRIVATE --------------------------

    /**
     * Writes all counters as action data properties.
     * Nothing is written when there are no report entries at all.
     *
     * @param reportEntries report entries to pick the counters from, may be null or empty
     * @throws IllegalStateException when there are entries to be written but
     *      the system property holding the action output file name is not set
     * @throws IOException when the action output file cannot be written
     */
    private void writeActionData(List<ReportEntry> reportEntries) throws FileNotFoundException, IOException {

        if (CollectionUtils.isEmpty(reportEntries)) {
            return;
        }

        String actionOutputFilename = System.getProperty(OOZIE_ACTION_OUTPUT_FILENAME);
        if (actionOutputFilename == null) {
            // fail with a meaningful message instead of a bare NullPointerException raised by the File constructor
            throw new IllegalStateException("system property '" + OOZIE_ACTION_OUTPUT_FILENAME
                    + "' is not set, unable to write action data");
        }

        Properties props = new Properties();
        for (ReportEntry currentEntry : reportEntries) {
            if (ReportEntryType.COUNTER.equals(currentEntry.getType())) {
                props.setProperty(currentEntry.getKey().toString(), currentEntry.getValue().toString());
            }
        }

        try (OutputStream os = new FileOutputStream(new File(actionOutputFilename))) {
            props.store(os, "");
        }
    }

    /**
     * Serializes the report as an indented json document into the given location.
     *
     * @param jsonReport report to be written
     * @param fs file system the report file is created in
     * @param outputReportPath location of the report file to be created
     * @throws IOException when the report file cannot be created or written
     */
    private void writeJsonReport(JsonObject jsonReport, FileSystem fs, Path outputReportPath) throws IOException {

        Gson gson = new Gson();

        try (JsonWriter jsonWriter = new JsonWriter(new OutputStreamWriter(fs.create(outputReportPath), "utf8"))) {
            jsonWriter.setIndent("  ");
            gson.toJson(jsonReport, jsonWriter);
        }
    }

    /**
     * Builds a single json object by appending every report entry, in the order given.
     *
     * @param reportEntries entries to be appended to the report
     * @return json object holding all appended entries
     */
    private JsonObject buildJsonReport(List<ReportEntry> reportEntries) {

        JsonObject jsonReport = new JsonObject();

        for (ReportEntry reportEntry : reportEntries) {
            reportEntryAppender.appendReportEntry(jsonReport, reportEntry);
        }

        return jsonReport;
    }

    /**
     * Reads report entries from all datastores located in direct subdirectories
     * of the given base path. Regular files placed directly under the base path are ignored.
     *
     * @param fs file system holding the partial reports
     * @param partialReportsBasePath parent directory of the partial report datastores
     * @return report entries read from all partial report datastores
     * @throws IOException when the base directory cannot be listed or a datastore cannot be read
     */
    private List<ReportEntry> readAllPartialReports(FileSystem fs, Path partialReportsBasePath) throws FileNotFoundException, IOException {

        List<ReportEntry> allReportEntries = Lists.newArrayList();

        for (FileStatus fileStatus : fs.listStatus(partialReportsBasePath)) {
            // the listing already tells whether the entry is a directory,
            // there is no need to query the file system once more for each entry
            if (fileStatus.isDirectory()) {
                FileSystemPath datastorePath = new FileSystemPath(fs, fileStatus.getPath());
                allReportEntries.addAll(DataStore.read(datastorePath, ReportEntry.SCHEMA$));
            }
        }

        return allReportEntries;
    }
}