package org.gbif.occurrence.download.file.dwca;
import org.gbif.api.service.registry.DatasetOccurrenceDownloadUsageService;
import org.gbif.api.service.registry.DatasetService;
import org.gbif.occurrence.download.file.DownloadAggregator;
import org.gbif.occurrence.download.file.DownloadJobConfiguration;
import org.gbif.occurrence.download.file.Result;
import org.gbif.occurrence.download.file.common.DatasetUsagesCollector;
import org.gbif.occurrence.download.file.common.DownloadFileUtils;
import org.gbif.occurrence.download.util.HeadersFileUtil;
import org.gbif.occurrence.download.util.RegistryClientUtil;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.Collections;
import java.util.List;
import javax.inject.Inject;
import com.google.common.base.Throwables;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Aggregates partials results of files and combine then into the output zip file.
*/
public class DwcaDownloadAggregator implements DownloadAggregator {
private static final Logger LOG = LoggerFactory.getLogger(DwcaDownloadAggregator.class);
// Service that persist dataset usage information
private final DatasetOccurrenceDownloadUsageService datasetOccUsageService;
//Dataset service
private final DatasetService datasetService;
private final DownloadJobConfiguration configuration;
private final RegistryClientUtil registryClientUtil;
/**
* Utility method that creates a file, if the files exists it is deleted.
*/
private static void createFile(String outFile) {
try {
File file = new File(outFile);
if (file.exists()) {
file.delete();
}
file.createNewFile();
} catch (IOException e) {
LOG.error("Error creating file", e);
throw Throwables.propagate(e);
}
}
/**
* Appends the result files to the output file.
*/
private static void appendResult(
Result result,
OutputStream interpretedFileWriter,
OutputStream verbatimFileWriter,
OutputStream multimediaFileWriter
) throws IOException {
DownloadFileUtils.appendAndDelete(result.getDownloadFileWork().getJobDataFileName()
+ TableSuffixes.INTERPRETED_SUFFIX, interpretedFileWriter);
DownloadFileUtils.appendAndDelete(result.getDownloadFileWork().getJobDataFileName() + TableSuffixes.VERBATIM_SUFFIX,
verbatimFileWriter);
DownloadFileUtils.appendAndDelete(result.getDownloadFileWork().getJobDataFileName()
+ TableSuffixes.MULTIMEDIA_SUFFIX, multimediaFileWriter);
}
@Inject
public DwcaDownloadAggregator(
DatasetOccurrenceDownloadUsageService datasetOccUsageService,
DatasetService datasetService,
DownloadJobConfiguration configuration,
RegistryClientUtil registryClientUtil
) {
this.datasetService = datasetService;
this.datasetOccUsageService = datasetOccUsageService;
this.configuration = configuration;
this.registryClientUtil = registryClientUtil;
}
public void init() {
createFile(configuration.getInterpretedDataFileName());
createFile(configuration.getVerbatimDataFileName());
createFile(configuration.getMultimediaDataFileName());
}
/**
* Collects the results of each job.
* Iterates over the list of futures to collect individual results.
*/
public void aggregate(List<Result> results) {
init();
try (
FileOutputStream interpretedFileWriter = new FileOutputStream(configuration.getInterpretedDataFileName(), true);
FileOutputStream verbatimFileWriter = new FileOutputStream(configuration.getVerbatimDataFileName(), true);
FileOutputStream multimediaFileWriter = new FileOutputStream(configuration.getMultimediaDataFileName(), true)) {
HeadersFileUtil.appendInterpretedHeaders(interpretedFileWriter);
HeadersFileUtil.appendVerbatimHeaders(verbatimFileWriter);
HeadersFileUtil.appendMultimediaHeaders(multimediaFileWriter);
if (!results.isEmpty()) {
// Results are sorted to respect the original ordering
Collections.sort(results);
DatasetUsagesCollector datasetUsagesCollector = new DatasetUsagesCollector();
for (Result result : results) {
datasetUsagesCollector.sumUsages(result.getDatasetUsages());
appendResult(result, interpretedFileWriter, verbatimFileWriter, multimediaFileWriter);
}
CitationsFileWriter.createCitationFile(datasetUsagesCollector.getDatasetUsages(),
configuration.getCitationDataFileName(),
datasetOccUsageService,
datasetService,
configuration.getDownloadKey());
}
//Creates the DwcA zip file
DwcaArchiveBuilder.buildArchive(configuration, registryClientUtil);
} catch (Exception e) {
throw Throwables.propagate(e);
}
}
}