package org.gbif.occurrence.download.file.simplecsv;

import org.gbif.dwc.terms.DcTerm;
import org.gbif.dwc.terms.GbifTerm;
import org.gbif.dwc.terms.Term;
import org.gbif.occurrence.download.file.DownloadFileWork;
import org.gbif.occurrence.download.file.Result;
import org.gbif.occurrence.download.file.common.DatasetUsagesCollector;
import org.gbif.occurrence.download.file.common.SolrQueryProcessor;
import org.gbif.occurrence.download.hive.DownloadTerms;

import java.io.IOException;
import java.util.Date;
import java.util.Map;
import javax.annotation.Nullable;

import akka.actor.UntypedActor;
import com.google.common.base.Charsets;
import com.google.common.base.Function;
import com.google.common.base.Predicate;
import com.google.common.base.Throwables;
import com.google.common.collect.Collections2;
import org.apache.commons.beanutils.ConvertUtils;
import org.apache.commons.beanutils.converters.DateConverter;
import org.apache.commons.io.output.FileWriterWithEncoding;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.supercsv.io.CsvMapWriter;
import org.supercsv.io.ICsvMapWriter;
import org.supercsv.prefs.CsvPreference;

import static org.gbif.occurrence.download.file.OccurrenceMapReader.buildOccurrenceMap;

/**
 * Actor that creates a part of the simple CSV download file.
 */
public class SimpleCsvDownloadActor extends UntypedActor {

  private static final Logger LOG = LoggerFactory.getLogger(SimpleCsvDownloadActor.class);

  static {
    //https://issues.apache.org/jira/browse/BEANUTILS-387
    ConvertUtils.register(new DateConverter(null), Date.class);
  }

  // Column headers of the output file, derived from the simple download terms.
  private static final String[] COLUMNS =
    Collections2.transform(DownloadTerms.SIMPLE_DOWNLOAD_TERMS, new Function<Term, String>() {
      @Nullable
      @Override
      public String apply(@Nullable Term input) {
        return input.simpleName();
      }
    }).toArray(new String[DownloadTerms.SIMPLE_DOWNLOAD_TERMS.size()]);

  @Override
  public void onReceive(Object message) throws Exception {
    if (message instanceof DownloadFileWork) {
      doWork((DownloadFileWork) message);
    } else {
      unhandled(message);
    }
  }

  /**
   * Executes the job.query and creates a data file that will contain the records from job.from to job.to positions.
   */
  private void doWork(final DownloadFileWork work) throws IOException {

    final DatasetUsagesCollector datasetUsagesCollector = new DatasetUsagesCollector();

    try (ICsvMapWriter csvMapWriter = new CsvMapWriter(new FileWriterWithEncoding(work.getJobDataFileName(),
                                                                                  Charsets.UTF_8),
                                                       CsvPreference.TAB_PREFERENCE)) {
      // Iterates over the occurrence keys returned by the query for this slice of the download.
      SolrQueryProcessor.processQuery(work, new Predicate<Integer>() {
        @Override
        public boolean apply(@Nullable Integer occurrenceKey) {
          try {
            // Reads the occurrence record and writes it as a tab-delimited row.
            org.apache.hadoop.hbase.client.Result result = work.getOccurrenceMapReader().get(occurrenceKey);
            Map<String, String> occurrenceRecordMap = buildOccurrenceMap(result, DownloadTerms.SIMPLE_DOWNLOAD_TERMS);
            if (occurrenceRecordMap != null) {
              //collect usages
              datasetUsagesCollector.collectDatasetUsage(occurrenceRecordMap.get(GbifTerm.datasetKey.simpleName()),
                                                         occurrenceRecordMap.get(DcTerm.license.simpleName()));
              //write results
              csvMapWriter.write(occurrenceRecordMap, COLUMNS);
              return true;
            } else {
              LOG.error(String.format("Occurrence id %s not found!", occurrenceKey));
            }
          } catch (Exception e) {
            throw Throwables.propagate(e);
          }
          return false;
        }
      });
    } finally {
      // Release the lock
      work.getLock().unlock();
      LOG.info("Lock released, job detail: {} ", work.toString());
    }
    getSender().tell(new Result(work, datasetUsagesCollector.getDatasetUsages(),
                                datasetUsagesCollector.getDatasetLicenses()), getSelf());
  }
}
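
/*
 * A minimal usage sketch, not part of this class: assuming an existing Akka ActorSystem
 * and a populated DownloadFileWork instance (both hypothetical here, named "system" and
 * "work"), the actor is created and handed a slice of the download like this:
 *
 *   ActorRef actor = system.actorOf(Props.create(SimpleCsvDownloadActor.class));
 *   actor.tell(work, ActorRef.noSender());
 *
 * The actor replies to the sender with a Result message carrying the dataset usages and
 * licenses collected while writing its part of the file.
 */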