package org.gbif.occurrence.download.hive;
import org.gbif.dwc.terms.DcTerm;
import org.gbif.dwc.terms.DwcTerm;
import org.gbif.dwc.terms.GbifInternalTerm;
import org.gbif.dwc.terms.GbifTerm;
import org.gbif.dwc.terms.Term;
import java.util.Set;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
/**
 * Definitions of the term sets used when downloading occurrences and when creating the tables
 * used during the download process.
 *
 * <p>All sets exposed here are immutable.
 */
public class DownloadTerms {

  /**
   * Terms excluded from the interpreted download query. This list of exclusions is used for the
   * download query only.
   */
  public static final Set<Term> EXCLUSIONS_INTERPRETED = ImmutableSet.<Term>of(
    // returned multiple times, so excluded and treated by adding once at the beginning
    GbifTerm.gbifID,
    // omitted entirely
    GbifInternalTerm.fragmentHash,
    // omitted entirely
    GbifInternalTerm.fragment
  );

  /**
   * Terms excluded from the HDFS table definition: everything in {@link #EXCLUSIONS_INTERPRETED}
   * plus {@link GbifTerm#mediaType}, which is handled as an extension.
   */
  public static final Set<Term> EXCLUSIONS =
    // Typed builder instead of the raw ImmutableSet.Builder, so element types are compiler-checked.
    ImmutableSet.<Term>builder()
      .addAll(EXCLUSIONS_INTERPRETED)
      .add(GbifTerm.mediaType)
      .build();

  /**
   * The interpreted terms ({@link Terms#interpretedTerms()}) minus {@link #EXCLUSIONS}.
   */
  public static final Set<Term> DOWNLOAD_INTERPRETED_TERMS_HDFS =
    Sets.difference(ImmutableSet.<Term>copyOf(Terms.interpretedTerms()), EXCLUSIONS).immutableCopy();

  /**
   * The interpreted terms ({@link Terms#interpretedTerms()}) minus {@link #EXCLUSIONS_INTERPRETED}.
   */
  public static final Set<Term> DOWNLOAD_INTERPRETED_TERMS =
    Sets.difference(ImmutableSet.<Term>copyOf(Terms.interpretedTerms()), EXCLUSIONS_INTERPRETED).immutableCopy();

  /**
   * The verbatim terms ({@link Terms#verbatimTerms()}) minus {@link #EXCLUSIONS}.
   */
  public static final Set<Term> DOWNLOAD_VERBATIM_TERMS =
    Sets.difference(ImmutableSet.<Term>copyOf(Terms.verbatimTerms()), EXCLUSIONS).immutableCopy();

  /**
   * The terms that will be included in the interpreted table if also present in
   * {@link org.gbif.occurrence.download.hive.Terms#interpretedTerms()}.
   */
  public static final Set<Term> SIMPLE_DOWNLOAD_TERMS = ImmutableSet.<Term>of(
    GbifTerm.gbifID,
    GbifTerm.datasetKey,
    DwcTerm.occurrenceID,
    DwcTerm.kingdom,
    DwcTerm.phylum,
    DwcTerm.class_,
    DwcTerm.order,
    DwcTerm.family,
    DwcTerm.genus,
    GbifTerm.species,
    DwcTerm.infraspecificEpithet,
    DwcTerm.taxonRank,
    DwcTerm.scientificName,
    DwcTerm.countryCode,
    DwcTerm.locality,
    GbifInternalTerm.publishingOrgKey,
    DwcTerm.decimalLatitude,
    DwcTerm.decimalLongitude,
    DwcTerm.coordinateUncertaintyInMeters,
    DwcTerm.coordinatePrecision,
    GbifTerm.elevation,
    GbifTerm.elevationAccuracy,
    GbifTerm.depth,
    GbifTerm.depthAccuracy,
    DwcTerm.eventDate,
    DwcTerm.day,
    DwcTerm.month,
    DwcTerm.year,
    GbifTerm.taxonKey,
    GbifTerm.speciesKey,
    DwcTerm.basisOfRecord,
    DwcTerm.institutionCode,
    DwcTerm.collectionCode,
    DwcTerm.catalogNumber,
    DwcTerm.recordNumber,
    DwcTerm.identifiedBy,
    DcTerm.license,
    DcTerm.rightsHolder,
    DwcTerm.recordedBy,
    DwcTerm.typeStatus,
    DwcTerm.establishmentMeans,
    GbifTerm.lastInterpreted,
    GbifTerm.mediaType,
    GbifTerm.issue);
}