package org.gbif.checklistbank.service.mybatis.export;

import com.google.common.base.Joiner;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.commons.lang3.time.DateFormatUtils;
import org.apache.ibatis.session.ResultContext;
import org.apache.ibatis.session.ResultHandler;
import org.gbif.api.model.checklistbank.*;
import org.gbif.api.vocabulary.Country;
import org.gbif.api.vocabulary.Language;
import org.gbif.checklistbank.model.ParsedNameUsage;
import org.gbif.dwc.terms.*;
import org.gbif.dwca.io.DwcaStreamWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.*;

/**
 * Base MyBatis result handler that converts each result object into a string row
 * and writes it through a {@link DwcaStreamWriter} row write handler.
 *
 * <p>Subclasses declare their data columns and implement {@link #toRow(Object)}.
 * The declared columns are mapped to positions 1..n; position 0 is passed to the
 * writer separately and is filled by every subclass with the taxon/usage key, so
 * each row array has {@code columns.size() + 1} cells.
 *
 * @param <T> type of the result objects being handled
 */
abstract class RowHandler<T> implements ResultHandler<T>, AutoCloseable {

  private static final Logger LOG = LoggerFactory.getLogger(RowHandler.class);
  private static final Joiner CONCAT = Joiner.on(";").skipNulls();

  /** Number of rows between two progress log statements. */
  private static final int LOG_INTERVAL = 100000;

  private final DwcaStreamWriter.RowWriteHandler writer;
  private int counter;
  private final Term rowType;

  /**
   * @param writer  archive writer used to open the row write handler
   * @param rowType row type of the data file to write
   * @param columns terms of the data columns in row order, excluding the leading column 0
   */
  public RowHandler(DwcaStreamWriter writer, Term rowType, List<Term> columns) {
    // column 0 is handed to the writer on its own, so mapped terms start at index 1
    int idx = 1;
    Map<Term, Integer> mapping = Maps.newHashMap();
    for (Term term : columns) {
      mapping.put(term, idx++);
    }
    this.writer = writer.writeHandler(rowType, 0, mapping);
    this.rowType = rowType;
  }

  /**
   * Converts a result object into the cells of one row.
   *
   * @param obj result object to convert
   * @return row cells; cell 0 first, followed by one cell per declared column
   */
  abstract String[] toRow(T obj);

  /**
   * Writes the current result object as a row and counts it.
   * Progress is logged at debug level every {@link #LOG_INTERVAL} rows.
   */
  @Override
  public void handleResult(ResultContext<? extends T> ctx) {
    writer.write(toRow(ctx.getResultObject()));
    counter++;
    // test after incrementing so the log shows exact multiples (100000, 200000, ...)
    // rather than 1, 100001, ...
    if (counter % LOG_INTERVAL == 0) {
      LOG.debug("{} {} records added to dwca", counter, rowType.simpleName());
    }
  }

  /**
   * @return number of rows handled so far
   */
  public int getCounter() {
    return counter;
  }

  /** Closes the underlying row write handler. */
  @Override
  public void close() throws Exception {
    writer.close();
  }

  /**
   * Joins the given enum values with ";" (skipping nulls), lower cased and with
   * underscores replaced by spaces.
   *
   * @return the joined values, or an empty string if the collection is null
   */
  private static String toStr(Collection<? extends Enum<?>> es) {
    if (es == null) {
      return "";
    }
    // Locale.ROOT keeps the output independent of the JVM default locale (e.g. Turkish dotless i)
    return CONCAT.join(es).toLowerCase(Locale.ROOT).replace('_', ' ');
  }

  /**
   * @return the ISO 2 letter code of the language, or null if the language is null
   */
  private static String toStr(Language l) {
    if (l == null) {
      return null;
    }
    return l.getIso2LetterCode();
  }

  /**
   * @return the enum name lower cased with underscores replaced by spaces, or null if the enum is null
   */
  private static String toStr(Enum<?> e) {
    if (e == null) {
      return null;
    }
    // Locale.ROOT keeps the output independent of the JVM default locale (e.g. Turkish dotless i)
    return e.name().toLowerCase(Locale.ROOT).replace('_', ' ');
  }

  /**
   * @return the date formatted with {@code DateFormatUtils.ISO_DATE_FORMAT}, or null if the date is null
   */
  private static String toStr(Date date) {
    if (date == null) {
      return null;
    }
    return DateFormatUtils.ISO_DATE_FORMAT.format(date);
  }

  /**
   * @return {@code obj.toString()}, or null if the object is null
   */
  private static String toStr(Object obj) {
    return obj == null ? null : obj.toString();
  }

  /**
   * Fills two consecutive cells with the country title and its ISO 2 letter code.
   * Both cells are set to null if the country is null.
   * The caller's own index is not advanced and must be moved on by 2 afterwards.
   *
   * @param row row to fill
   * @param idx index of the title cell; the code goes into {@code idx + 1}
   * @param val country to write, may be null
   */
  private static void addCountryColumns(String[] row, int idx, Country val) {
    if (val != null) {
      row[idx++] = val.getTitle();
      row[idx] = val.getIso2LetterCode();
    } else {
      row[idx++] = null;
      row[idx] = null;
    }
  }

  /**
   * Writes name usages as Taxon rows and collects the constituent dataset keys
   * encountered that differ from the exported dataset key.
   */
  static class TaxonHandler extends RowHandler<ParsedNameUsage> {
    static final List<Term> columns = ImmutableList.of(
        DwcTerm.datasetID,
        DwcTerm.parentNameUsageID,
        DwcTerm.acceptedNameUsageID,
        DwcTerm.originalNameUsageID,
        DwcTerm.scientificName,
        DwcTerm.scientificNameAuthorship,
        GbifTerm.canonicalName,
        GbifTerm.genericName,
        DwcTerm.specificEpithet,
        DwcTerm.infraspecificEpithet,
        DwcTerm.taxonRank,
        DwcTerm.nameAccordingTo,
        DwcTerm.namePublishedIn,
        DwcTerm.taxonomicStatus,
        DwcTerm.nomenclaturalStatus,
        DwcTerm.taxonRemarks,
        DwcTerm.kingdom,
        DwcTerm.phylum,
        DwcTerm.class_,
        DwcTerm.order,
        DwcTerm.family,
        DwcTerm.genus
    );

    private final Set<UUID> constituents = Sets.newHashSet();
    private final UUID datasetKey;

    /**
     * @param writer     archive writer to write Taxon rows to
     * @param datasetKey key of the exported dataset; constituent keys equal to it are not collected
     */
    TaxonHandler(DwcaStreamWriter writer, UUID datasetKey) {
      super(writer, DwcTerm.Taxon, columns);
      this.datasetKey = datasetKey;
    }

    /**
     * @return the constituent keys seen so far that differ from the dataset key (live, mutable set)
     */
    public Set<UUID> getConstituents() {
      return constituents;
    }

    @Override
    String[] toRow(ParsedNameUsage u) {
      String[] row = new String[columns.size() + 1];
      // NOTE(review): whether the parsed name can be null is not visible here; guarded defensively
      // so a usage without one yields empty name part cells instead of aborting the export
      final ParsedName pn = u.getParsedName();
      int idx = 0;
      row[idx++] = toStr(u.getKey());
      row[idx++] = toStr(u.getConstituentKey());
      if (u.getConstituentKey() != null && !u.getConstituentKey().equals(datasetKey)) {
        constituents.add(u.getConstituentKey());
      }
      row[idx++] = toStr(u.getParentKey());
      row[idx++] = toStr(u.getAcceptedKey());
      row[idx++] = toStr(u.getBasionymKey());
      // name
      row[idx++] = u.getScientificName();
      row[idx++] = u.getAuthorship();
      row[idx++] = u.getCanonicalName();
      row[idx++] = pn == null ? null : pn.getGenusOrAbove();
      row[idx++] = pn == null ? null : pn.getSpecificEpithet();
      row[idx++] = pn == null ? null : pn.getInfraSpecificEpithet();
      // taxon
      row[idx++] = toStr(u.getRank());
      row[idx++] = u.getAccordingTo();
      row[idx++] = u.getPublishedIn();
      row[idx++] = toStr(u.getTaxonomicStatus());
      row[idx++] = toStr(u.getNomenclaturalStatus());
      row[idx++] = u.getRemarks();
      // classification
      row[idx++] = u.getKingdom();
      row[idx++] = u.getPhylum();
      row[idx++] = u.getClazz();
      row[idx++] = u.getOrder();
      row[idx++] = u.getFamily();
      row[idx] = u.getGenus();
      return row;
    }
  }

  /** Writes descriptions as Description extension rows keyed by taxon key. */
  static class DescriptionHandler extends RowHandler<Description> {
    static final List<Term> columns = ImmutableList.of(
        DcTerm.type,
        DcTerm.language,
        DcTerm.description,
        DcTerm.source,
        DcTerm.creator,
        DcTerm.contributor,
        DcTerm.license
    );

    DescriptionHandler(DwcaStreamWriter writer) {
      super(writer, GbifTerm.Description, columns);
    }

    @Override
    String[] toRow(Description d) {
      int idx = 0;
      String[] row = new String[columns.size() + 1];
      row[idx++] = toStr(d.getTaxonKey());
      row[idx++] = d.getType();
      row[idx++] = toStr(d.getLanguage());
      row[idx++] = d.getDescription();
      row[idx++] = d.getSource();
      row[idx++] = d.getCreator();
      row[idx++] = d.getContributor();
      row[idx] = d.getLicense();
      return row;
    }
  }

  /** Writes distributions as Distribution extension rows keyed by taxon key. */
  static class DistributionHandler extends RowHandler<Distribution> {
    static final List<Term> columns = ImmutableList.of(
        DwcTerm.locationID,
        DwcTerm.locality,
        DwcTerm.country,
        DwcTerm.countryCode,
        DwcTerm.locationRemarks,
        DwcTerm.establishmentMeans,
        DwcTerm.lifeStage,
        DwcTerm.occurrenceStatus,
        IucnTerm.threatStatus,
        DcTerm.source
    );

    DistributionHandler(DwcaStreamWriter writer) {
      super(writer, GbifTerm.Distribution, columns);
    }

    @Override
    String[] toRow(Distribution d) {
      int idx = 0;
      String[] row = new String[columns.size() + 1];
      row[idx++] = toStr(d.getTaxonKey());
      row[idx++] = d.getLocationId();
      row[idx++] = d.getLocality();
      addCountryColumns(row, idx, d.getCountry());
      // skip the country and countryCode cells just filled
      idx = idx + 2;
      row[idx++] = d.getRemarks();
      row[idx++] = toStr(d.getEstablishmentMeans());
      row[idx++] = toStr(d.getLifeStage());
      row[idx++] = toStr(d.getStatus());
      row[idx++] = toStr(d.getThreatStatus());
      row[idx] = d.getSource();
      return row;
    }
  }

  /** Writes media objects as Multimedia extension rows keyed by taxon key. */
  static class NameUsageMediaObjectHandler extends RowHandler<NameUsageMediaObject> {
    static final List<Term> columns = ImmutableList.of(
        DcTerm.identifier,
        DcTerm.references,
        DcTerm.title,
        DcTerm.description,
        DcTerm.license,
        DcTerm.creator,
        DcTerm.created,
        DcTerm.contributor,
        DcTerm.publisher,
        DcTerm.rightsHolder,
        DcTerm.source
    );

    NameUsageMediaObjectHandler(DwcaStreamWriter writer) {
      super(writer, GbifTerm.Multimedia, columns);
    }

    @Override
    String[] toRow(NameUsageMediaObject m) {
      int idx = 0;
      String[] row = new String[columns.size() + 1];
      row[idx++] = toStr(m.getTaxonKey());
      row[idx++] = toStr(m.getIdentifier());
      row[idx++] = toStr(m.getReferences());
      row[idx++] = m.getTitle();
      row[idx++] = m.getDescription();
      row[idx++] = m.getLicense();
      row[idx++] = m.getCreator();
      row[idx++] = toStr(m.getCreated());
      row[idx++] = m.getContributor();
      row[idx++] = m.getPublisher();
      row[idx++] = m.getRightsHolder();
      row[idx] = m.getSource();
      return row;
    }
  }

  /** Writes references as Reference extension rows keyed by taxon key. */
  static class ReferenceHandler extends RowHandler<Reference> {
    static final List<Term> columns = ImmutableList.of(
        DcTerm.bibliographicCitation,
        DcTerm.identifier,
        DcTerm.references,
        DcTerm.source
    );

    ReferenceHandler(DwcaStreamWriter writer) {
      super(writer, GbifTerm.Reference, columns);
    }

    @Override
    String[] toRow(Reference r) {
      int idx = 0;
      String[] row = new String[columns.size() + 1];
      row[idx++] = toStr(r.getTaxonKey());
      row[idx++] = r.getCitation();
      row[idx++] = r.getDoi();
      row[idx++] = r.getLink();
      row[idx] = r.getSource();
      return row;
    }
  }

  /** Writes type specimens as TypesAndSpecimen extension rows keyed by taxon key. */
  static class TypeSpecimenHandler extends RowHandler<TypeSpecimen> {
    static final List<Term> columns = ImmutableList.of(
        GbifTerm.typeDesignationType,
        GbifTerm.typeDesignatedBy,
        DwcTerm.scientificName,
        DwcTerm.taxonRank,
        DcTerm.source
    );

    TypeSpecimenHandler(DwcaStreamWriter writer) {
      super(writer, GbifTerm.TypesAndSpecimen, columns);
    }

    @Override
    String[] toRow(TypeSpecimen t) {
      int idx = 0;
      String[] row = new String[columns.size() + 1];
      row[idx++] = toStr(t.getTaxonKey());
      row[idx++] = toStr(t.getTypeDesignationType());
      row[idx++] = t.getTypeDesignatedBy();
      row[idx++] = t.getScientificName();
      row[idx++] = toStr(t.getTaxonRank());
      row[idx] = t.getSource();
      return row;
    }
  }

  /** Writes vernacular names as VernacularName extension rows keyed by taxon key. */
  static class VernacularNameHandler extends RowHandler<VernacularName> {
    static final List<Term> columns = ImmutableList.of(
        DwcTerm.vernacularName,
        DcTerm.language,
        DwcTerm.country,
        DwcTerm.countryCode,
        DwcTerm.sex,
        DwcTerm.lifeStage,
        DcTerm.source
    );

    VernacularNameHandler(DwcaStreamWriter writer) {
      super(writer, GbifTerm.VernacularName, columns);
    }

    @Override
    String[] toRow(VernacularName v) {
      int idx = 0;
      String[] row = new String[columns.size() + 1];
      row[idx++] = toStr(v.getTaxonKey());
      row[idx++] = v.getVernacularName();
      row[idx++] = toStr(v.getLanguage());
      addCountryColumns(row, idx, v.getCountry());
      // skip the country and countryCode cells just filled
      idx = idx + 2;
      row[idx++] = toStr(v.getSex());
      row[idx++] = toStr(v.getLifeStage());
      row[idx] = v.getSource();
      return row;
    }
  }
}