package org.gbif.occurrence.persistence; import org.gbif.api.model.common.MediaObject; import org.gbif.api.model.occurrence.Occurrence; import org.gbif.api.model.occurrence.VerbatimOccurrence; import org.gbif.api.util.IsoDateParsingUtils.IsoDateFormat; import org.gbif.api.vocabulary.BasisOfRecord; import org.gbif.api.vocabulary.Continent; import org.gbif.api.vocabulary.Country; import org.gbif.api.vocabulary.EndpointType; import org.gbif.api.vocabulary.EstablishmentMeans; import org.gbif.api.vocabulary.Extension; import org.gbif.api.vocabulary.LifeStage; import org.gbif.api.vocabulary.MediaType; import org.gbif.api.vocabulary.OccurrenceIssue; import org.gbif.api.vocabulary.Rank; import org.gbif.api.vocabulary.Sex; import org.gbif.api.vocabulary.TypeStatus; import org.gbif.dwc.terms.DcTerm; import org.gbif.dwc.terms.DwcTerm; import org.gbif.dwc.terms.GbifInternalTerm; import org.gbif.dwc.terms.GbifTerm; import org.gbif.dwc.terms.IucnTerm; import org.gbif.dwc.terms.Term; import org.gbif.dwc.terms.TermFactory; import org.gbif.occurrence.common.config.OccHBaseConfiguration; import org.gbif.occurrence.persistence.hbase.Columns; import java.io.IOException; import java.net.MalformedURLException; import java.net.URI; import java.util.Date; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.UUID; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.Connection; import org.apache.hadoop.hbase.client.ConnectionFactory; import org.apache.hadoop.hbase.client.Put; import org.apache.hadoop.hbase.client.RowMutations; import org.apache.hadoop.hbase.client.Table; import org.apache.hadoop.hbase.util.Bytes; import org.junit.AfterClass; import org.junit.Assert; import org.junit.Before; import org.junit.BeforeClass; import 
org.junit.Ignore; import org.junit.Test; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; public class OccurrencePersistenceServiceImplTest { private static final OccHBaseConfiguration CFG = new OccHBaseConfiguration(); static { CFG.setEnvironment("test"); } private static final byte[] TABLE = Bytes.toBytes(CFG.occTable); private static final String CF_NAME = "o"; private static final byte[] CF = Bytes.toBytes(CF_NAME); private static final int KEY = 1000000; private static final int BAD_KEY = 2000000; private static final double ELEV = 1000d; private static final BasisOfRecord BOR = BasisOfRecord.PRESERVED_SPECIMEN; private static final int CLASS_ID = 99; private static final String CLASS = "Mammalia"; private static final UUID DATASET_KEY = UUID.randomUUID(); private static final double DEPTH = 90d; private static final String FAMILY = "Felidae"; private static final int FAMILY_KEY = 90897087; private static final String GENUS = "Panthera"; private static final int GENUS_KEY = 9737; private static final Country PUB_COUNTRY = Country.CANADA; private static final String KINGDOM = "Animalia"; private static final int KINGDOM_ID = 1; private static final double LAT = 45.23423; private static final double LNG = 5.97087; private static final Date MOD = new Date(); private static final int MONTH = 6; private static final int TAXON_KEY = 8798793; private static final Date EVENT_DATE = new Date(); private static final String ORDER = "Carnivora"; private static final int ORDER_KEY = 8973; private static final UUID PUBLISHING_ORG_KEY = UUID.randomUUID(); private static final String PHYLUM = "Chordata"; private static final int PHYLUM_KEY = 23; private static final EndpointType PROTOCOL = EndpointType.BIOCASE; private static final String SCI_NAME = "Panthera onca (Linnaeus, 1758)"; private static final 
String SPECIES = "Onca"; private static final int SPECIES_KEY = 1425; private static final int YEAR = 1972; private static final String XML = "<record>some fake xml</record>"; // newer fields from occurrence widening private static final Double ELEV_ACC = 10d; private static final Double UNCERTAINTY_METERS = new Double(50.5); private static final Continent CONTINENT = Continent.AFRICA; private static final Country COUNTRY = Country.TANZANIA; private static final Date DATE_IDENTIFIED = new Date(); private static final Integer DAY = 3; private static final Double DEPTH_ACC = 15d; private static final EstablishmentMeans ESTAB_MEANS = EstablishmentMeans.NATIVE; private static final String GEO_DATUM = "WGS84"; private static final Integer INDIVIDUAL_COUNT = 123; private static final Date LAST_CRAWLED = new Date(); private static final Date LAST_PARSED = new Date(); private static final Integer CRAWL_ID = 7; private static final Date LAST_INTERPRETED = new Date(); private static final LifeStage LIFE_STAGE = LifeStage.ADULT; private static final Sex SEX = Sex.FEMALE; private static final String STATE_PROV = "BO"; private static final String WATERBODY = "Indian Ocean"; private static final String SUBGENUS = "subby"; private static final Integer SUBGENUS_KEY = 123; private static final TypeStatus TYPE_STATUS = TypeStatus.EPITYPE; private static final String TYPIFIED_NAME = "Aloo gobi"; // even newer fields private static final String GENERIC_NAME = "generic name"; private static final String SPECIFIC_EPITHET = "onca"; private static final String INFRA_SPECIFIC_EPITHET = "infraonca"; private static final Rank TAXON_RANK = Rank.SPECIES; private static final String ID_0 = "http://gbif.org"; private static final String ID_TYPE_0 = "URL"; private static final String ID_1 = "ftp://filezilla.org"; private static final String ID_TYPE_1 = "FTP"; private static final String ID_2 = "1234"; private static final String ID_TYPE_2 = "GBIF_PORTAL"; private static final String 
TERM_VALUE_PREFIX = "I am "; private static Connection CONNECTION = null; private OccurrencePersistenceServiceImpl occurrenceService; private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); @AfterClass public static void afterClass() throws Exception { TEST_UTIL.shutdownMiniCluster(); CONNECTION.close(); } @BeforeClass public static void beforeClass() throws Exception { TEST_UTIL.startMiniCluster(1); TEST_UTIL.createTable(TABLE, CF); CONNECTION = ConnectionFactory.createConnection(TEST_UTIL.getConfiguration()); } @Before public void setUp() throws Exception { TEST_UTIL.truncateTable(TABLE); occurrenceService = new OccurrencePersistenceServiceImpl(CFG, CONNECTION); try (Table table = CONNECTION.getTable(TableName.valueOf(CFG.occTable))) { Put put = new Put(Bytes.toBytes(KEY)); put.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.elevation)), Bytes.toBytes(ELEV)); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.basisOfRecord)), Bytes.toBytes(BOR.name())); put.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.classKey)), Bytes.toBytes(CLASS_ID)); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.class_)), Bytes.toBytes(CLASS)); put.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.datasetKey)), Bytes.toBytes(DATASET_KEY.toString())); put.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.depth)), Bytes.toBytes(DEPTH)); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.family)), Bytes.toBytes(FAMILY)); put.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.familyKey)), Bytes.toBytes(FAMILY_KEY)); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.genus)), Bytes.toBytes(GENUS)); put.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.genusKey)), Bytes.toBytes(GENUS_KEY)); put.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.publishingCountry)), Bytes.toBytes(PUB_COUNTRY.getIso2LetterCode())); put.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.lastCrawled)), Bytes.toBytes(LAST_CRAWLED.getTime())); 
put.addColumn(CF, Bytes.toBytes(Columns.column(GbifInternalTerm.crawlId)), Bytes.toBytes(CRAWL_ID)); put.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.lastParsed)), Bytes.toBytes(LAST_PARSED.getTime())); put.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.lastInterpreted)), Bytes.toBytes(LAST_INTERPRETED.getTime())); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.kingdom)), Bytes.toBytes(KINGDOM)); put.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.kingdomKey)), Bytes.toBytes(KINGDOM_ID)); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.decimalLatitude)), Bytes.toBytes(LAT)); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.decimalLongitude)), Bytes.toBytes(LNG)); put.addColumn(CF, Bytes.toBytes(Columns.column(DcTerm.modified)), Bytes.toBytes(MOD.getTime())); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.month)), Bytes.toBytes(MONTH)); put.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.taxonKey)), Bytes.toBytes(TAXON_KEY)); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.eventDate)), Bytes.toBytes(EVENT_DATE.getTime())); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.order)), Bytes.toBytes(ORDER)); put.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.orderKey)), Bytes.toBytes(ORDER_KEY)); put.addColumn(CF, Bytes.toBytes(Columns.column(GbifInternalTerm.publishingOrgKey)), Bytes.toBytes(PUBLISHING_ORG_KEY.toString())); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.phylum)), Bytes.toBytes(PHYLUM)); put.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.phylumKey)), Bytes.toBytes(PHYLUM_KEY)); put.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.protocol)), Bytes.toBytes(PROTOCOL.toString())); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.scientificName)), Bytes.toBytes(SCI_NAME)); put.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.species)), Bytes.toBytes(SPECIES)); put.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.speciesKey)), Bytes.toBytes(SPECIES_KEY)); 
put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.year)), Bytes.toBytes(YEAR)); // new for occurrence widening put.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.elevationAccuracy)), Bytes.toBytes(ELEV_ACC)); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.coordinateUncertaintyInMeters)), Bytes.toBytes(UNCERTAINTY_METERS)); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.continent)), Bytes.toBytes(CONTINENT.toString())); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.countryCode)), Bytes.toBytes(COUNTRY.getIso2LetterCode())); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.dateIdentified)), Bytes.toBytes(DATE_IDENTIFIED.getTime())); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.day)), Bytes.toBytes(DAY)); put.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.depthAccuracy)), Bytes.toBytes(DEPTH_ACC)); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.establishmentMeans)), Bytes.toBytes(ESTAB_MEANS.toString())); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.individualCount)), Bytes.toBytes(INDIVIDUAL_COUNT)); put.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.lastInterpreted)), Bytes.toBytes(LAST_INTERPRETED.getTime())); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.lifeStage)), Bytes.toBytes(LIFE_STAGE.toString())); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.sex)), Bytes.toBytes(SEX.toString())); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.stateProvince)), Bytes.toBytes(STATE_PROV)); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.waterBody)), Bytes.toBytes(WATERBODY)); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.subgenus)), Bytes.toBytes(SUBGENUS)); put.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.subgenusKey)), Bytes.toBytes(SUBGENUS_KEY)); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.typeStatus)), Bytes.toBytes(TYPE_STATUS.toString())); put.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.typifiedName)), 
Bytes.toBytes(TYPIFIED_NAME)); put.addColumn(CF, Bytes.toBytes(Columns.column(GbifTerm.genericName)), Bytes.toBytes(GENERIC_NAME)); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.specificEpithet)), Bytes.toBytes(SPECIFIC_EPITHET)); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.infraspecificEpithet)), Bytes.toBytes(INFRA_SPECIFIC_EPITHET)); put.addColumn(CF, Bytes.toBytes(Columns.column(DwcTerm.taxonRank)), Bytes.toBytes(TAXON_RANK.name())); put.addColumn(CF, Bytes.toBytes(Columns.idColumn(0)), Bytes.toBytes(ID_0)); put.addColumn(CF, Bytes.toBytes(Columns.idTypeColumn(0)), Bytes.toBytes(ID_TYPE_0)); put.addColumn(CF, Bytes.toBytes(Columns.idColumn(1)), Bytes.toBytes(ID_1)); put.addColumn(CF, Bytes.toBytes(Columns.idTypeColumn(1)), Bytes.toBytes(ID_TYPE_1)); put.addColumn(CF, Bytes.toBytes(Columns.idColumn(2)), Bytes.toBytes(ID_2)); put.addColumn(CF, Bytes.toBytes(Columns.idTypeColumn(2)), Bytes.toBytes(ID_TYPE_2)); put.addColumn(CF, Bytes.toBytes(Columns.column(GbifInternalTerm.fragment)), Bytes.toBytes(XML)); for (DwcTerm term : DwcTerm.values()) { if (!term.isClass()) { put.addColumn(CF, Bytes.toBytes(Columns.verbatimColumn(term)), Bytes.toBytes("I am " + term.toString())); } } for (Term term : IucnTerm.values()) { put.addColumn(CF, Bytes.toBytes(Columns.verbatimColumn(term)), Bytes.toBytes("I am " + term.toString())); } for (DcTerm term : DcTerm.values()) { if (!term.isClass()) { put.addColumn(CF, Bytes.toBytes(Columns.verbatimColumn(term)), Bytes.toBytes("I am " + term.toString())); } } for (GbifTerm term : GbifTerm.values()) { if (term != GbifTerm.lastParsed && !term.isClass() && term != GbifTerm.coordinateAccuracy) { put.addColumn(CF, Bytes.toBytes(Columns.verbatimColumn(term)), Bytes.toBytes("I am " + term.toString())); } } Term term = TermFactory.instance().findTerm("fancyUnknownTerm"); put.addColumn(CF, Bytes.toBytes(Columns.column(term)), Bytes.toBytes("I am " + term.toString())); setUpIssues(); table.put(put); } } private void 
setUpIdentifiers() throws IOException { try (Table table = CONNECTION.getTable(TableName.valueOf(CFG.occTable))) { Put put = new Put(Bytes.toBytes(KEY)); put.addColumn(CF, Bytes.toBytes(Columns.idColumn(0)), Bytes.toBytes(ID_0)); put.addColumn(CF, Bytes.toBytes(Columns.idTypeColumn(0)), Bytes.toBytes(ID_TYPE_0)); put.addColumn(CF, Bytes.toBytes(Columns.idColumn(1)), Bytes.toBytes(ID_1)); put.addColumn(CF, Bytes.toBytes(Columns.idTypeColumn(1)), Bytes.toBytes(ID_TYPE_1)); put.addColumn(CF, Bytes.toBytes(Columns.idColumn(2)), Bytes.toBytes(ID_2)); put.addColumn(CF, Bytes.toBytes(Columns.idTypeColumn(2)), Bytes.toBytes(ID_TYPE_2)); put.addColumn(CF, Bytes.toBytes(Columns.column(GbifInternalTerm.identifierCount)), Bytes.toBytes(3)); table.put(put); } } private void setUpIssues() throws IOException { try (Table table = CONNECTION.getTable(TableName.valueOf(CFG.occTable))) { Put put = new Put(Bytes.toBytes(KEY)); for (OccurrenceIssue issue : OccurrenceIssue.values()) { put.addColumn(CF, Bytes.toBytes(Columns.column(issue)), Bytes.toBytes(1)); } table.put(put); } } @Test public void testGetFull() throws IOException { // setUpIdentifiers(); Occurrence occ = occurrenceService.get(KEY); assertEquivalence(occ); assertEquals((Integer) KEY, occ.getKey()); // assertEquals(3, occ.getIdentifiers().size()); assertEquals(OccurrenceIssue.values().length, occ.getIssues().size()); assertFalse(occ.hasVerbatimField(DwcTerm.basisOfRecord)); } @Test @Ignore("Identifiers removed from persistence until needed") public void testGetNoIdentifiers() throws IOException { Occurrence occ = occurrenceService.get(KEY); assertEquivalence(occ); assertEquals((Integer) KEY, occ.getKey()); assertEquals(0, occ.getIdentifiers().size()); } @Test public void testGetNoIssues() throws IOException { Occurrence raw = occurrenceService.get(KEY); assertEquivalence(raw); raw.setIssues(new HashSet<OccurrenceIssue>()); occurrenceService.update(raw); Occurrence occ = occurrenceService.get(KEY); assertEquals((Integer) 
KEY, occ.getKey()); assertEquals(0, occ.getIssues().size()); } @Test public void testGetNull() { Occurrence occ = occurrenceService.get(BAD_KEY); assertNull(occ); } @Test public void testUpdateFull() throws IOException { // update everything but unique identifier pieces Occurrence update = occurrenceService.get(KEY); Double coordinateUncertaintyInMeters = 50.55d; Date origLastParsed = update.getLastParsed(); double alt = 1234.2; BasisOfRecord bor = BasisOfRecord.OBSERVATION; int classId = 88; String clazz = "Monocots"; double depth = 120.8; // keep family name, but change its key: https://github.com/gbif/portal-feedback/issues/136 String family = "Felidae"; int familyId = 96578787; String genericName = "generic trillium"; String genus = "Trillium"; int genusId = 7878; Country publishingCountry = Country.ALBANIA; String infraEpithet = "infragrand"; Country country = Country.CANADA; String kingdom = "Plantae"; int kingdomId = 2; double lat = 46.344; double lng = -85.97087; Date mod = new Date(); int month = 3; int nubId = 8798333; Date occDate = new Date(); String order = "Liliales"; int orderId = 23434; String phylum = "Angiosperms"; int phylumId = 422; EndpointType protocol = EndpointType.TAPIR; String sciName = "Trillium grandiflorum"; String specificEpithet = "grandiflorum"; String species = "T. 
grandiflorum"; int speciesId = 3444; Rank taxonRank = Rank.CULTIVAR; int year = 1988; Date lastInterpreted = new Date(); update.setElevation(alt); update.setBasisOfRecord(bor); update.setClassKey(classId); update.setClazz(clazz); update.setDepth(depth); update.setFamily(family); update.setFamilyKey(familyId); update.setGenus(genus); update.setGenusKey(genusId); update.setCountry(country); update.setKingdom(kingdom); update.setKingdomKey(kingdomId); update.setLastInterpreted(lastInterpreted); update.setDecimalLatitude(lat); update.setDecimalLongitude(lng); update.setCoordinateUncertaintyInMeters(coordinateUncertaintyInMeters); update.setModified(mod); update.setMonth(month); update.setTaxonKey(nubId); update.setEventDate(occDate); update.setOrder(order); update.setOrderKey(orderId); update.setPhylum(phylum); update.setPhylumKey(phylumId); update.setProtocol(protocol); update.setPublishingCountry(publishingCountry); update.setScientificName(sciName); update.setSpecies(species); update.setSpeciesKey(speciesId); update.setYear(year); update.setSpecificEpithet(specificEpithet); update.setInfraspecificEpithet(infraEpithet); update.setGenericName(genericName); update.setTaxonRank(taxonRank); // String id0 = "http://www.ala.org.au"; // IdentifierType idType0 = IdentifierType.GBIF_NODE; // Identifier record = new Identifier(); // record.setIdentifier(id0); // record.setType(idType0); // List<Identifier> records = newArrayList(); // records.add(record); // update.setIdentifiers(records); Set<OccurrenceIssue> issues = update.getIssues(); issues.remove(OccurrenceIssue.ELEVATION_MIN_MAX_SWAPPED); issues.remove(OccurrenceIssue.ELEVATION_NON_NUMERIC); issues.remove(OccurrenceIssue.ZERO_COORDINATE); update.setIssues(issues); Map<Term, String> fields = Maps.newHashMap(); fields.put(DwcTerm.basisOfRecord, "PRESERVED_SPECIMEN"); update.setVerbatimFields(fields); occurrenceService.update(update); Occurrence occ = occurrenceService.get(KEY); Assert.assertNotNull(occ); assertTrue(alt == 
occ.getElevation()); assertEquals(bor, occ.getBasisOfRecord()); assertTrue(classId == occ.getClassKey()); assertEquals(clazz, occ.getClazz()); assertEquals(DATASET_KEY, occ.getDatasetKey()); assertTrue(depth == occ.getDepth()); assertEquals(family, occ.getFamily()); assertTrue(familyId == occ.getFamilyKey()); assertEquals(genus, occ.getGenus()); assertTrue(genusId == occ.getGenusKey()); assertEquals(publishingCountry, occ.getPublishingCountry()); assertTrue(update.getKey().intValue() == occ.getKey().intValue()); // assertEquals(1, occ.getIdentifiers().size()); // Identifier updatedRecord = occ.getIdentifiers().iterator().next(); // assertTrue(id0.equals(updatedRecord.getIdentifier())); // assertEquals(idType0, updatedRecord.getType()); assertEquals(country, occ.getCountry()); assertEquals(kingdom, occ.getKingdom()); assertEquals(kingdomId, (int) occ.getKingdomKey()); assertEquals(lastInterpreted, occ.getLastInterpreted()); assertEquals(lat, occ.getDecimalLatitude(), 0.0001); assertEquals(lng, occ.getDecimalLongitude(), 0.0001); assertEquals(coordinateUncertaintyInMeters, occ.getCoordinateUncertaintyInMeters()); assertEquals(mod, occ.getModified()); assertEquals(month, (int) occ.getMonth()); assertEquals(nubId, (int) occ.getTaxonKey()); assertEquals(occDate, occ.getEventDate()); assertEquals(order, occ.getOrder()); assertTrue(orderId == occ.getOrderKey()); assertEquals(PUBLISHING_ORG_KEY, occ.getPublishingOrgKey()); assertEquals(protocol, occ.getProtocol()); assertEquals(phylum, occ.getPhylum()); assertTrue(phylumId == occ.getPhylumKey()); assertTrue(sciName.equals(occ.getScientificName())); assertEquals(species, occ.getSpecies()); assertTrue(speciesId == occ.getSpeciesKey()); assertEquals(origLastParsed, occ.getLastParsed()); assertTrue(year == occ.getYear()); assertEquals(specificEpithet, occ.getSpecificEpithet()); assertEquals(infraEpithet, occ.getInfraspecificEpithet()); assertEquals(genericName, occ.getGenericName()); assertEquals(taxonRank, 
occ.getTaxonRank()); assertEquals(OccurrenceIssue.values().length, occ.getIssues().size() + 3); assertFalse(occ.getIssues().contains(OccurrenceIssue.ELEVATION_MIN_MAX_SWAPPED)); assertFalse(occ.getIssues().contains(OccurrenceIssue.ELEVATION_NON_NUMERIC)); assertFalse(occ.getIssues().contains(OccurrenceIssue.ZERO_COORDINATE)); assertTrue(occ.getIssues().contains(OccurrenceIssue.COUNTRY_COORDINATE_MISMATCH)); assertFalse(occ.hasVerbatimField(DwcTerm.basisOfRecord)); assertFalse(occ.hasVerbatimField(DwcTerm.occurrenceID)); } @Test public void testFragmentGood() { String fragment = occurrenceService.getFragment(KEY); assertEquals(XML, fragment); } @Test public void testFragmentNull() { String fragment = occurrenceService.getFragment(BAD_KEY); assertNull(fragment); } @Test public void testDeleteExists() throws IOException { Occurrence occ = occurrenceService.delete(KEY); assertEquivalence(occ); assertEquals((Integer) KEY, occ.getKey()); Occurrence test = occurrenceService.get(KEY); assertNull(test); } @Test public void testDeleteNotExists() { Occurrence occ = occurrenceService.delete(BAD_KEY); assertNull(occ); } @Test public void testKeyByColumnIterator() { int count = 0; Iterator<Integer> iterator = occurrenceService.getKeysByColumn(Bytes.toBytes(DATASET_KEY.toString()), Columns.column(GbifTerm.datasetKey)); while (iterator.hasNext()) { iterator.next(); count++; } assertEquals(1, count); } @Test public void testGetVerbatim() { VerbatimOccurrence expected = new VerbatimOccurrence(); expected.setKey(KEY); expected.setDatasetKey(DATASET_KEY); expected.setPublishingOrgKey(PUBLISHING_ORG_KEY); expected.setPublishingCountry(PUB_COUNTRY); expected.setLastCrawled(LAST_CRAWLED); expected.setLastParsed(LAST_PARSED); expected.setCrawlId(CRAWL_ID); expected.setProtocol(PROTOCOL); addTerms(expected, TERM_VALUE_PREFIX); assertTrue(expected.hasVerbatimField(DwcTerm.basisOfRecord)); VerbatimOccurrence verb = occurrenceService.getVerbatim(KEY); assertNotNull(verb); 
assertNotNull(verb.getLastParsed()); assertEquivalence(expected, verb); assertTrue(verb.hasVerbatimField(DwcTerm.basisOfRecord)); Term term = TermFactory.instance().findTerm("fancyUnknownTerm"); assertTrue(verb.hasVerbatimField(term)); } @Test public void testGetVerbatimNull() { VerbatimOccurrence verb = occurrenceService.getVerbatim(BAD_KEY); assertNull(verb); } @Test public void testUpdateVerbatim() { VerbatimOccurrence orig = occurrenceService.getVerbatim(KEY); orig.setPublishingCountry(Country.VENEZUELA); orig.setPublishingOrgKey(UUID.randomUUID()); orig.setProtocol(EndpointType.DIGIR_MANIS); orig.setLastParsed(new Date()); addTerms(orig, "I was "); occurrenceService.update(orig); VerbatimOccurrence got = occurrenceService.getVerbatim(KEY); assertNotNull(got); assertNotNull(got.getLastParsed()); assertEquivalence(orig, got); } @Test public void testUpdateVerbatimMultimedia() { VerbatimOccurrence orig = occurrenceService.getVerbatim(KEY); orig.setPublishingCountry(Country.VENEZUELA); orig.setPublishingOrgKey(UUID.randomUUID()); orig.setProtocol(EndpointType.DIGIR_MANIS); orig.setLastParsed(new Date()); Map<Extension, List<Map<Term, String>>> extensions = Maps.newHashMap(); List<Map<Term, String>> mediaExtensions = Lists.newArrayList(); Map<Term, String> verbatimRecord = new HashMap<Term, String>(); verbatimRecord.put(DcTerm.created, IsoDateFormat.FULL.getDateFormat().format(new Date())); verbatimRecord.put(DcTerm.creator, "fede"); verbatimRecord.put(DcTerm.description, "testDescription"); verbatimRecord.put(DcTerm.format, "jpeg"); verbatimRecord.put(DcTerm.license, "licenseTest"); verbatimRecord.put(DcTerm.publisher, "publisherTest"); verbatimRecord.put(DcTerm.title, "titleTest"); verbatimRecord.put(DcTerm.identifier, "http://www.gbif.org/logo.jpg"); mediaExtensions.add(verbatimRecord); extensions.put(Extension.MULTIMEDIA, mediaExtensions); orig.setExtensions(extensions); occurrenceService.update(orig); VerbatimOccurrence got = 
occurrenceService.getVerbatim(KEY); assertNotNull(got); assertEquals(got.getExtensions(), orig.getExtensions()); } /** * Test the cycle: create a verbtim record, update it and add extension. */ @Test public void testUpdateVerbatimMultimediaUpdate() { VerbatimOccurrence orig = occurrenceService.getVerbatim(KEY); orig.setPublishingCountry(Country.VENEZUELA); orig.setPublishingOrgKey(UUID.randomUUID()); orig.setProtocol(EndpointType.DIGIR_MANIS); orig.setLastParsed(new Date()); Map<Extension, List<Map<Term, String>>> extensions = Maps.newHashMap(); List<Map<Term, String>> mediaExtensions = Lists.newArrayList(); Map<Term, String> verbatimRecord = new HashMap<Term, String>(); verbatimRecord.put(DcTerm.created, IsoDateFormat.FULL.getDateFormat().format(new Date())); verbatimRecord.put(DcTerm.creator, "gbifuser"); verbatimRecord.put(DcTerm.description, "testDescription"); verbatimRecord.put(DcTerm.format, "jpeg"); verbatimRecord.put(DcTerm.license, "licenseTest"); verbatimRecord.put(DcTerm.publisher, "publisherTest"); verbatimRecord.put(DcTerm.title, "titleTest"); verbatimRecord.put(DcTerm.identifier, "http://www.gbif.org/logo.jpg"); mediaExtensions.add(verbatimRecord); extensions.put(Extension.MULTIMEDIA, mediaExtensions); orig.setExtensions(extensions); occurrenceService.update(orig); Occurrence intOcc = occurrenceService.get(KEY); intOcc.setCountry(Country.ANGOLA); Map<Extension, List<Map<Term, String>>> extensions2 = Maps.newHashMap(); List<Map<Term, String>> mediaExtensions2 = Lists.newArrayList(); Map<Term, String> verbatimRecord2 = new HashMap<Term, String>(); verbatimRecord.put(DcTerm.created, IsoDateFormat.FULL.getDateFormat().format(new Date())); verbatimRecord.put(DcTerm.creator, "gbifuser2"); verbatimRecord.put(DcTerm.description, "testDescription2"); verbatimRecord.put(DcTerm.format, "jpeg"); verbatimRecord.put(DcTerm.license, "licenseTest2"); verbatimRecord.put(DcTerm.publisher, "publisherTest2"); verbatimRecord.put(DcTerm.title, "titleTest2"); 
verbatimRecord.put(DcTerm.identifier, "http://www.gbif.org/logo2.jpg"); mediaExtensions2.add(verbatimRecord); mediaExtensions2.add(verbatimRecord2); extensions2.put(Extension.MULTIMEDIA, mediaExtensions2); orig.setExtensions(extensions2); occurrenceService.update(intOcc); occurrenceService.update(orig); VerbatimOccurrence got = occurrenceService.getVerbatim(KEY); assertNotNull(got); assertEquals(got.getExtensions(), orig.getExtensions()); } @Test public void testUpdateVerbatimRemovingFields() { VerbatimOccurrence orig = occurrenceService.getVerbatim(KEY); orig.setPublishingCountry(Country.VENEZUELA); orig.setPublishingOrgKey(UUID.randomUUID()); orig.setLastCrawled(new Date()); orig.setProtocol(EndpointType.DIGIR_MANIS); addTerms(orig, "I was "); Map<Term, String> fields = orig.getVerbatimFields(); fields.remove(DwcTerm.acceptedNameUsage); fields.remove(DcTerm.accessRights); fields.remove(IucnTerm.threatStatus); orig.setVerbatimFields(fields); occurrenceService.update(orig); VerbatimOccurrence got = occurrenceService.getVerbatim(KEY); assertNotNull(got); assertEquivalence(orig, got); assertNull(got.getVerbatimField(DwcTerm.acceptedNameUsage)); assertNull(got.getVerbatimField(DcTerm.accessRights)); assertNull(got.getVerbatimField(IucnTerm.threatStatus)); } @Test public void testVerbRowMutations() { // identical shouldn't do anything VerbatimOccurrence occ = occurrenceService.getVerbatim(KEY); RowMutations mutations = occurrenceService.buildRowUpdate(occ).getRowMutations(); assertEquals(0, mutations.getMutations().size()); // one put and three deletes occ = occurrenceService.getVerbatim(KEY); occ.setLastParsed(new Date()); occ.setVerbatimField(DwcTerm.acceptedNameUsage, null); occ.setVerbatimField(DcTerm.accessRights, null); occ.setVerbatimField(GbifTerm.distanceAboveSurface, null); mutations = occurrenceService.buildRowUpdate(occ).getRowMutations(); assertEquals(4, mutations.getMutations().size()); } @Test public void testOccRowMutations() { // identical shouldn't do 
anything
    Occurrence occ = occurrenceService.get(KEY);
    RowMutations mutations = occurrenceService.buildRowUpdate(occ).getRowMutations();
    assertEquals(0, mutations.getMutations().size());

    // one put and three deletes
    occ = occurrenceService.get(KEY);
    occ.setLastInterpreted(new Date());
    occ.setVerbatimField(DwcTerm.acceptedNameUsage, null);
    occ.setVerbatimField(DcTerm.accessRights, null);
    occ.setVerbatimField(GbifTerm.ageInDays, null);
    mutations = occurrenceService.buildRowUpdate(occ).getRowMutations();
    assertEquals(4, mutations.getMutations().size());
  }

  /**
   * Sets two media objects on the occurrence stored under {@code KEY}, passes it to
   * {@code occurrenceService.update}, reads it back and checks the media list. Then changes both
   * titles, updates again and repeats the check.
   */
  @Test
  public void testMultimediaExtension() throws MalformedURLException {
    Date now = new Date();
    URI url = URI.create("http://www.comos.com/images/image1.jpeg");
    URI refs = URI.create("http://www.cosmos.com");
    URI url2 = URI.create("http://www.comos.com/images/image2.jpeg");
    URI refs2 = URI.create("http://www.cosmos2.com");

    Occurrence occ = occurrenceService.get(KEY);
    List<MediaObject> media = Lists.newArrayList();

    MediaObject image1 = new MediaObject();
    image1.setCreated(now);
    image1.setCreator("Carl Sagan");
    image1.setDescription("The beauty of nature.");
    image1.setFormat("jpeg");
    image1.setLicense("CC-BY");
    image1.setPublisher("Nature");
    image1.setReferences(refs);
    image1.setTitle("Beauty");
    image1.setType(MediaType.StillImage);
    image1.setIdentifier(url);
    media.add(image1);

    MediaObject image2 = new MediaObject();
    image2.setCreated(now);
    image2.setCreator("Carl Sagan");
    image2.setDescription("The 2nd beauty of nature.");
    image2.setFormat("jpeg");
    image2.setLicense("CC-BY");
    image2.setPublisher("Nature");
    image2.setReferences(refs2);
    image2.setTitle("Beauty");
    image2.setType(MediaType.StillImage);
    image2.setIdentifier(url2);
    media.add(image2);

    occ.setMedia(media);
    occurrenceService.update(occ);

    Occurrence got = occurrenceService.get(KEY);
    assertNotNull(got);
    // Compare against the list that was written; the previous check compared the read-back size
    // with itself and therefore could never fail.
    assertMediaEquals(media, got.getMedia());

    // update
    media = Lists.newArrayList();
    image1.setTitle("Updated title");
    media.add(image1);
    image2.setTitle("Update 2nd title");
    media.add(image2);
    got.setMedia(media);
    occurrenceService.update(got);

    Occurrence another = occurrenceService.get(KEY);
    assertNotNull(another);
    assertMediaEquals(media, another.getMedia());
  }

  /**
   * Asserts that {@code actual} has as many entries as {@code expected} and that, position by
   * position, the entries agree on {@code toString()}, references, identifier and type.
   *
   * @param expected the media objects that were written
   * @param actual the media objects that were read back
   */
  private void assertMediaEquals(List<MediaObject> expected, List<MediaObject> actual) {
    assertNotNull(actual);
    assertEquals("media count", expected.size(), actual.size());
    for (int i = 0; i < expected.size(); i++) {
      MediaObject want = expected.get(i);
      MediaObject have = actual.get(i);
      assertEquals("media[" + i + "] toString", want.toString(), have.toString());
      assertEquals("media[" + i + "] references", want.getReferences(), have.getReferences());
      assertEquals("media[" + i + "] identifier", want.getIdentifier(), have.getIdentifier());
      assertEquals("media[" + i + "] type", want.getType(), have.getType());
    }
  }

  /**
   * Replaces the verbatim fields of {@code occ} with one entry per term, each valued
   * {@code prefix + term.toString()}. Covers every non-class {@link DwcTerm}, {@link GbifTerm} and
   * {@link DcTerm}, every {@link IucnTerm}, and the term that {@link TermFactory} returns for
   * {@code "fancyUnknownTerm"}.
   *
   * @param occ the record whose verbatim fields are set
   * @param prefix text prepended to each term's string form to build the field value
   */
  private void addTerms(VerbatimOccurrence occ, String prefix) {
    Map<Term, String> fields = Maps.newHashMap();

    for (DwcTerm term : DwcTerm.values()) {
      if (!term.isClass()) {
        fields.put(term, prefix + term.toString());
      }
    }
    for (GbifTerm term : GbifTerm.values()) {
      if (!term.isClass()) {
        fields.put(term, prefix + term.toString());
      }
    }
    // IucnTerm values are added without an isClass() filter.
    for (Term term : IucnTerm.values()) {
      fields.put(term, prefix + term.toString());
    }
    for (DcTerm term : DcTerm.values()) {
      if (!term.isClass()) {
        fields.put(term, prefix + term.toString());
      }
    }

    // A term that is not one of the enum constants above.
    Term term = TermFactory.instance().findTerm("fancyUnknownTerm");
    fields.put(term, prefix + term.toString());

    occ.setVerbatimFields(fields);
  }

  /**
   * Asserts that {@code occ} is non-null, that its interpreted fields equal the fixture constants
   * referenced below, and that its issue set contains every {@link OccurrenceIssue} value.
   *
   * @param occ the occurrence to check
   */
  private void assertEquivalence(Occurrence occ) {
    assertNotNull(occ);
    assertEquals((Double) ELEV, occ.getElevation());
    assertEquals(ELEV_ACC, occ.getElevationAccuracy());
    assertEquals(BOR, occ.getBasisOfRecord());
    assertEquals(UNCERTAINTY_METERS, occ.getCoordinateUncertaintyInMeters());
    assertEquals(CONTINENT, occ.getContinent());
    assertEquals(COUNTRY, occ.getCountry());
    assertEquals(DATASET_KEY, occ.getDatasetKey());
    assertEquals(DATE_IDENTIFIED, occ.getDateIdentified());
    assertEquals(DAY, occ.getDay());
    assertEquals((Double) DEPTH, occ.getDepth());
    assertEquals(DEPTH_ACC, occ.getDepthAccuracy());
    assertEquals(ESTAB_MEANS, occ.getEstablishmentMeans());
    assertEquals(EVENT_DATE, occ.getEventDate());
    assertEquals(GEO_DATUM, occ.getGeodeticDatum());
    assertEquals(PUB_COUNTRY, occ.getPublishingCountry());
    assertEquals(INDIVIDUAL_COUNT, occ.getIndividualCount());
    assertEquals(LAST_INTERPRETED, occ.getLastInterpreted());
    assertEquals(LAT, occ.getDecimalLatitude(), 0.0001);
    assertEquals(LIFE_STAGE, occ.getLifeStage());
    assertEquals(LNG, occ.getDecimalLongitude(), 0.0001);
    assertEquals(MOD, occ.getModified());
    assertEquals((Integer) MONTH, occ.getMonth());
    assertEquals(PUBLISHING_ORG_KEY, occ.getPublishingOrgKey());
    assertEquals(PROTOCOL, occ.getProtocol());
    assertEquals(SEX, occ.getSex());
    assertEquals(STATE_PROV, occ.getStateProvince());
    assertEquals(WATERBODY, occ.getWaterBody());
    assertEquals((Integer) YEAR, occ.getYear());

    // taxonomy
    assertEquals(KINGDOM, occ.getKingdom());
    assertEquals(PHYLUM, occ.getPhylum());
    assertEquals(CLASS, occ.getClazz());
    assertEquals(ORDER, occ.getOrder());
    assertEquals(FAMILY, occ.getFamily());
    assertEquals(GENUS, occ.getGenus());
    assertEquals(SUBGENUS, occ.getSubgenus());
    assertEquals(SPECIES, occ.getSpecies());
    assertEquals(GENERIC_NAME, occ.getGenericName());
    assertEquals(SPECIFIC_EPITHET, occ.getSpecificEpithet());
    assertEquals(INFRA_SPECIFIC_EPITHET, occ.getInfraspecificEpithet());
    assertEquals(TAXON_RANK, occ.getTaxonRank());
    assertEquals(SCI_NAME, occ.getScientificName());
    assertEquals((Integer) TAXON_KEY, occ.getTaxonKey());
    assertEquals((Integer) KINGDOM_ID, occ.getKingdomKey());
    assertEquals((Integer) PHYLUM_KEY, occ.getPhylumKey());
    assertEquals((Integer) CLASS_ID, occ.getClassKey());
    assertEquals((Integer) ORDER_KEY, occ.getOrderKey());
    assertEquals((Integer) FAMILY_KEY, occ.getFamilyKey());
    assertEquals((Integer) GENUS_KEY, occ.getGenusKey());
    assertEquals(SUBGENUS_KEY, occ.getSubgenusKey());
    assertEquals((Integer) SPECIES_KEY, occ.getSpeciesKey());

    // type
    assertEquals(TYPE_STATUS, occ.getTypeStatus());
    assertEquals(TYPIFIED_NAME, occ.getTypifiedName());

    // issues
    Set<OccurrenceIssue> occIssues = occ.getIssues();
    for (OccurrenceIssue issue : OccurrenceIssue.values()) {
      assertTrue("missing issue " + issue, occIssues.contains(issue));
    }
  }

  /**
   * Asserts that two verbatim occurrences agree on key, dataset key, last-crawled and last-parsed
   * dates, protocol, publishing country, publishing organisation key and crawl id, and on the
   * verbatim value of every non-class {@link DwcTerm}. Verbatim values of other term types are
   * not compared.
   *
   * @param a the expected record
   * @param b the record under test
   */
  private void assertEquivalence(VerbatimOccurrence a, VerbatimOccurrence b) {
    assertEquals(a.getKey(), b.getKey());
    assertEquals(a.getDatasetKey(), b.getDatasetKey());
    assertEquals(a.getLastCrawled(), b.getLastCrawled());
    assertEquals(a.getLastParsed(), b.getLastParsed());
    assertEquals(a.getProtocol(), b.getProtocol());
    assertEquals(a.getPublishingCountry(), b.getPublishingCountry());
    assertEquals(a.getPublishingOrgKey(), b.getPublishingOrgKey());
    assertEquals(a.getCrawlId(), b.getCrawlId());
    for (DwcTerm term : DwcTerm.values()) {
      if (!term.isClass()) {
        assertEquals("verbatim field " + term, a.getVerbatimField(term), b.getVerbatimField(term));
      }
    }
  }
}