package org.gbif.occurrence.processor.parsing;
import org.gbif.api.model.occurrence.VerbatimOccurrence;
import org.gbif.api.vocabulary.EndpointType;
import org.gbif.api.vocabulary.Extension;
import org.gbif.api.vocabulary.OccurrenceSchemaType;
import org.gbif.dwc.terms.DcTerm;
import org.gbif.dwc.terms.DwcTerm;
import org.gbif.dwc.terms.GbifInternalTerm;
import org.gbif.dwc.terms.GbifTerm;
import org.gbif.dwc.terms.Term;
import org.gbif.occurrence.persistence.api.Fragment;
import java.io.IOException;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import com.google.common.io.Resources;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.commons.io.Charsets;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
/**
 * Tests that {@link FragmentParser#parse} turns raw XML fragments into {@link VerbatimOccurrence}s
 * carrying the expected verbatim fields and extensions.
 *
 * <p>The XML fixtures are loaded from the classpath as UTF-8 and are re-encoded as UTF-8 when handed to the
 * {@link Fragment}, so the test does not depend on the platform default charset.
 */
public class FragmentParserTest {

  /** Content of the {@code abcd206_single.xml} classpath resource. */
  private static String abcd206Single;

  /** Content of the {@code dwc14.xml} classpath resource. */
  private static String dwc14;

  /**
   * Loads both XML fixtures once for the whole class.
   *
   * @throws IOException if a fixture cannot be read
   */
  @BeforeClass
  public static void preClass() throws IOException {
    abcd206Single = Resources.toString(Resources.getResource("abcd206_single.xml"), Charsets.UTF_8);
    dwc14 = Resources.toString(Resources.getResource("dwc14.xml"), Charsets.UTF_8);
  }

  /**
   * Builds an XML {@link Fragment} around the given document and assigns it the given key.
   *
   * <p>The document is encoded explicitly as UTF-8 (the charset it was read with) rather than with the platform
   * default, and the MD5 hash is computed over exactly those bytes.
   *
   * @param datasetKey dataset the fragment belongs to
   * @param xml        the XML document
   * @param protocol   endpoint type passed to the fragment
   * @param schema     schema type passed to the fragment
   * @param key        key set on the fragment after construction
   * @return the populated fragment
   */
  private static Fragment xmlFragment(UUID datasetKey, String xml, EndpointType protocol,
    OccurrenceSchemaType schema, int key) {
    byte[] data = xml.getBytes(Charsets.UTF_8);
    // NOTE(review): the literal 1 and the two trailing nulls are kept exactly as the tests passed them before;
    // their meaning is defined by the Fragment constructor, which is not visible here.
    Fragment fragment = new Fragment(datasetKey, data, DigestUtils.md5(data), Fragment.FragmentType.XML, protocol,
      new Date(), 1, schema, null, null);
    fragment.setKey(key);
    return fragment;
  }

  /**
   * Parses a single-unit ABCD 2.0.6 fragment and checks its core fields plus the first multimedia record.
   */
  @Test
  public void testAbcd206() {
    UUID datasetKey = UUID.randomUUID();
    Fragment frag =
      xmlFragment(datasetKey, abcd206Single, EndpointType.BIOCASE, OccurrenceSchemaType.ABCD_2_0_6, 1);

    VerbatimOccurrence got = FragmentParser.parse(frag);
    assertNotNull(got);

    // identifying fields
    assertEquals("BGBM", got.getVerbatimField(DwcTerm.institutionCode));
    assertEquals("AlgaTerra", got.getVerbatimField(DwcTerm.collectionCode));
    assertEquals("5834", got.getVerbatimField(DwcTerm.catalogNumber));
    assertEquals(datasetKey, got.getDatasetKey());
    assertNull(got.getVerbatimField(GbifInternalTerm.unitQualifier));
    assertEquals(1, got.getKey().intValue());

    // verbatim record content
    assertEquals("Tetraedron caudatum (Corda) Hansg.", got.getVerbatimField(DwcTerm.scientificName));
    assertEquals("52.123456", got.getVerbatimField(DwcTerm.decimalLatitude));
    assertEquals("13.123456", got.getVerbatimField(DwcTerm.decimalLongitude));
    assertEquals("50", got.getVerbatimField(DwcTerm.coordinateUncertaintyInMeters));
    assertEquals("400", got.getVerbatimField(DwcTerm.minimumElevationInMeters));
    assertEquals("500", got.getVerbatimField(DwcTerm.maximumElevationInMeters));
    assertEquals("DE", got.getVerbatimField(DwcTerm.country));
    assertEquals("Kusber, W.-H.", got.getVerbatimField(DwcTerm.recordedBy));
    assertEquals("Nikolassee, Berlin", got.getVerbatimField(DwcTerm.locality));
    assertEquals("1987-04-13T00:00:00", got.getVerbatimField(DwcTerm.eventDate));
    assertEquals("HumanObservation", got.getVerbatimField(DwcTerm.basisOfRecord));
    assertEquals("Kusber, W.-H.", got.getVerbatimField(DwcTerm.identifiedBy));
    assertEquals("Holotype", got.getVerbatimField(DwcTerm.typeStatus));
    assertEquals("Tetraedron caudatum (Corda) Hansg.", got.getVerbatimField(GbifTerm.typifiedName));

    // multimedia extension: two records expected, the first one is checked in detail
    List<Map<Term,String>> mediaObjects = got.getExtensions().get(Extension.MULTIMEDIA);
    assertNotNull(mediaObjects);
    assertEquals(2, mediaObjects.size());
    Map<Term,String> medium = mediaObjects.get(0);
    assertEquals("http://www.tierstimmenarchiv.de/recordings/Ailuroedus_buccoides_V2010_04_short.mp3",
      medium.get(DcTerm.identifier));
    assertEquals("http://www.tierstimmenarchiv.de/webinterface/contents/showdetails.php?edit=-1&unique_id=TSA:Ailuroedus_buccoides_V_2010_4_1&autologin=true",
      medium.get(DcTerm.references));
    assertEquals("audio/mp3", medium.get(DcTerm.format));
    assertEquals("CC BY-NC-ND (Attribution for non commercial use only and without derivative)",
      medium.get(DcTerm.license));
  }

  /**
   * Parses a Darwin Core 1.4 fragment and checks identifiers, taxonomy and a few other verbatim fields,
   * including that the fragment key is exposed as the {@code gbifID} verbatim field.
   */
  @Test
  public void testDwc14() {
    UUID datasetKey = UUID.randomUUID();
    Fragment frag = xmlFragment(datasetKey, dwc14, EndpointType.DIGIR, OccurrenceSchemaType.DWC_1_4, 123);

    VerbatimOccurrence got = FragmentParser.parse(frag);
    assertNotNull(got);

    // identifying fields
    assertEquals("UGENT", got.getVerbatimField(DwcTerm.institutionCode));
    assertEquals("vertebrata", got.getVerbatimField(DwcTerm.collectionCode));
    assertEquals("50058", got.getVerbatimField(DwcTerm.catalogNumber));
    assertEquals(datasetKey, got.getDatasetKey());
    assertNull(got.getVerbatimField(GbifInternalTerm.unitQualifier));

    // taxonomy
    assertEquals("Alouatta villosa Gray, 1845", got.getVerbatimField(DwcTerm.scientificName));
    assertEquals("Gray, 1845", got.getVerbatimField(DwcTerm.scientificNameAuthorship));
    assertEquals("Animalia", got.getVerbatimField(DwcTerm.kingdom));
    assertEquals("Chordata", got.getVerbatimField(DwcTerm.phylum));
    assertEquals("Mammalia", got.getVerbatimField(DwcTerm.class_));
    assertEquals("Primates", got.getVerbatimField(DwcTerm.order));
    assertEquals("Atelidae", got.getVerbatimField(DwcTerm.family));
    assertEquals("Alouatta", got.getVerbatimField(DwcTerm.genus));
    assertEquals("villosa", got.getVerbatimField(DwcTerm.specificEpithet));

    // remaining verbatim fields
    assertEquals("25", got.getVerbatimField(DwcTerm.coordinateUncertaintyInMeters));
    assertEquals("200", got.getVerbatimField(DwcTerm.minimumElevationInMeters));
    assertEquals("400", got.getVerbatimField(DwcTerm.maximumElevationInMeters));
    assertEquals("PreservedSpecimen", got.getVerbatimField(DwcTerm.basisOfRecord));
    assertEquals("123", got.getVerbatimField(GbifTerm.gbifID));
    assertEquals("Holotype", got.getVerbatimField(DwcTerm.typeStatus));
  }
}