package org.gbif.occurrence.processor.identifiers;
import org.gbif.api.model.crawler.DwcaValidationReport;
import org.gbif.api.model.crawler.OccurrenceValidationReport;
import org.gbif.api.vocabulary.OccurrenceSchemaType;
import java.util.UUID;
import org.junit.Test;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
/**
 * Exercises {@link IdentifierStrategy} with hand-built validation reports and checks the two
 * flags it exposes: {@code isTripletsValid()} and {@code isOccurrenceIdsValid()}.
 */
public class IdentifierStrategyTest {

  /**
   * Creates a strategy for a DwC-A dataset backed by a validation report with the given counts.
   * The counts are handed to {@link OccurrenceValidationReport} in exactly this order, followed
   * by a trailing {@code true} flag (its meaning is not visible in this file).
   *
   * @param checked number of records checked
   * @param uniqueTriplets number of unique triplets
   * @param invalidTriplets number of invalid triplets
   * @param uniqueOccIds number of unique occurrence ids
   * @param missingOccIds number of records missing an occurrence id
   * @return a strategy built for {@link OccurrenceSchemaType#DWCA} and a random dataset key
   */
  private static IdentifierStrategy dwcaStrategy(int checked, int uniqueTriplets, int invalidTriplets,
    int uniqueOccIds, int missingOccIds) {
    UUID datasetKey = UUID.randomUUID();
    OccurrenceValidationReport occurrenceReport =
      new OccurrenceValidationReport(checked, uniqueTriplets, invalidTriplets, uniqueOccIds, missingOccIds, true);
    DwcaValidationReport dwcaReport = new DwcaValidationReport(datasetKey, occurrenceReport);
    return new IdentifierStrategy(OccurrenceSchemaType.DWCA, dwcaReport);
  }

  /** All triplets unique, every occurrence id missing: triplets pass, occurrence ids fail. */
  @Test
  public void testGoodTripletBadOcc() {
    final IdentifierStrategy underTest = dwcaStrategy(100, 100, 0, 0, 100);

    assertTrue(underTest.isTripletsValid());
    assertFalse(underTest.isOccurrenceIdsValid());
  }

  /** All triplets unique and all occurrence ids unique: both pass. */
  @Test
  public void testGoodTripletGoodOcc() {
    final IdentifierStrategy underTest = dwcaStrategy(100, 100, 0, 100, 0);

    assertTrue(underTest.isTripletsValid());
    assertTrue(underTest.isOccurrenceIdsValid());
  }

  /** Only 80 unique triplets out of 100 with none invalid: triplets fail, occurrence ids pass. */
  @Test
  public void testDupeTripletGoodOcc() {
    final IdentifierStrategy underTest = dwcaStrategy(100, 80, 0, 100, 0);

    assertFalse(underTest.isTripletsValid());
    assertTrue(underTest.isOccurrenceIdsValid());
  }

  /** 70 unique plus 30 invalid triplets out of 100 checked, all occurrence ids unique. */
  @Test
  public void testInvalidTripletGoodOcc() {
    final IdentifierStrategy underTest = dwcaStrategy(100, 70, 30, 100, 0);

    // Uniqueness is all that matters here; deciding when there are too many invalid triplets
    // is left to the DwcaValidationReport.
    assertTrue(underTest.isTripletsValid());
    assertTrue(underTest.isOccurrenceIdsValid());
  }

  /** A non-DwC-A schema with no validation report at all: both flags are expected to be true. */
  @Test
  public void testNonDwca() {
    final IdentifierStrategy underTest = new IdentifierStrategy(OccurrenceSchemaType.ABCD_2_0_6, null);

    assertTrue(underTest.isTripletsValid());
    assertTrue(underTest.isOccurrenceIdsValid());
  }

  /** Walks through a series of count combinations, each with its own freshly built strategy. */
  @Test
  public void testStrategies() {
    // good triplets, no occ
    final IdentifierStrategy goodTripletsNoOcc = dwcaStrategy(100, 100, 0, 0, 100);
    assertTrue(goodTripletsNoOcc.isTripletsValid());
    assertFalse(goodTripletsNoOcc.isOccurrenceIdsValid());

    // good triplets, good occ
    final IdentifierStrategy goodTripletsGoodOcc = dwcaStrategy(100, 100, 0, 100, 0);
    assertTrue(goodTripletsGoodOcc.isTripletsValid());
    assertTrue(goodTripletsGoodOcc.isOccurrenceIdsValid());

    // dupe triplets, dupe occ
    final IdentifierStrategy dupeTripletsDupeOcc = dwcaStrategy(100, 80, 0, 60, 0);
    assertFalse(dupeTripletsDupeOcc.isTripletsValid());
    assertFalse(dupeTripletsDupeOcc.isOccurrenceIdsValid());

    // some invalid triplets but the rest unique; some occ ids missing but the rest unique
    final IdentifierStrategy partlyInvalidBoth = dwcaStrategy(100, 80, 20, 20, 80);
    assertTrue(partlyInvalidBoth.isTripletsValid());
    assertFalse(partlyInvalidBoth.isOccurrenceIdsValid());

    // invalid plus dupe triplets, good occ
    final IdentifierStrategy invalidDupeTripletsGoodOcc = dwcaStrategy(100, 50, 20, 100, 0);
    assertFalse(invalidDupeTripletsGoodOcc.isTripletsValid());
    assertTrue(invalidDupeTripletsGoodOcc.isOccurrenceIdsValid());

    // good triplets, missing and dupe occ
    final IdentifierStrategy goodTripletsBadOcc = dwcaStrategy(100, 100, 0, 80, 5);
    assertTrue(goodTripletsBadOcc.isTripletsValid());
    assertFalse(goodTripletsBadOcc.isOccurrenceIdsValid());
  }
}