package uk.ac.ox.zoo.seeg.abraid.mp.dataacquisition.acquirers.csv; import org.apache.commons.io.FileUtils; import org.joda.time.DateTime; import org.joda.time.DateTimeUtils; import org.junit.Before; import org.junit.Test; import org.kubek2k.springockito.annotations.ReplaceWithMock; import org.springframework.beans.factory.annotation.Autowired; import uk.ac.ox.zoo.seeg.abraid.mp.common.dao.DiseaseGroupDao; import uk.ac.ox.zoo.seeg.abraid.mp.common.dao.DiseaseOccurrenceDao; import uk.ac.ox.zoo.seeg.abraid.mp.common.domain.*; import uk.ac.ox.zoo.seeg.abraid.mp.dataacquisition.AbstractDataAcquisitionSpringIntegrationTests; import uk.ac.ox.zoo.seeg.abraid.mp.dataacquisition.acquirers.ManualValidationEnforcer; import java.io.File; import java.io.IOException; import java.util.Arrays; import java.util.Collections; import java.util.Comparator; import java.util.List; import static org.assertj.core.api.Assertions.assertThat; /** * Integration tests for the CsvDataAcquirer class. * * Copyright (c) 2014 University of Oxford */ public class CsvDataAcquirerIntegrationTest extends AbstractDataAcquisitionSpringIntegrationTests { private static final String CSV_HEADER = "Site,Longitude,Latitude,Precision,Country,Disease,Occurrence Date,Feed,Summary,URL,Alert Title\n"; private static final String CSV_OCCURRENCE1 = "\"Kuala Lumpur, Federal Territory of Kuala Lumpur, Malaysia\",101.7,3.16667,precise,Malaysia,dengue,10/3/2014,\"SEEG Data 2014\",,onm.php?id=XX_ALERT_ID_XX,\"Dengue -- Kuala Lumpur, Malaysia\"\n"; private static final String CSV_OCCURRENCE2 = "New Zealand,176.61475,-38.53923,Country,New Zealand,dengue,13/01/2014,SEEG Data 2014,\"SPC says the number of dengue fever outbreaks in the Paific over the past year is unprecedented and more research needs to be done into its cause. Duration: 3′ 21″. Play now; Download: Ogg | MP3 ;...\",,Regional dengue outbreak unprecedented - SPC - Radio New Zealand\n"; private static final String TEST_FOLDER = "DataAcquisition/test/uk/ac/ox/zoo/seeg/abraid/mp/dataacquisition/acquirers/csv"; private static final String TEST_ISO_8859_1_FILE = "dengue_iso-8859-1.csv"; @Autowired private CsvDataAcquirer csvDataAcquirer; @ReplaceWithMock @Autowired private ManualValidationEnforcer manualValidationEnforcer; @Autowired private DiseaseOccurrenceDao diseaseOccurrenceDao; @Autowired private DiseaseGroupDao diseaseGroupDao; @Before public void setup() { // Sun, 27 Apr 2014 09:45:41 // CSV_OCCURRENCE1, CSV_OCCURRENCE2 and TEST_ISO_8859_1_FILE all have occurrence dates less than 1 year prior. DateTimeUtils.setCurrentMillisFixed(1398591941000L); } @Test public void acquireIsSuccessful() { List<DiseaseOccurrence> occurrences = acquire(false, false, null); assertNormalValidationParameters(occurrences.get(0)); assertNormalValidationParameters(occurrences.get(1)); } @Test public void acquireGoldStandardIsSuccessful() { List<DiseaseOccurrence> occurrences = acquire(false, true, null); assertGoldStandardValidationParameters(occurrences.get(0)); assertGoldStandardValidationParameters(occurrences.get(1)); } @Test public void acquireBiasIsSuccessful() { DiseaseGroup disease = diseaseGroupDao.getById(87); List<DiseaseOccurrence> occurrences = acquire(true, false, disease); assertBiasParameters(occurrences.get(0), disease); assertBiasParameters(occurrences.get(1), disease); } @Test public void acquireISO88591FileIsSuccessful() throws IOException { // Arrange byte[] csv = FileUtils.readFileToByteArray(new File(TEST_FOLDER, TEST_ISO_8859_1_FILE)); // Act List<String> messages = csvDataAcquirer.acquireDataFromCsv(csv, false, false, null); // Assert assertThat(messages).hasSize(3); // Windows-1252 is a superset of ISO-8859-1 assertThat(messages.get(0)).isEqualTo("Detected character set windows-1252, converting to UTF-8."); assertThat(messages.get(1)).isEqualTo("Found 1 CSV file line(s) to convert."); // If the CSV file was saved without error, the special character in Côte d'Ivoire was interpreted correctly. // We don't check location.country_gaul_code as it will be null if shapefile geometries are not in the database. assertThat(messages.get(2)).contains("Saved 1 disease occurrence(s) in 1 location(s)"); } @Test public void acquireFailsOnFirstAndThirdLines() { // Arrange String csvString = CSV_HEADER + "Test site 1\n" + CSV_OCCURRENCE1 + "Test site 2, 20.5\n"; byte[] csv = csvString.getBytes(); // Act List<String> messages = csvDataAcquirer.acquireDataFromCsv(csv, false, false, null); // Assert assertThat(messages).hasSize(4); assertThat(messages.get(0)).isEqualTo("Found 3 CSV file line(s) to convert."); assertThat(messages.get(1)).isEqualTo("Saved 1 disease occurrence(s) in 1 location(s) (of which 1 location(s) passed QC)."); assertThat(messages.get(2)).isEqualTo("Error in CSV file on line 2: Longitude is missing."); assertThat(messages.get(3)).isEqualTo("Error in CSV file on line 4: Latitude is missing."); } private void assertNormalValidationParameters(DiseaseOccurrence occurrence) { assertThat(occurrence.getStatus()).isEqualTo(DiseaseOccurrenceStatus.READY); assertThat(occurrence.getFinalWeightingExcludingSpatial()).isNull(); assertThat(occurrence.getFinalWeighting()).isNull(); assertThat(occurrence.getAlert().getFeed().getProvenance().getName()).isEqualTo("Manual dataset"); assertThat(occurrence.getBiasDisease()).isNull(); } private void assertGoldStandardValidationParameters(DiseaseOccurrence occurrence) { assertThat(occurrence.getStatus()).isEqualTo(DiseaseOccurrenceStatus.READY); assertThat(occurrence.getFinalWeightingExcludingSpatial()).isEqualTo(1.0); assertThat(occurrence.getFinalWeighting()).isEqualTo(1.0); assertThat(occurrence.getAlert().getFeed().getProvenance().getName()).isEqualTo("Manual gold standard dataset"); assertThat(occurrence.getBiasDisease()).isNull(); } private void assertBiasParameters(DiseaseOccurrence occurrence, DiseaseGroup biasDisease) { assertThat(occurrence.getStatus()).isEqualTo(DiseaseOccurrenceStatus.BIAS); assertThat(occurrence.getFinalWeightingExcludingSpatial()).isNull(); assertThat(occurrence.getFinalWeighting()).isNull(); assertThat(occurrence.getAlert().getFeed().getProvenance().getName()).isEqualTo("Manual dataset"); assertThat(occurrence.getBiasDisease()).isEqualTo(biasDisease); } private List<DiseaseOccurrence> acquire(boolean isBias, boolean isGoldStandard, DiseaseGroup biasDisease) { String csvString = CSV_HEADER + CSV_OCCURRENCE1 + CSV_OCCURRENCE2; byte[] csv = csvString.getBytes(); List<String> messages = csvDataAcquirer.acquireDataFromCsv(csv, isBias, isGoldStandard, biasDisease); assertThat(messages).hasSize(2); assertThat(messages.get(0)).isEqualTo("Found 2 CSV file line(s) to convert."); assertThat(messages.get(1)).isEqualTo( "Saved 2 disease occurrence(s) in 2 location(s) (of which 2 location(s) passed QC)."); List<DiseaseOccurrence> occurrences = getLastTwoDiseaseOccurrences(); assertFirstOccurrence(occurrences.get(0)); assertSecondOccurrence(occurrences.get(1)); return occurrences; } private void assertFirstOccurrence(DiseaseOccurrence occurrence) { Location occurrence1Location = occurrence.getLocation(); assertThat(occurrence1Location.getName()).isEqualTo("Kuala Lumpur, Federal Territory of Kuala Lumpur, Malaysia"); assertThat(occurrence1Location.getGeom().getX()).isEqualTo(101.7); assertThat(occurrence1Location.getGeom().getY()).isEqualTo(3.16667); assertThat(occurrence1Location.getPrecision()).isEqualTo(LocationPrecision.PRECISE); assertThat(occurrence1Location.getGeoNameId()).isNull(); assertThat(occurrence1Location.getHealthMapCountryId()).isNull(); assertThat(occurrence1Location.getCreatedDate()).isNotNull(); assertThat(occurrence1Location.hasPassedQc()).isTrue(); assertThat(occurrence1Location.getAdminUnitQCGaulCode()).isNull(); assertThat(occurrence1Location.getAdminUnitGlobalGaulCode()).isEqualTo(153); assertThat(occurrence1Location.getAdminUnitTropicalGaulCode()).isEqualTo(153); assertThat(occurrence1Location.getCountryGaulCode()).isEqualTo(153); assertThat(occurrence1Location.getQcMessage()).isEqualTo("QC stage 1 passed: location not an ADMIN1 or " + "ADMIN2. QC stage 2 passed: location already within land. QC stage 3 passed: location already " + "within country."); Alert occurrence1Alert = occurrence.getAlert(); assertThat(occurrence1Alert.getFeed().getName()).isEqualTo("SEEG Data 2014"); assertThat(occurrence1Alert.getPublicationDate()).isNull(); assertThat(occurrence1Alert.getHealthMapAlertId()).isNull(); assertThat(occurrence1Alert.getUrl()).isEqualTo("onm.php?id=XX_ALERT_ID_XX"); assertThat(occurrence1Alert.getSummary()).isNull(); assertThat(occurrence1Alert.getTitle()).isEqualTo("Dengue -- Kuala Lumpur, Malaysia"); assertThat(occurrence1Alert.getCreatedDate()).isNotNull(); DiseaseGroup occurrence1DiseaseGroup = occurrence.getDiseaseGroup(); assertThat(occurrence1DiseaseGroup.getName()).isEqualTo("Dengue"); assertThat(occurrence.getOccurrenceDate().getMillis()).isEqualTo( new DateTime("2014-03-10T00:00:00Z").getMillis()); assertThat(occurrence.getCreatedDate()).isNotNull(); } private void assertSecondOccurrence(DiseaseOccurrence occurrence) { Location occurrence2Location = occurrence.getLocation(); assertThat(occurrence2Location.getName()).isEqualTo("New Zealand"); assertThat(occurrence2Location.getGeom().getX()).isEqualTo(176.61475); assertThat(occurrence2Location.getGeom().getY()).isEqualTo(-38.53923); assertThat(occurrence2Location.getPrecision()).isEqualTo(LocationPrecision.COUNTRY); assertThat(occurrence2Location.getGeoNameId()).isNull(); assertThat(occurrence2Location.getHealthMapCountryId()).isNull(); assertThat(occurrence2Location.getCreatedDate()).isNotNull(); assertThat(occurrence2Location.hasPassedQc()).isTrue(); assertThat(occurrence2Location.getAdminUnitQCGaulCode()).isNull(); assertThat(occurrence2Location.getAdminUnitGlobalGaulCode()).isEqualTo(179); assertThat(occurrence2Location.getAdminUnitTropicalGaulCode()).isEqualTo(179); assertThat(occurrence2Location.getCountryGaulCode()).isEqualTo(179); assertThat(occurrence2Location.getQcMessage()).isEqualTo("QC stage 1 passed: location not an ADMIN1 or " + "ADMIN2. QC stage 2 passed: location already within land. QC stage 3 passed: location already within " + "country."); Alert occurrence2Alert = occurrence.getAlert(); assertThat(occurrence2Alert.getFeed().getName()).isEqualTo("SEEG Data 2014"); assertThat(occurrence2Alert.getPublicationDate()).isNull(); assertThat(occurrence2Alert.getHealthMapAlertId()).isNull(); assertThat(occurrence2Alert.getUrl()).isNull(); assertThat(occurrence2Alert.getSummary()).isEqualTo("SPC says the number of dengue fever outbreaks in the" + " Paific over the past year is unprecedented and more research needs to be done into its cause. D" + "uration: 3′ 21″. Play now; Download: Ogg | MP3 ;..."); assertThat(occurrence2Alert.getTitle()).isEqualTo("Regional dengue outbreak unprecedented - SPC - Radio New" + " Zealand"); assertThat(occurrence2Alert.getCreatedDate()).isNotNull(); DiseaseGroup occurrence2DiseaseGroup = occurrence.getDiseaseGroup(); assertThat(occurrence2DiseaseGroup.getName()).isEqualTo("Dengue"); assertThat(occurrence.getOccurrenceDate().getMillis()).isEqualTo( new DateTime("2014-01-13T00:00:00Z").getMillis()); assertThat(occurrence.getCreatedDate()).isNotNull(); } private List<DiseaseOccurrence> getLastTwoDiseaseOccurrences() { List<DiseaseOccurrence> diseaseOccurrences = diseaseOccurrenceDao.getAll(); Collections.sort(diseaseOccurrences, new Comparator<DiseaseOccurrence>() { @Override public int compare(DiseaseOccurrence o1, DiseaseOccurrence o2) { return o1.getId().compareTo(o2.getId()); } }); int size = diseaseOccurrences.size(); assertThat(size).isGreaterThanOrEqualTo(2); return Arrays.asList(diseaseOccurrences.get(size - 2), diseaseOccurrences.get(size - 1)); } }