package uk.ac.ox.zoo.seeg.abraid.mp.common.service.workflow.support;
import org.apache.commons.mail.EmailException;
import org.apache.log4j.Logger;
import uk.ac.ox.zoo.seeg.abraid.mp.common.domain.DiseaseGroup;
import uk.ac.ox.zoo.seeg.abraid.mp.common.domain.DiseaseOccurrence;
import uk.ac.ox.zoo.seeg.abraid.mp.common.service.core.DiseaseService;
import uk.ac.ox.zoo.seeg.abraid.mp.common.service.core.EmailService;
import uk.ac.ox.zoo.seeg.abraid.mp.common.service.core.GeometryService;
import java.util.*;
import static ch.lambdaj.Lambda.*;
import static java.util.Map.Entry;
import static org.hamcrest.Matchers.notNullValue;
/**
 * Selects the set of occurrences to be used in a model run, satisfying the Minimum Data Volume (MDV) and
 * Minimum Data Spread (MDS) conditions of a disease group.
 * <p>
 * An instance is bound to a single disease group: the occurrences and thresholds are loaded once, in the
 * constructor. The instance holds mutable working state for the MDS calculation.
 * Copyright (c) 2014 University of Oxford
 */
public class ModelRunOccurrencesSelectorHelper {
    // Log messages
    private static final String NOT_REQUESTING_EMAIL_MESSAGE_PREFIX =
            "Not requesting a model run for disease group %d (%s) because ";
    private static final String NOT_REQUESTING_EMAIL_SUBJECT = "Minimum Data Volume/Spread Not Satisfied";
    private static final String MDV_SATISFIED_LOG_MESSAGE =
            "Minimum Data Volume is satisfied: %d occurrence(s) exceeds threshold of %d";
    private static final String MDV_NOT_SATISFIED_LOG_MESSAGE =
            "Minimum Data Volume is not satisfied: %d occurrence(s) does not exceed threshold of %d";
    private static final String MDS_NOT_SATISFIED_LOG_MESSAGE =
            "Minimum Data Spread is not satisfied: no occurrences in any countries of interest";
    private static final String AFRICAN_COUNTRY_CLAUSE =
            "at least 1 occurrence in %d countries, and at least %d occurrence(s) in %d countries";
    private static final String AFRICAN_MDS_NOT_SATISFIED_LOG_MESSAGE =
            "Minimum Data Spread is not satisfied: " +
            "should have " + AFRICAN_COUNTRY_CLAUSE + ", but only has " + AFRICAN_COUNTRY_CLAUSE;
    private static final String OTHER_COUNTRY_CLAUSE =
            "at least 1 occurrence in %d distinct countries";
    private static final String OTHER_MDS_NOT_SATISFIED_LOG_MESSAGE =
            "Minimum Data Spread is not satisfied: " +
            OTHER_COUNTRY_CLAUSE + " does not exceed threshold of %d countries";
    private static final String MDS_SATISFIED_LOG_MESSAGE =
            "Minimum Data Spread is satisfied: ";
    private static final String SKIPPING_MDS_CALCULATION =
            "Skipping Minimum Data Spread calculation; at least one parameter is not defined";

    // Exception messages (these are displayed in the user interface via the Bad Request response text)
    private static final String EXCEPTION_MESSAGE_PREFIX = "Model cannot run because ";

    private static final Logger LOGGER = Logger.getLogger(ModelRunOccurrencesSelectorHelper.class);

    private final DiseaseService diseaseService;
    private final GeometryService geometryService;
    private final EmailService emailService;

    // Occurrences and Minimum Data Volume/Spread parameters for the disease group
    private List<DiseaseOccurrence> allOccurrences;
    private DiseaseGroup diseaseGroup;
    private int minDataVolume;
    private Integer minDistinctCountries;
    private Integer highFrequencyThreshold;
    private Integer minHighFrequencyCountries;
    private Boolean occursInAfrica;

    // Reference structures used to compare values against in MDS checks
    private Set<Integer> countriesOfInterest;                // Held as a set: probed once per occurrence
    private Set<Integer> countriesWithAtLeastOneOccurrence;  // For disease groups using all countries
    private Map<Integer, Integer> occurrenceCountPerCountry; // For disease groups using only the African countries

    /**
     * Creates a helper for one disease group, loading its occurrences and MDV/MDS parameters immediately.
     * @param diseaseService The disease service, used to load the disease group and its occurrences.
     * @param geometryService The geometry service, used to load the countries of interest for the MDS check.
     * @param emailService The e-mail service, used to report that a model run cannot be requested.
     * @param diseaseGroupId The ID of the disease group.
     * @param onlyUseGoldStandardOccurrences Passed through to the occurrence query of the disease service.
     */
    public ModelRunOccurrencesSelectorHelper(DiseaseService diseaseService, GeometryService geometryService,
                                             EmailService emailService, int diseaseGroupId,
                                             boolean onlyUseGoldStandardOccurrences) {
        this.diseaseService = diseaseService;
        this.geometryService = geometryService;
        this.emailService = emailService;
        initialise(diseaseGroupId, onlyUseGoldStandardOccurrences);
    }

    // Set the MDS calculation parameters for the specified disease group.
    private void initialise(int diseaseGroupId, boolean onlyUseGoldStandardOccurrences) {
        allOccurrences = diseaseService.getDiseaseOccurrencesForModelRunRequest(diseaseGroupId,
                onlyUseGoldStandardOccurrences);
        diseaseGroup = diseaseService.getDiseaseGroupById(diseaseGroupId);
        minDataVolume = diseaseGroup.getMinDataVolume();
        minDistinctCountries = diseaseGroup.getMinDistinctCountries();
        highFrequencyThreshold = diseaseGroup.getHighFrequencyThreshold();
        minHighFrequencyCountries = diseaseGroup.getMinHighFrequencyCountries();
        occursInAfrica = diseaseGroup.occursInAfrica();
    }

    /**
     * Gets the list of occurrences to be used in the model run.
     * @return The list of occurrences with which to run the model.
     * @throws ModelRunWorkflowException if the model should not run because the required thresholds have not been
     * reached.
     */
    public List<DiseaseOccurrence> selectModelRunDiseaseOccurrences() throws ModelRunWorkflowException {
        // Minimum Data Volume must always be satisfied
        if (!minDataVolumeSatisfied()) {
            // Always throws, so nothing below runs in this case
            handleCannotRunModel(
                    String.format(MDV_NOT_SATISFIED_LOG_MESSAGE, allOccurrences.size(), minDataVolume));
        }
        LOGGER.info(String.format(MDV_SATISFIED_LOG_MESSAGE, allOccurrences.size(), minDataVolume));

        if (!diseaseGroup.isAutomaticModelRunsEnabled()) {
            // If automatic model runs are disabled, all occurrences are sent to the model
            return allOccurrences;
        }

        // If automatic model runs are enabled, select the subset of occurrences provided by the Minimum
        // Data Volume, then add occurrences until the Minimum Data Spread is achieved. These are selected
        // most recent first, to keep the model input data contemporary.
        List<DiseaseOccurrence> occurrences = selectSubset();
        if (occursInAfrica != null) {
            occurrences = occursInAfrica ?
                    refineSubsetForAfricanDiseaseGroup(occurrences) :
                    refineSubsetForOtherDiseaseGroup(occurrences);
        }
        return occurrences;
    }

    private boolean minDataVolumeSatisfied() {
        return (allOccurrences.size() >= minDataVolume);
    }

    // Select subset of n most recent occurrences (allOccurrences list is sorted by occurrence date)
    // N.B. The occurrences must be copied into a new list, instead of returning the subList itself. The latter only
    // provides a view to allOccurrences, so adding to occurrences also adds to allOccurrences, leading to
    // OutOfMemoryError.
    private List<DiseaseOccurrence> selectSubset() {
        // Clamp at zero so that a negative threshold yields an empty selection rather than an exception
        int count = Math.max(0, minDataVolume);
        return new ArrayList<>(allOccurrences.subList(0, count));
    }

    // If MDS is not met, continue to select points until it does, unless we run out of points.
    private List<DiseaseOccurrence> refineSubsetForAfricanDiseaseGroup(List<DiseaseOccurrence> occurrences) {
        if (!parametersNotNull(minDistinctCountries, highFrequencyThreshold, minHighFrequencyCountries)) {
            LOGGER.info(SKIPPING_MDS_CALCULATION);
            return occurrences;
        }

        countriesOfInterest = new HashSet<>(geometryService.getCountriesForMinDataSpreadCalculation());
        constructOccurrenceCountPerCountryMap(occurrences);
        while (!minDataSpreadCheckForAfricanDiseaseGroup()) {
            int n = occurrences.size();
            if (n == allOccurrences.size()) {
                // Every occurrence has been used and MDS is still not met (always throws)
                handleCannotRunModel(buildAfricanMDSNotSatisfiedLogMessage());
            }
            DiseaseOccurrence nextOccurrence = allOccurrences.get(n);
            occurrences.add(nextOccurrence);
            addCountryToOccurrenceCountMap(nextOccurrence.getLocation().getCountryGaulCode());
        }
        handleCanRunModel();
        return occurrences;
    }

    // As above, but for disease groups whose spread is measured over all countries rather than countries of interest.
    private List<DiseaseOccurrence> refineSubsetForOtherDiseaseGroup(List<DiseaseOccurrence> occurrences) {
        if (minDistinctCountries == null) {
            LOGGER.info(SKIPPING_MDS_CALCULATION);
            return occurrences;
        }

        extractDistinctGaulCodes(occurrences);
        while (!minDataSpreadCheckForOtherDiseaseGroup()) {
            int n = occurrences.size();
            if (n == allOccurrences.size()) {
                // Every occurrence has been used and MDS is still not met (always throws)
                handleCannotRunModel(buildOtherMDSNotSatisfiedLogMessage());
            }
            DiseaseOccurrence nextOccurrence = allOccurrences.get(n);
            occurrences.add(nextOccurrence);
            Integer gaulCode = nextOccurrence.getLocation().getCountryGaulCode();
            if (gaulCode != null) {
                // A null code (presumably a location with no assigned country) must not count as a country
                countriesWithAtLeastOneOccurrence.add(gaulCode);
            }
        }
        handleCanRunModel();
        return occurrences;
    }

    // Returns true only if every one of the supplied parameters is non-null.
    private static boolean parametersNotNull(Integer... args) {
        List<Integer> values = Arrays.asList(args);
        List<Integer> notNullValues = filter(notNullValue(), values);
        return (values.size() == notNullValues.size());
    }

    private void constructOccurrenceCountPerCountryMap(List<DiseaseOccurrence> occurrences) {
        occurrenceCountPerCountry = new HashMap<>();
        for (DiseaseOccurrence occurrence : occurrences) {
            addCountryToOccurrenceCountMap(occurrence.getLocation().getCountryGaulCode());
        }
    }

    // Only "Countries of Interest" are added to the map - with their corresponding occurrence count
    private void addCountryToOccurrenceCountMap(Integer gaulCode) {
        if (countriesOfInterest.contains(gaulCode)) {
            Integer currentCount = occurrenceCountPerCountry.get(gaulCode);
            occurrenceCountPerCountry.put(gaulCode, (currentCount == null) ? 1 : currentCount + 1);
        }
    }

    // MDS for African disease groups: enough distinct countries of interest, and enough of those countries with a
    // high frequency of occurrences.
    private boolean minDataSpreadCheckForAfricanDiseaseGroup() {
        if (occurrenceCountPerCountry.size() < minDistinctCountries) {
            return false;
        }
        return (extractHighFrequencyCountries().size() >= minHighFrequencyCountries);
    }

    // The countries of interest whose occurrence count has reached the high frequency threshold.
    private Set<Integer> extractHighFrequencyCountries() {
        Set<Integer> highFrequencyCountries = new HashSet<>();
        for (Entry<Integer, Integer> entry : occurrenceCountPerCountry.entrySet()) {
            if (entry.getValue() >= highFrequencyThreshold) {
                highFrequencyCountries.add(entry.getKey());
            }
        }
        return highFrequencyCountries;
    }

    private void extractDistinctGaulCodes(List<DiseaseOccurrence> occurrences) {
        countriesWithAtLeastOneOccurrence = new HashSet<>(
                extract(occurrences, on(DiseaseOccurrence.class).getLocation().getCountryGaulCode())
        );
        // A null code (presumably a location with no assigned country) must not count as a distinct country
        countriesWithAtLeastOneOccurrence.remove(null);
    }

    private boolean minDataSpreadCheckForOtherDiseaseGroup() {
        return (countriesWithAtLeastOneOccurrence.size() >= minDistinctCountries);
    }

    private String buildAfricanMDSNotSatisfiedLogMessage() {
        int n = occurrenceCountPerCountry.size();
        if (n == 0) {
            return MDS_NOT_SATISFIED_LOG_MESSAGE;
        }
        // First three arguments are the required spread, last three are the spread actually achieved
        return String.format(AFRICAN_MDS_NOT_SATISFIED_LOG_MESSAGE,
                minDistinctCountries, highFrequencyThreshold, minHighFrequencyCountries,
                n, highFrequencyThreshold, extractHighFrequencyCountries().size());
    }

    private String buildOtherMDSNotSatisfiedLogMessage() {
        int n = countriesWithAtLeastOneOccurrence.size();
        if (n == 0) {
            return MDS_NOT_SATISFIED_LOG_MESSAGE;
        }
        return String.format(OTHER_MDS_NOT_SATISFIED_LOG_MESSAGE, n, minDistinctCountries);
    }

    // Never returns normally: always throws ModelRunWorkflowException (or a RuntimeException wrapping an
    // EmailException if the notification e-mail could not be sent).
    private void handleCannotRunModel(String message) {
        // Log so it's in the logs. Send an e-mail to the default address so that the message is visible to the user
        // (particularly relevant if this was triggered by Data Manager). Throw an exception so that the transaction
        // rolls back, and to send a message back to the user if it was triggered manually.
        // N.B. Only the prefix is a format string. The message is already formatted, so it is appended afterwards;
        // otherwise any '%' within it would be misinterpreted as a format specifier.
        String logMessage = String.format(NOT_REQUESTING_EMAIL_MESSAGE_PREFIX, diseaseGroup.getId(),
                diseaseGroup.getName()) + message;
        LOGGER.warn(logMessage);
        try {
            emailService.sendEmail(NOT_REQUESTING_EMAIL_SUBJECT, logMessage);
        } catch (EmailException e) {
            throw new RuntimeException(e);
        }
        // And throw an exception
        throw new ModelRunWorkflowException(EXCEPTION_MESSAGE_PREFIX + message);
    }

    // Logs the spread that was achieved. Only called from the MDS refinement methods, which are themselves only
    // called when occursInAfrica is non-null.
    private void handleCanRunModel() {
        String message;
        if (occursInAfrica) {
            message = String.format(AFRICAN_COUNTRY_CLAUSE, occurrenceCountPerCountry.size(),
                    highFrequencyThreshold, extractHighFrequencyCountries().size());
        } else {
            message = String.format(OTHER_COUNTRY_CLAUSE, countriesWithAtLeastOneOccurrence.size());
        }
        LOGGER.info(MDS_SATISFIED_LOG_MESSAGE + message);
    }
}