package uk.ac.ox.zoo.seeg.abraid.mp.common.service.workflow.support.extent;
import org.joda.time.DateTime;
import uk.ac.ox.zoo.seeg.abraid.mp.common.domain.*;
import java.util.*;
import static ch.lambdaj.Lambda.index;
import static ch.lambdaj.Lambda.on;
/**
 * A helper for the DiseaseExtentGenerator class.
 * On construction it indexes the disease extent classes and admin units, and groups the input occurrences and
 * reviews by admin unit (and occurrences by parent country). {@link #computeDiseaseExtent()} then converts these
 * groups into a disease extent class per admin unit.
 * Copyright (c) 2014 University of Oxford
 */
public class DiseaseExtentGeneratorHelper {
    // Divisor applied to review response weightings (see computeReviewsScore)
    private static final int SCALING_FACTOR = 50;
    // Maximum number of most-recent occurrences reported per admin unit
    private static final int LATEST_OCCURRENCES_MAX_LIST_SIZE = 5;

    // Input data
    private final DiseaseExtentGenerationInputData inputData;
    private final DiseaseExtent parameters;

    // Working fields
    private final Map<String, DiseaseExtentClass> classesByName;
    private final Map<Integer, AdminUnitGlobalOrTropical> adminUnitsByGaulCode;
    private final Map<Integer, List<DiseaseOccurrence>> occurrencesByAdminUnit;
    private final Map<Integer, List<DiseaseOccurrence>> occurrencesByParentCountry;
    private final Map<Integer, List<AdminUnitReview>> reviewsByAdminUnit;

    /**
     * Creates the helper and performs the initial indexing and grouping of the input data.
     * @param inputData The admin units, disease extent classes, occurrences and reviews to process.
     * @param parameters The disease extent parameters (occurrence scores and the age threshold between them).
     */
    public DiseaseExtentGeneratorHelper(DiseaseExtentGenerationInputData inputData, DiseaseExtent parameters) {
        this.inputData = inputData;
        this.parameters = parameters;

        // Do initial processing. The order matters: both occurrence groupings read adminUnitsByGaulCode, so it
        // must be populated before they run.
        this.classesByName = indexDiseaseExtentClassesOnName();
        this.adminUnitsByGaulCode = indexAdminUnitsByGaulCode();
        this.occurrencesByAdminUnit = groupOccurrencesByAdminUnit();
        this.occurrencesByParentCountry = groupOccurrencesByParentCountry();
        this.reviewsByAdminUnit = groupReviewsByAdminUnit();
    }

    /**
     * Index the disease extent classes by name.
     */
    private Map<String, DiseaseExtentClass> indexDiseaseExtentClassesOnName() {
        return index(inputData.getDiseaseExtentClasses(), on(DiseaseExtentClass.class).getName());
    }

    /**
     * Index the admin units by gaul code (global or tropical).
     */
    private Map<Integer, AdminUnitGlobalOrTropical> indexAdminUnitsByGaulCode() {
        return index(inputData.getAdminUnits(), on(AdminUnitGlobalOrTropical.class).getGaulCode());
    }

    /**
     * Groups the disease occurrences by admin unit (global or tropical).
     * Every input admin unit gets an entry, which is an empty list if it has no occurrences.
     */
    private Map<Integer, List<DiseaseOccurrence>> groupOccurrencesByAdminUnit() {
        // Create empty groups of occurrences by admin unit
        Map<Integer, List<DiseaseOccurrence>> groups = new HashMap<>();
        for (AdminUnitGlobalOrTropical adminUnit : inputData.getAdminUnits()) {
            groups.put(adminUnit.getGaulCode(), new ArrayList<DiseaseOccurrence>());
        }

        // Add occurrences to the groups
        if (inputData.getOccurrences() != null) {
            for (DiseaseOccurrence occurrence : inputData.getOccurrences()) {
                Integer gaulCode = extractGaulCode(occurrence);
                // Exclude occurrences that have country precision if the admin unit is not a country. For example,
                // the centroid of the United States is in Kansas, but we should not count a United States
                // country-level point as being a disease occurrence in Kansas itself.
                // NOTE(review): an occurrence whose GAUL code is not among the input admin units causes a
                // NullPointerException here - confirm that the caller guarantees this cannot happen.
                if (!isACountryPointInANonCountryAdminUnit(occurrence, gaulCode)) {
                    groups.get(gaulCode).add(occurrence);
                }
            }
        }

        return groups;
    }

    /**
     * Groups the occurrences by country (only for countries that are split into sub admin units).
     * The country GAUL code is taken from the admin unit global/tropical entity.
     */
    private Map<Integer, List<DiseaseOccurrence>> groupOccurrencesByParentCountry() {
        // Create an empty group for each distinct non-null country GAUL code
        Map<Integer, List<DiseaseOccurrence>> groups = new HashMap<>();
        for (AdminUnitGlobalOrTropical adminUnit : inputData.getAdminUnits()) {
            Integer countryGaulCode = adminUnit.getCountryGaulCode();
            if (countryGaulCode != null && !groups.containsKey(countryGaulCode)) {
                groups.put(countryGaulCode, new ArrayList<DiseaseOccurrence>());
            }
        }

        // Add each occurrence to the group of its admin unit's country, if the admin unit has one.
        // Unlike groupOccurrencesByAdminUnit(), country-precision occurrences are not filtered out here.
        if (inputData.getOccurrences() != null) {
            for (DiseaseOccurrence occurrence : inputData.getOccurrences()) {
                AdminUnitGlobalOrTropical adminUnit = adminUnitsByGaulCode.get(extractGaulCode(occurrence));
                Integer countryGaulCode = adminUnit.getCountryGaulCode();
                if (countryGaulCode != null) {
                    groups.get(countryGaulCode).add(occurrence);
                }
            }
        }

        return groups;
    }

    /**
     * Groups the expert reviews by admin unit (strictly, by admin unit GAUL code).
     * Every input admin unit gets an entry, which is an empty list if it has no reviews.
     */
    private Map<Integer, List<AdminUnitReview>> groupReviewsByAdminUnit() {
        // Create empty groups of reviews by admin unit
        Map<Integer, List<AdminUnitReview>> groups = new HashMap<>();
        for (AdminUnitGlobalOrTropical adminUnit : inputData.getAdminUnits()) {
            groups.put(adminUnit.getGaulCode(), new ArrayList<AdminUnitReview>());
        }

        if (inputData.getReviews() != null) {
            // Add reviews to the groups
            for (AdminUnitReview review : inputData.getReviews()) {
                Integer gaulCode = review.getAdminUnitGlobalOrTropicalGaulCode();
                groups.get(gaulCode).add(review);
            }
        }

        return groups;
    }

    /**
     * Computes the disease extent classes.
     * For each admin unit, convert its list of disease occurrences and reviews into a disease extent class.
     * Also collate the count of occurrences and most recent occurrences for each admin unit.
     * @return The extent generation results set.
     */
    public DiseaseExtentGenerationOutputData computeDiseaseExtent() {
        final Map<Integer, DiseaseExtentClass> classesByAdminUnit = new HashMap<>();
        final Map<Integer, Integer> occurrenceCountByAdminUnit = new HashMap<>();
        final Map<Integer, Collection<DiseaseOccurrence>> latestOccurrencesByAdminUnit = new HashMap<>();

        for (AdminUnitGlobalOrTropical adminUnit : inputData.getAdminUnits()) {
            final Integer gaulCode = adminUnit.getGaulCode();
            final List<DiseaseOccurrence> occurrencesForAdminUnit = occurrencesByAdminUnit.get(gaulCode);
            final List<AdminUnitReview> reviewsForAdminUnit = reviewsByAdminUnit.get(gaulCode);
            // This lookup yields null when the admin unit has no country GAUL code
            final List<DiseaseOccurrence> occurrencesForParentCountry =
                    occurrencesByParentCountry.get(adminUnit.getCountryGaulCode());

            String extentClassNameForAdminUnit = computeAdminUnit(
                    occurrencesForAdminUnit, reviewsForAdminUnit, occurrencesForParentCountry);

            classesByAdminUnit.put(gaulCode, classesByName.get(extentClassNameForAdminUnit));
            occurrenceCountByAdminUnit.put(gaulCode, occurrencesForAdminUnit.size());
            latestOccurrencesByAdminUnit.put(gaulCode, pickLatestOccurrences(occurrencesForAdminUnit));
        }

        return new DiseaseExtentGenerationOutputData(
                classesByAdminUnit, occurrenceCountByAdminUnit, latestOccurrencesByAdminUnit);
    }

    /**
     * Picks the most recent occurrences from a list, newest first.
     * The input list is left untouched and the result is an independent list (not a view of the input), so it
     * remains valid if this helper's internal lists are sorted or otherwise modified later.
     * @param occurrences The occurrences to choose from.
     * @return At most LATEST_OCCURRENCES_MAX_LIST_SIZE occurrences, in descending order of occurrence date.
     */
    private Collection<DiseaseOccurrence> pickLatestOccurrences(List<DiseaseOccurrence> occurrences) {
        // Sort a copy: the argument is one of the lists held in occurrencesByAdminUnit
        List<DiseaseOccurrence> sorted = new ArrayList<DiseaseOccurrence>(occurrences);
        Collections.sort(sorted, new Comparator<DiseaseOccurrence>() {
            @Override
            public int compare(DiseaseOccurrence o1, DiseaseOccurrence o2) {
                return o2.getOccurrenceDate().compareTo(o1.getOccurrenceDate()); // descending
            }
        });
        int n = Math.min(sorted.size(), LATEST_OCCURRENCES_MAX_LIST_SIZE);
        // Copy the head of the list rather than returning a subList view backed by the full list
        return new ArrayList<DiseaseOccurrence>(sorted.subList(0, n));
    }

    /**
     * Compute the disease extent class for an admin unit.
     * @param occurrencesForAdminUnit The occurrences in the admin unit.
     * @param reviewsForAdminUnit The reviews of the admin unit.
     * @param occurrencesForParentCountry The occurrences in the parent country (null if there is no parent country).
     * @return The name of the disease extent class.
     */
    private String computeAdminUnit(List<DiseaseOccurrence> occurrencesForAdminUnit,
                                    List<AdminUnitReview> reviewsForAdminUnit,
                                    List<DiseaseOccurrence> occurrencesForParentCountry) {
        // Computes the updated disease extent class for one admin unit
        if (!occurrencesForAdminUnit.isEmpty() || !reviewsForAdminUnit.isEmpty()) {
            // There are occurrences and/or reviews, use the score for the admin unit, using reviews, without a factor
            return computeDiseaseExtentClass(occurrencesForAdminUnit, reviewsForAdminUnit, 1);
        } else if (occurrencesForParentCountry != null) {
            // There are no occurrences or reviews, so use the score for the parent country, without reviews, halved
            return computeDiseaseExtentClass(occurrencesForParentCountry, new ArrayList<AdminUnitReview>(), 0.5); ///CHECKSTYLE:SUPPRESS LineLengthCheck|MagicNumberCheck
        } else {
            // There are no occurrences or reviews and the admin unit doesn't have a parent country (it is a country)
            return DiseaseExtentClass.UNCERTAIN;
        }
    }

    /**
     * Computes a disease extent class, based on a list of occurrences and a list of reviews.
     * @param occurrencesList The list of occurrences.
     * @param reviewsList The list of reviews.
     * @param factor The multiplier applied to the average score before it is classified.
     * @return The computed disease extent class name.
     */
    private String computeDiseaseExtentClass(
            List<DiseaseOccurrence> occurrencesList, List<AdminUnitReview> reviewsList, double factor) {
        double overallScore = computeScoreForOccurrencesAndReviews(occurrencesList, reviewsList);
        overallScore = overallScore * factor;

        // Bands: (1, +inf) presence; (0, 1] possible presence; 0 uncertain; [-1, 0) possible absence;
        // (-inf, -1) absence
        if (overallScore > 1) {
            return DiseaseExtentClass.PRESENCE;
        } else if (overallScore > 0) {
            return DiseaseExtentClass.POSSIBLE_PRESENCE;
        } else if (overallScore == 0) {
            return DiseaseExtentClass.UNCERTAIN;
        } else if (overallScore >= -1) {
            return DiseaseExtentClass.POSSIBLE_ABSENCE;
        } else {
            return DiseaseExtentClass.ABSENCE;
        }
    }

    /**
     * Computes a disease extent score, based on a list of occurrences and a list of reviews.
     * @param occurrencesList The list of occurrences.
     * @param reviewsList The list of reviews.
     * @return The computed disease score: the summed scores divided by the combined number of occurrences and
     * reviews, or 0 if both lists are empty.
     */
    private double computeScoreForOccurrencesAndReviews(List<DiseaseOccurrence> occurrencesList,
                                                        List<AdminUnitReview> reviewsList) {
        // Compute the score for each occurrence and each review, and take the average
        // Be extra careful with int -> double conversions...
        double occurrencesScore = computeOccurrencesScore(occurrencesList);
        double reviewsScore = computeReviewsScore(reviewsList);
        double totalScore = occurrencesScore + reviewsScore;
        double totalCount = occurrencesList.size() + reviewsList.size();
        return (totalCount == 0) ? 0 : (totalScore / totalCount);
    }

    /**
     * Sums the scores of a list of occurrences.
     * @param occurrenceList The list of occurrences.
     * @return The total occurrence score.
     */
    private int computeOccurrencesScore(List<DiseaseOccurrence> occurrenceList) {
        DateTime oldestDateForHigherScore =
                DateTime.now().minusMonths(parameters.getMaxMonthsAgoForHigherOccurrenceScore());

        // Unlike computeReviewsScore(), the total is an integer so that we can maintain full accuracy over multiple
        // additions
        int total = 0;
        for (DiseaseOccurrence occurrence : occurrenceList) {
            // The score for each occurrence depends on the occurrence date. It scores the "higher score" unless it
            // is older than the oldest date allowed for the higher score, in which case it scores the "lower score".
            // These values are all defined by the disease extent parameters.
            boolean useLowerScore = occurrence.getOccurrenceDate().isBefore(oldestDateForHigherScore);
            total += useLowerScore ? parameters.getLowerOccurrenceScore() : parameters.getHigherOccurrenceScore();
        }
        return total;
    }

    /**
     * Sums the scores of a list of reviews. Each review scores its scaled response weighting multiplied by the
     * weighting of the expert who made it.
     * @param reviewsList The list of reviews.
     * @return The total review score.
     */
    private double computeReviewsScore(List<AdminUnitReview> reviewsList) {
        double total = 0;
        for (AdminUnitReview review : reviewsList) {
            // The response weighting is currently divided by 50 so that the weightings in the database (which
            // were chosen for use with the model) can be used for our purposes. Eventually this should be removed.
            // NOTE(review): if getWeighting() returns an integer type this is integer division, so a weighting
            // that is not a multiple of 50 is truncated towards zero - confirm that this is intended.
            int scaledResponseWeighting = review.getResponse().getWeighting() / SCALING_FACTOR;
            total += scaledResponseWeighting * review.getExpert().getWeighting();
        }
        return total;
    }

    /**
     * Extracts the GAUL code of the admin unit that an occurrence's location falls in: the global admin unit if
     * the occurrence's disease group is global, otherwise the tropical admin unit.
     * @param occurrence The disease occurrence.
     * @return The admin unit GAUL code.
     */
    private int extractGaulCode(DiseaseOccurrence occurrence) {
        // NOTE(review): the return type is a primitive, so if the location getters return a null Integer this
        // throws a NullPointerException on unboxing - confirm that locations always carry both codes.
        return occurrence.getDiseaseGroup().isGlobal() ?
                occurrence.getLocation().getAdminUnitGlobalGaulCode() :
                occurrence.getLocation().getAdminUnitTropicalGaulCode();
    }

    /**
     * Determines whether an occurrence is a country-precision point that lies in an admin unit whose level is
     * not '0' (i.e. an admin unit that is not itself a country).
     * @param occurrence The disease occurrence.
     * @param gaulCode The GAUL code of the admin unit containing the occurrence.
     * @return True if the occurrence has country precision and the admin unit's level is not '0'.
     */
    private boolean isACountryPointInANonCountryAdminUnit(DiseaseOccurrence occurrence, Integer gaulCode) {
        return (occurrence.getLocation().getPrecision() == LocationPrecision.COUNTRY) &&
                (adminUnitsByGaulCode.get(gaulCode).getLevel() != '0');
    }
}