package org.activityinfo.geoadmin; import java.io.IOException; import java.util.Collection; import java.util.List; import org.activityinfo.geoadmin.model.AdminEntity; import com.vividsolutions.jts.geom.Envelope; import com.vividsolutions.jts.index.strtree.STRtree; /** * Given a new set of administrative entities from a shapefile, guess their * parents within the existing administrative entity based on name, code, and * geography * */ public class ParentGuesser { private static final double MIN_SCORE = 0.75; private ImportSource importSource; private STRtree index; public enum Quality { OK, WARNING, SEVERE } /** * * @param importSource * the imported features/entities * @param parents * prospective parents of the source entities */ public ParentGuesser(ImportSource importSource, List<AdminEntity> parents) { super(); this.importSource = importSource; this.index = new STRtree(parents.size()); // create a spatial index to help narrow down the search for(AdminEntity entity : parents) { Envelope mbr = GeoUtils.toEnvelope(entity.getBounds()); mbr.expandBy(mbr.getWidth() * 0.10, mbr.getHeight() * 0.10); index.insert(mbr, entity); } } public AdminEntity[] run() throws IOException { AdminEntity[] matches = new AdminEntity[importSource.getFeatureCount()]; for (int i = 0; i != matches.length; ++i) { matches[i] = findBestMatch(importSource.getFeatures().get(i)); } return matches; } /** * Finds the best matching parent for a given feature * * @param featureIndex * the index of the feature in the import source * @return the best matching admin entity */ private AdminEntity findBestMatch(ImportFeature feature) { List<AdminEntity> spatialMatches = index.query(feature.getEnvelope()); return findBestParent(feature, spatialMatches); } public static AdminEntity findBestParent(ImportFeature feature, Collection<AdminEntity> spatialMatches) { double bestScore = MIN_SCORE; AdminEntity bestParent = null; for (AdminEntity parent : spatialMatches) { double score = scoreParent(feature, parent); if (score > bestScore) { bestScore = score; bestParent = parent; } } return bestParent; } /** * Scores a prospective parent based on geography, name and code * * @param feature * @param parent * @return a score describe how will the parent entity matches as a parent * of the feature at feature index. 0 = poor match. */ private static double scoreParent(ImportFeature feature, AdminEntity parent) { // parent should completely contain the child // find the proportion contained double propContained = scoreGeography(feature, parent); // check the name similarity double nameSimilarity = scoreName(feature, parent); // check for the presence of the code double codeScore = scoreCodeMatch(feature, parent); // System.out.println(String.format("%s <> %s %.2f %.2f %.2f", // importSource.featureToString(featureIndex), // propContained, nameSimilarity, codeScore)); return propContained + (nameSimilarity * 3d) + codeScore; } /** * Scores the prospective parent based on name similarity. 1=high, meaning * that the feature contains an exact match of the parent's name in one of * its columns. * * @param feature * @param parent * the prospective parent to evaluate * @return a score from 0=poor match, 1=perfect match */ public static double scoreName(ImportFeature feature, AdminEntity parent) { return feature.similarity(parent.getName()); } /** * Scores the prospective parent based on the presence of the parent code in * the feature's column. * * @param feature * the index of the feature * @param parent * the prospective parent to evaluate * @return a score from 0=poor match, 1=perfect match */ public static double scoreCodeMatch(ImportFeature feature, AdminEntity parent) { if (parent.getCode() != null) { if (Codes.hasCode(feature.getAttributeValues(), parent.getCode())) { return 1; } } return 0; } /** * Scores the prospective parent based on geography. A perfectly matched * parent will entirely contain the child entity. (we only use MBRs here) * * @param feature * @param parent * the prospective parent to evaluate * @return a score from 0=poor match, no intersection, 1=perfect match, * competely contained */ public static double scoreGeography(ImportFeature feature, AdminEntity parent) { Envelope parentEnvelope = GeoUtils.toEnvelope(parent.getBounds()); Envelope childEnvelope = feature.getEnvelope(); if(childEnvelope.getArea() > 0) { double propContained = parentEnvelope.intersection(childEnvelope).getArea() / childEnvelope.getArea(); return propContained; } else { // we have only a point representation return parentEnvelope.contains(childEnvelope) ? 1 : 0; } } /** * Evaluates the quality of the match between an imported feature and and a * prospective parent. * * @param feature * @param parent * the propsective parent * * @return a qualitative evaluation of the match */ public Quality quality(ImportFeature feature, AdminEntity parent) { double geoScore = scoreGeography(feature, parent); if (geoScore < 0.97) { return Quality.WARNING; } if (geoScore < 0.90) { return Quality.SEVERE; } double nameScore = scoreName(feature, parent); if (nameScore < 0.80) { return Quality.SEVERE; } if (nameScore < 1) { return Quality.WARNING; } return Quality.OK; } }