/** * Copyright 2014 The MITRE Corporation. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. * */ package org.opensextant.extractors.geo.rules; import java.util.List; import org.opensextant.data.Place; import org.opensextant.extractors.geo.BoundaryObserver; import org.opensextant.extractors.geo.LocationObserver; import org.opensextant.extractors.geo.CountryObserver; import org.opensextant.extractors.geo.PlaceCandidate; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public abstract class GeocodeRule { public int weight = 0; /* of 10, approximately */ public String NAME = null; protected CountryObserver countryObserver = null; protected LocationObserver coordObserver = null; protected BoundaryObserver boundaryObserver = null; protected Logger log = LoggerFactory.getLogger(getClass()); protected void log(String msg) { log.debug("{}: {}", NAME, msg); } protected void log(String msg, String val) { log.debug("{}: {} / value={}", NAME, msg, val); } public void setCountryObserver(CountryObserver o) { countryObserver = o; } public void setLocationObserver(LocationObserver o) { coordObserver = o; } public void setBoundaryObserver(BoundaryObserver o) { boundaryObserver = o; } /** * Override if rule instance has another view of relevance, e.g. * coordinate rule: no coords found, so rule.isRelevant() is FALSE. * * @return */ public boolean isRelevant() { return true; } /* * Override as needed. static method, because caller need not have an instance of rule * to determine if rule was applied to the candidate. * Child rules cannot override static method here, so by convention, rules can implement isRuleFor as needed. * @param pc * @return */ //public static boolean isRuleFor(PlaceCandidate pc) { // return false; //} public boolean sameCountry(Place p1, Place p2) { if (p1 == null || p2 == null) { return false; } if (p1.getCountryCode() == null || p2.getCountryCode() == null) { return false; } return p1.getCountryCode().equals(p2.getCountryCode()); } /** * Quick test to see if two places are contained within the same boundary. * * @param p1 * @param p2 * @return */ public boolean sameBoundary(Place p1, Place p2) { if (p1 == null || p2 == null) { return false; } if (p1.getAdmin2() != null) { if (p1.getAdmin2().equals(p2.getAdmin2())) { return true; } } return p1.getHierarchicalPath() != null ? p1.getHierarchicalPath().equals(p2.getHierarchicalPath()) : false; } /** * Override here as needed. * * @param name * @return */ public boolean evaluateNameFilterOnly(PlaceCandidate name) { if (name.isFilteredOut()) { return true; } // Some rules may choose early -- and that would prevent other rules // from adding evidence // In this scheme. if (name.getChosen() != null) { // DONE return true; } return false; } /** * * @param names * list of found place names */ public void evaluate(List<PlaceCandidate> names) { if (!isRelevant()) { return; } for (PlaceCandidate name : names) { // Each rule must decide if iterating over name/geo combinations // contributes evidence. But can just as easily see if name.chosen is already // set and exit early. // /* * This was filtered out already so ignore. */ if (evaluateNameFilterOnly(name)) { continue; } for (Place geo : name.getPlaces()) { if (filterOutBySize(name, geo)) { continue; } evaluate(name, geo); if (name.getChosen() != null) { // DONE break; } } } } /** * Certain names appear often around the world... in such cases * we can pare back and evaluate only significant places (e.g., cities and states) * and avoid say streams and roadways by the same name. * * If a name, N, occurs in more than 250 places, then consider only feature classes A and P. * * @param name * @param geo * @return */ protected boolean filterOutBySize(PlaceCandidate name, Place geo) { if (name.distinctLocationCount() > 250) { if (geo.isPopulated() || geo.isAdministrative()) { // allow P places and A boundaries to pass through. return false; } return true; // Filter out everything else. } // Okay, no optimization needed. return false; } /** * The one evaluation scheme that all rules must implement. * Given a single text match and a location, consider if the geo is a good geocoding * for the match. * * @param name * matched name in text * @param geo * gazetteer entry or location */ public abstract void evaluate(PlaceCandidate name, Place geo); /** * no-op, unless overriden. */ public void reset() { } }