/** * * Copyright 2012-2013 The MITRE Corporation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * ************************************************************************** * NOTICE * This software was produced for the U. S. Government under Contract No. * W15P7T-12-C-F600, and is subject to the Rights in Noncommercial Computer * Software and Noncommercial Computer Software Documentation Clause * 252.227-7014 (JUN 1995) * * (c) 2012 The MITRE Corporation. All Rights Reserved. * ************************************************************************** * * * @author dlutz, MITRE creator (lutzdavp) * @author ubaldino, MITRE adaptor * @author swainza */ package org.opensextant.extractors.xcoord; import java.io.IOException; import java.io.InputStream; import java.util.HashMap; import java.util.Map; import org.opensextant.extractors.flexpat.PatternTestCase; import org.opensextant.extractors.flexpat.RegexPattern; import org.opensextant.extractors.flexpat.RegexPatternManager; /** * <p> * This is the culmination of various coordinate extraction efforts in python * and Java. This API poses no assumptions on input data or on execution. * * * <p > * Common Coordinate Enumeration (CCE) is a concept for enumerating the * coordinate representations. See XConstants for details. * * The basics of CCE include a family (DD, DMS, MGRS, etc.) and style ( * enumerated in patterns config file). * * * <p > * Features of REGEX patterns file: * <ul> * <li>DEFINE - a component of a coord pattern to match</li> * <li>RULE - a complete pattern to match</li> * <li>TEST - an example of the text the pattern should match in part or whole.</li> * </ul> * * * <p > * The Rules file: The Rules is an external text file containing rules * consisting of regular expressions used to identify geocoords. Below is an * example of what a simple rule might look like: * * <pre> * // Parts of a decimal degree Latitude/Longitude * #DEFINE decDegLat \d?\d\.\d{1,20} * #DEFINE decDegLon [0-1]?\d?\d\.\d{1,20} * * // TARGET: DD-xx, Decimal Deg, Preceding Hemisphere (a) H DD.DDDDDD° HDDD.DDDDDD°, optional deg symbol * #RULE DD 01 <hemiLatPre>\s?<decDegLat><degSym>?\s*<latlonSep>?\s*<hemiLonPre>\s?<decDegLon>lt;degSym>? * #TEST DD 01 N42.3, W102.4 * </pre> * * Where the DEFINE statements relay fields that the PatternManager will recall * at runtime. The RULE is a composition of DEFINEs, other literals and regex * patterns. A rule must have a family and a rule ID within that family. And the * TEST statement (which is enumerated the same as the RULE family and ID). At * runtime all tests are further labeled with an incrementor, e.g. for TEST * "DD-01" might be the eighth test in the pattern file, so the test will be * labeled internally as DD-01#8. * * * */ public final class PatternManager extends RegexPatternManager { public PatternManager(InputStream s, String n) throws IOException { super(s, n); } /** * */ public Map<Integer, Boolean> CCE_family_state = new HashMap<Integer, Boolean>(); /** * * @throws IOException */ @Override public void initialize(InputStream io) throws IOException { CCE_family_state = new HashMap<Integer, Boolean>(); super.initialize(io); log.debug(this.getConfigurationDebug()); } /** * * @param cce_fam * @param enabled */ public void enable_CCE_family(int cce_fam, boolean enabled) { CCE_family_state.put(cce_fam, enabled); // And re-set all such patterns. if (patterns_list.size() > 0) { for (RegexPattern repat : patterns_list) { GeocoordPattern pat = (GeocoordPattern) repat; // This seems like overkill, but just changing the states of // patterns for the specified group of patterns. if (pat.cce_family_id == cce_fam) { enable_pattern(pat); } } } } /** * enable an instance of a pattern based on the global settings. * * @param repat */ @Override public void enable_pattern(RegexPattern repat) { GeocoordPattern p = (GeocoordPattern) repat; Boolean b = CCE_family_state.get(p.cce_family_id); if (b != null) { p.enabled = b.booleanValue(); } } /** * Implementation must create a RegexPattern given the basic RULE define, * #RULE FAMILY RID REGEX PatternManager here adds compiled pattern and * DEFINES. * * @param fam * @param rule * @param desc * @return */ @Override protected RegexPattern create_pattern(String fam, String rule, String desc) { return new GeocoordPattern(fam, rule, desc); } /** * Implementation has the option to check a pattern; For now invalid * patterns are only logged. * * @param repat * @return */ @Override protected boolean validate_pattern(RegexPattern repat) { GeocoordPattern p = (GeocoordPattern) repat; if (p.cce_family_id == XConstants.UNK_PATTERN) { log.error("Invalid Pattern @ " + p.toString()); } return (p.cce_family_id != XConstants.UNK_PATTERN); } /** * Implementation must create TestCases given the #TEST directive, #TEST RID * TID TEXT * * @param id * @param text * @param fam * @return */ @Override protected PatternTestCase create_testcase(String id, String fam, String text) { return new GeocoordTestCase(id, fam, text); } }