//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.annotators.cleaners; import java.util.Collections; import com.google.common.collect.ImmutableSet; import uk.gov.dstl.baleen.annotators.cleaners.helpers.AbstractNormalizeEntities; import uk.gov.dstl.baleen.core.pipelines.orderers.AnalysisEngineAction; import uk.gov.dstl.baleen.types.geo.Coordinate; import uk.gov.dstl.baleen.types.semantic.Entity; /** * Formats the value of the OSGB entities to be consistent for export and entity * matching between documents. The format is two upper case letters followed by * an even digit number with no whitespace or other characters. * * @baleen.javadoc */ public class NormalizeOSGB extends AbstractNormalizeEntities { @Override protected String normalize(Entity e) { String osgb = e.getValue(); osgb = osgb.replaceAll("[\\s]", ""); osgb = osgb.toUpperCase(); return osgb; } @Override protected boolean shouldNormalize(Entity e) { return (e instanceof Coordinate) && ("osgb".equals(e.getSubType())); } @Override public AnalysisEngineAction getAction() { return new AnalysisEngineAction(ImmutableSet.of(Coordinate.class), Collections.emptySet()); } }