//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.annotators.cleaners;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import com.google.common.base.Strings;
import com.google.common.collect.ImmutableSet;
import uk.gov.dstl.baleen.annotators.cleaners.helpers.AbstractNestedEntities;
import uk.gov.dstl.baleen.core.pipelines.orderers.AnalysisEngineAction;
import uk.gov.dstl.baleen.types.semantic.Location;
/**
* Remove locations which are contained within other locations, copying across GeoJSON information where applicable
*
* <p>All location entities are looped through, and should a location be found to be entirely contained within another
* location it is removed.
* The comparison is done purely on start and end positions, and ignores other information within the entity.
* If two entities of the same type have the same start and end position, then the one with the lower confidence is
* removed; and if both have the same confidence then the first entity in the annotation index is removed.
* However, if both entities have GeoJSON information, and they aren't the same (string comparison currently - no geo fuzzyness),
* then both are kept.</p>
* <p>If the nested entity has GeoJSON information, but the enclosing entity doesn't,
* then the GeoJSON information is copied across before it is removed.</p>
*
*
*/
public class RemoveNestedLocations extends AbstractNestedEntities<Location> {
@Override
protected Collection<List<Location>> compileEntities(JCas jCas) {
return Collections.singletonList( new ArrayList<>(JCasUtil.select(jCas, Location.class)) );
}
@Override
protected boolean shouldMerge(Location keep, Location remove) {
//If they both have GeoJson and the GeoJson differs, then keep both
if(!Strings.isNullOrEmpty(keep.getGeoJson()) && !Strings.isNullOrEmpty(remove.getGeoJson()) && !keep.getGeoJson().equals(remove.getGeoJson())) {
return false;
}
// If keep doesn't have geojson, copy it!
if(Strings.isNullOrEmpty(keep.getGeoJson()) && !Strings.isNullOrEmpty(remove.getGeoJson())){
keep.setGeoJson(remove.getGeoJson());
}
return true;
}
@Override
public AnalysisEngineAction getAction() {
return new AnalysisEngineAction(ImmutableSet.of(Location.class), Collections.emptySet());
}
}