//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.annotators.cleaners; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.regex.Pattern; import org.apache.commons.lang.StringUtils; import org.apache.uima.UimaContext; import org.apache.uima.resource.ResourceInitializationException; import com.google.common.collect.ImmutableSet; import uk.gov.dstl.baleen.core.pipelines.orderers.AnalysisEngineAction; import uk.gov.dstl.baleen.types.common.Quantity; import uk.gov.dstl.baleen.types.semantic.Entity; /** * Find adjacent quantities of the same type and merge them * * <p>This annotator will find adjacent quantities of the same type and merge them into a single quantity. * For example, 7lb 4oz should be annotated as a single entity, not two.</p> */ public class MergeAdjacentQuantities extends MergeAdjacent { @Override public void doInitialize(UimaContext aContext) throws ResourceInitializationException{ separatorPattern = Pattern.compile(separator); classTypes = new ArrayList<>(); classTypes.add(Quantity.class); } @Override public boolean shouldMerge(Entity e1, Entity e2){ if(!(e1 instanceof Quantity && e2 instanceof Quantity)){ return false; } Quantity q1 = (Quantity) e1; Quantity q2 = (Quantity) e2; return q1.getNormalizedQuantity() >= q2.getNormalizedQuantity() && StringUtils.equals(q1.getSubType(), q2.getSubType()) && StringUtils.equals(q1.getNormalizedUnit(), q2.getNormalizedUnit()); } @Override public boolean mergeAdditionalProperties(Entity merged, Class<? extends Entity> type, List<Entity> originalEntities){ if(type != Quantity.class || merged.getClass() != type) return false; Quantity qMerged = (Quantity) merged; Double normalizedQuantity = 0.0; Set<String> units = new HashSet<>(); for(Entity e : originalEntities){ if(e.getClass() != type) continue; Quantity q = (Quantity) e; if(StringUtils.isNotBlank(q.getUnit())){ boolean newUnit = units.add(q.getUnit()); if(!newUnit) return false; } normalizedQuantity += q.getNormalizedQuantity(); setNormalizedUnit(qMerged, q.getNormalizedUnit()); setSubType(qMerged, q.getSubType()); } qMerged.setNormalizedQuantity(normalizedQuantity); return true; } private void setNormalizedUnit(Quantity qMerged, String unit){ if(StringUtils.isBlank(qMerged.getNormalizedUnit())){ qMerged.setNormalizedUnit(unit); } } private void setSubType(Quantity qMerged, String type){ if(StringUtils.isBlank(qMerged.getSubType())){ qMerged.setSubType(type); } } @Override public AnalysisEngineAction getAction() { return new AnalysisEngineAction(ImmutableSet.of(Quantity.class), ImmutableSet.of(Quantity.class)); } }