//Dstl (c) Crown Copyright 2017 package uk.gov.dstl.baleen.annotators.coreference.impl.sieves; import java.util.Arrays; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.stream.Collectors; import org.apache.uima.jcas.JCas; import uk.gov.dstl.baleen.annotators.coreference.impl.data.Cluster; import uk.gov.dstl.baleen.annotators.coreference.impl.data.Mention; import uk.gov.dstl.baleen.annotators.coreference.impl.data.MentionType; /** * String matching which is more easily satisfied than exact matching. */ public class RelaxedStringMatchSieve extends AbstractCoreferenceSieve { private static final Set<String> EXCLUDED = new HashSet<>(Arrays.asList("that", "there")); /** * Constructor for RelaxedStringMatchSieve */ public RelaxedStringMatchSieve(JCas jCas, List<Cluster> clusters, List<Mention> mentions) { super(jCas, clusters, mentions); } @Override public void sieve() { // Text says nominal mention, we assume that to be mean Entity List<Mention> mentions = getMentionsWithHead(MentionType.ENTITY).stream() .filter(m -> !EXCLUDED.contains(m.getHead().toLowerCase())) .collect(Collectors.toList()); for (int i = 0; i < mentions.size(); i++) { final Mention a = mentions.get(i); for (int j = i + 1; j < mentions.size(); j++) { final Mention b = mentions.get(j); if (a.getHead().equalsIgnoreCase(b.getHead())) { addToCluster(a, b); } } } } }