//Dstl (c) Crown Copyright 2017
package uk.gov.dstl.baleen.uima.utils;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Stream;

import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;

import uk.gov.dstl.baleen.types.Base;
import uk.gov.dstl.baleen.types.semantic.Entity;
import uk.gov.dstl.baleen.types.semantic.ReferenceTarget;

/**
 * Helper for working with reference targets.
 *
 * <p>All members are static; the class cannot be instantiated.
 */
public final class ReferentUtils {

  /** Matches annotations which are not an {@link Entity} and which have no referent. */
  public static final Predicate<? super Base> NOT_ENTITY_OR_REFERENT =
      e -> !(e instanceof Entity) && e.getReferent() == null;

  /** Matches annotations which are an {@link Entity} or which have a referent. */
  public static final Predicate<? super Base> ENTITY_OR_REFERENT =
      e -> e instanceof Entity || e.getReferent() != null;

  /**
   * Not instantiable.
   */
  private ReferentUtils() {
    // Utility class - static methods only
  }

  /**
   * Creates the referent map - a multimap of reference target to the annotations which refer to
   * it. Annotations without a referent are left out.
   *
   * @param <T>
   *            the annotation type
   * @param jCas
   *            the JCas to select the annotations from
   * @param clazz
   *            the annotation class to select
   * @return a new multimap keyed by reference target
   */
  public static <T extends Base> Multimap<ReferenceTarget, T> createReferentMap(JCas jCas,
      Class<T> clazz) {
    final Multimap<ReferenceTarget, T> targets = HashMultimap.create();

    for (final T candidate : JCasUtil.select(jCas, clazz)) {
      final ReferenceTarget referent = candidate.getReferent();
      if (referent != null) {
        targets.put(referent, candidate);
      }
    }

    return targets;
  }

  /**
   * Convert a multimap to a standard map (each with the same value type).
   *
   * @param <T>
   *            the value type
   * @param referentMap
   *            the referent map
   * @param convert
   *            the conversion function which will convert the collection of values for a key to
   *            a single entry. This does not have to be a value from the collection, and may be
   *            null (though that will drop the corresponding key).
   * @return a new map holding one value for each key where the conversion gave a non-null result
   */
  public static <T> Map<ReferenceTarget, T> filterToSingle(
      Multimap<ReferenceTarget, T> referentMap, Function<Collection<T>, T> convert) {
    final Map<ReferenceTarget, Collection<T>> grouped = referentMap.asMap();
    final Map<ReferenceTarget, T> singleMap = new HashMap<>(grouped.size());

    grouped.forEach((target, values) -> {
      final T single = convert.apply(values);
      if (single != null) {
        singleMap.put(target, single);
      }
    });

    return singleMap;
  }

  /**
   * Gets all the annotations which are an entity or which have a referent target which is a key
   * of the supplied referent map.
   *
   * Effectively get any annotation where the covered text is considered to be an entity.
   *
   * @param jCas
   *            the JCas to select the annotations from
   * @param referentMap
   *            the referent map (reference target to entity)
   * @return all entities, followed by the non-entity annotations which refer to a mapped target
   */
  public static List<Base> getAllEntityOrReferentToEntity(JCas jCas,
      Map<ReferenceTarget, Entity> referentMap) {
    return getAllAndReferents(jCas, Entity.class, referentMap);
  }

  /**
   * Gets all the annotations of the given type, and all the other annotations which have a
   * referent target which is mapped (to a non-null value) in the referent map.
   *
   * @param <T>
   *            the annotation type
   * @param jCas
   *            the JCas to select the annotations from
   * @param clazz
   *            the annotation class to select
   * @param referentMap
   *            the referent map (reference target to annotation of this type)
   * @return a new list holding the annotations of the type, followed by the other annotations
   *         which refer to a mapped target
   */
  public static <T extends Base> List<Base> getAllAndReferents(JCas jCas, Class<T> clazz,
      Map<ReferenceTarget, T> referentMap) {
    // Start with all of the original class
    final List<Base> list = new ArrayList<>(JCasUtil.select(jCas, clazz));

    // Now add the annotations which point to a mapped reference target. Instances of the
    // original class are skipped, as they are already in the list. The annotation itself is
    // added: the map is keyed by reference target, so it cannot be looked up by annotation.
    streamReferent(jCas, referentMap)
        .filter(p -> !clazz.isInstance(p))
        .forEach(list::add);

    return list;
  }

  /**
   * Stream all annotations which have a referent that is mapped to a non-null value in the
   * referent map.
   *
   * @param jCas
   *            the JCas to select the annotations from
   * @param referentMap
   *            the referent map
   * @return the stream
   */
  public static Stream<Base> streamReferent(JCas jCas, Map<ReferenceTarget, ?> referentMap) {
    return JCasUtil.select(jCas, Base.class).stream()
        // Filter out anything we can't reference
        .filter(p -> p.getReferent() != null && referentMap.get(p.getReferent()) != null);
  }

  /**
   * Gets the longest annotation (longest by covered text length). If several annotations share
   * the longest length, the first of them in iteration order is returned.
   *
   * @param <T>
   *            the annotation type
   * @param list
   *            the annotations to pick from, must not be empty
   * @return the longest annotation
   * @throws NoSuchElementException
   *             if the collection is empty
   */
  public static <T extends Base> T getLongestSingle(Collection<T> list) {
    return singleViaCompare(list,
        (a, b) -> Integer.compare(a.getCoveredText().length(), b.getCoveredText().length()));
  }

  /**
   * Get a single value based on the compare (picks the highest). On a tie the earlier value in
   * iteration order is kept.
   *
   * @param <T>
   *            the value type
   * @param list
   *            the values to pick from, must not be empty
   * @param compare
   *            the comparator used to rank the values
   * @return the highest value
   * @throws NoSuchElementException
   *             if the collection is empty
   */
  public static <T> T singleViaCompare(Collection<T> list, Comparator<T> compare) {
    return list.stream()
        // Only replace the current best when the candidate is strictly greater
        .reduce((a, b) -> compare.compare(a, b) < 0 ? b : a)
        .orElseThrow(
            () -> new NoSuchElementException("Cannot select a single value from an empty collection"));
  }

  /**
   * Replace the mentions with the principal coreferent entity (if there is one).
   *
   * An entity which has no referent, or whose referent is not mapped, is kept as it is.
   *
   * @param entities
   *            the entities
   * @param referentMap
   *            the referent map (reference target to principal entity)
   * @return a new set of the entities, with mentions replaced by their principal entity
   */
  public static Set<Entity> replaceWithCoreferent(Collection<Entity> entities,
      Map<ReferenceTarget, Entity> referentMap) {
    final Set<Entity> set = new HashSet<>(entities.size());

    for (final Entity mention : entities) {
      final ReferenceTarget referent = mention.getReferent();
      final Entity principal = referent == null ? null : referentMap.get(referent);
      // Fall back to the mention itself when there is no principal entity to swap in
      set.add(principal != null ? principal : mention);
    }

    return set;
  }
}