package uk.co.flax.biosolr.ontology.search.solr;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.function.Function;
import java.util.stream.Collectors;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import uk.co.flax.biosolr.ontology.api.AccumulatedFacetEntry;
import uk.co.flax.biosolr.ontology.api.FacetEntry;
import uk.co.flax.biosolr.ontology.api.OntologyEntryBean;
import uk.co.flax.biosolr.ontology.search.OntologySearch;
import uk.co.flax.biosolr.ontology.search.ResultsList;
import uk.co.flax.biosolr.ontology.search.SearchEngineException;

/**
 * <p>
 * Implementation of the {@link FacetTreeBuilder} that only relies on the
 * node ID and child node ID fields to build the hierarchical facet tree.
 * </p>
 * <p>
 * This works from the bottom-level up, searching to find records whose
 * child node list contains the current level's node IDs. The search is
 * repeated until there are no further child nodes to filter on. Once
 * a complete list of records is available, the top-level node(s) are found,
 * and then used as a base to recurse through the remainder of the records,
 * working out where they fall in the hierarchy.
 * </p>
 */
public class ChildNodeFacetTreeBuilder implements FacetTreeBuilder {

    private static final Logger LOGGER = LoggerFactory.getLogger(ChildNodeFacetTreeBuilder.class);

    /** The fields required when searching for the nodes to use in the tree. */
    private static final List<String> FIELD_LIST = Arrays.asList(
            SolrOntologySearch.URI_FIELD,
            SolrOntologySearch.CHILD_URI_FIELD,
            SolrOntologySearch.LABEL_FIELD,
            SolrOntologySearch.SHORT_FORM_FIELD);

    /**
     * Lower bound on the number of rows requested per search page, so that a
     * small set of child URIs with many parents does not cause one request
     * per parent.
     */
    private static final int MIN_PAGE_SIZE = 100;

    private final OntologySearch ontologySearch;

    /**
     * Create a tree builder backed by the given ontology search engine.
     * @param ontologySearch the search engine used to look up ontology entries.
     */
    public ChildNodeFacetTreeBuilder(OntologySearch ontologySearch) {
        this.ontologySearch = ontologySearch;
    }

    /**
     * Build hierarchical facet tree(s) from a flat list of facet entries.
     * @param entries the flat facets; each entry's label is used as its node URI.
     * @return one accumulated facet tree per top-level node found.
     */
    @Override
    public List<FacetEntry> buildFacetTree(List<FacetEntry> entries) {
        // Extract the URIs from the facet entries
        Set<String> uriSet = extractIdsFromFacets(entries);

        // Find all parent nodes for the incoming URIs
        Map<String, OntologyEntryBean> annotationMap = findParentNodes(uriSet);

        // Find the bottom-level nodes, if there are any which haven't been looked up
        uriSet.removeAll(annotationMap.keySet());
        annotationMap.putAll(filterEntriesByField(uriSet, SolrOntologySearch.URI_FIELD));

        // Find the top node(s)
        Set<String> topUris = findTopLevelNodes(annotationMap);
        LOGGER.debug("Found {} top level nodes", topUris.size());

        // Convert the original facets to a map, keyed by URI
        Map<String, Long> facetCounts = entries.stream()
                .collect(Collectors.toMap(FacetEntry::getLabel, FacetEntry::getCount));

        // Now collate the nodes into level-based tree(s)
        List<FacetEntry> facetTrees = new ArrayList<>(topUris.size());
        for (String uri : topUris) {
            facetTrees.add(buildAccumulatedEntryTree(0, annotationMap.get(uri), facetCounts, annotationMap));
        }

        return facetTrees;
    }

    /**
     * Find all parent nodes for the given set of URIs.
     * @param uris the starting set of URIs.
     * @return a map of nodes, keyed by their URIs.
     */
    private Map<String, OntologyEntryBean> findParentNodes(Collection<String> uris) {
        Map<String, OntologyEntryBean> parentNodes = new HashMap<>();
        Set<String> childrenFound = new HashSet<>();
        Set<String> childUris = new HashSet<>(uris);

        while (!childUris.isEmpty()) {
            // Find the direct parents for the current child URIs
            Map<String, OntologyEntryBean> parents =
                    filterEntriesByField(childUris, SolrOntologySearch.CHILD_URI_FIELD);
            parentNodes.putAll(parents);
            childrenFound.addAll(childUris);

            // The retrieved nodes are the next set whose parents should be found.
            // Copy the keys: keySet() is a live view, and removing from it would
            // modify (and require mutability of) the map returned by the lookup.
            childUris = new HashSet<>(parents.keySet());
            // Strip out any nodes we've already looked up
            childUris.removeAll(childrenFound);
        }

        return parentNodes;
    }

    /**
     * Extract the label values from the facets and return them as a set.
     * (Labels are expected to hold the facet IDs.)
     * @param entries the facets whose labels are required.
     * @return a mutable set containing all of the labels.
     */
    private Set<String> extractIdsFromFacets(List<FacetEntry> entries) {
        // The caller removes entries from this set, so collect into an explicit
        // HashSet rather than relying on the unspecified type of Collectors.toSet().
        return entries.stream()
                .map(FacetEntry::getLabel)
                .collect(Collectors.toCollection(HashSet::new));
    }

    /**
     * Fetch the ontology entries containing one or more URIs in a particular field.
     * <p>
     * Results are read page by page, because the number of matching entries can
     * exceed the number of URIs searched for (for example, one child URI may be
     * listed by several parents). If the search fails, the problem is logged and
     * whatever was retrieved before the failure is returned.
     * </p>
     * @param uris the URIs to check for.
     * @param uriField the field to filter against.
     * @return a map of URI to ontology entry for the matching entries; empty
     * if there are no incoming URIs.
     */
    private Map<String, OntologyEntryBean> filterEntriesByField(Collection<String> uris, String uriField) {
        Map<String, OntologyEntryBean> annotationMap = new HashMap<>();
        if (uris.isEmpty()) {
            return annotationMap;
        }

        LOGGER.debug("Looking up {} ontology entries in field {}", uris.size(), uriField);
        String query = "*:*";
        String filters = buildFilterString(uriField, uris);
        int pageSize = Math.max(uris.size(), MIN_PAGE_SIZE);
        int start = 0;

        try {
            while (true) {
                ResultsList<OntologyEntryBean> results = ontologySearch.searchOntology(
                        query, Arrays.asList(filters), start, pageSize, FIELD_LIST);
                List<OntologyEntryBean> page = results.getResults().stream().collect(Collectors.toList());

                int sizeBefore = annotationMap.size();
                for (OntologyEntryBean entry : page) {
                    // Keep the first entry seen for a URI rather than failing on duplicates
                    annotationMap.putIfAbsent(entry.getUri(), entry);
                }

                // Stop on a short page (no more results), or when a page adds nothing
                // new - the latter guards against a search that ignores the start offset.
                if (page.size() < pageSize || annotationMap.size() == sizeBefore) {
                    break;
                }
                start += pageSize;
            }
        } catch (SearchEngineException e) {
            // Pass the exception as the final argument so the stack trace is logged too
            LOGGER.error("Problem getting ontology entries for filter {}: {}", filters, e.getMessage(), e);
        }

        return annotationMap;
    }

    /**
     * Build a filter string matching any of a set of URIs in a field, in the
     * form <code>field:("uri1" OR "uri2" ...)</code>.
     * @param field the field to filter against.
     * @param uris the URIs to match.
     * @return a filter string.
     */
    private String buildFilterString(String field, Collection<String> uris) {
        return uris.stream()
                .map(ChildNodeFacetTreeBuilder::quoteForFilter)
                .collect(Collectors.joining(" OR ", field + ":(", ")"));
    }

    /**
     * Wrap a value in double quotes for use as a phrase in a filter,
     * escaping any backslashes and double quotes it contains so that they
     * cannot terminate the phrase early.
     * @param uri the value to quote.
     * @return the quoted, escaped value.
     */
    private static String quoteForFilter(String uri) {
        String value = String.valueOf(uri);
        return "\"" + value.replace("\\", "\\\\").replace("\"", "\\\"") + "\"";
    }

    /**
     * Find all of the top-level records in a set of annotations. This is done by looping
     * through the annotations, collecting every URI which appears in a child list, and
     * then returning the annotation IDs which are not in that collection.
     * @param annotations a map of annotation ID to annotation entries to check over.
     * @return a set containing the identifiers for all of the top-level
     * annotations found.
     */
    private Set<String> findTopLevelNodes(Map<String, OntologyEntryBean> annotations) {
        // Gather every URI that is referenced as a child by any annotation
        Set<String> referencedAsChild = new HashSet<>();
        for (OntologyEntryBean anno : annotations.values()) {
            if (anno.getChildUris() != null) {
                for (String childUri : anno.getChildUris()) {
                    referencedAsChild.add(childUri);
                }
            }
        }

        // Anything not in a child list must be top-level
        Set<String> topLevel = new HashSet<>(annotations.keySet());
        topLevel.removeAll(referencedAsChild);
        return topLevel;
    }

    /**
     * Recursively build an accumulated facet entry tree.
     * @param level current level in the tree (used for debugging/logging).
     * @param node the current node.
     * @param facetCounts the facet counts, keyed by node ID.
     * @param annotationMap the map of annotations (either in the original facet set,
     * or parent entries of those entries).
     * @return an {@link AccumulatedFacetEntry} containing details for the current node and all
     * sub-nodes down to the lowest leaf which has a facet count.
     */
    private AccumulatedFacetEntry buildAccumulatedEntryTree(int level, OntologyEntryBean node,
            Map<String, Long> facetCounts, Map<String, OntologyEntryBean> annotationMap) {
        return buildAccumulatedEntryTree(level, node, facetCounts, annotationMap, new HashSet<>());
    }

    /**
     * Recursive worker for the accumulated facet entry tree, which tracks the
     * nodes on the current path from the root so that cyclic child links are
     * skipped instead of recursing without end.
     * @param level current level in the tree (used for debugging/logging).
     * @param node the current node.
     * @param facetCounts the facet counts, keyed by node ID.
     * @param annotationMap the map of annotations available for the tree.
     * @param ancestors URIs of the nodes on the path from the root to (but not
     * including) the current node; restored to its incoming state on return.
     * @return the accumulated entry for the current node and its sub-nodes.
     */
    private AccumulatedFacetEntry buildAccumulatedEntryTree(int level, OntologyEntryBean node,
            Map<String, Long> facetCounts, Map<String, OntologyEntryBean> annotationMap,
            Set<String> ancestors) {
        // Build the child hierarchy for this entry
        SortedSet<AccumulatedFacetEntry> childHierarchy = new TreeSet<>(Collections.reverseOrder());
        long childTotal = 0;

        // Only remove this node from the path on exit if this call added it
        boolean addedToPath = ancestors.add(node.getUri());
        if (node.getChildUris() != null) {
            // Loop through all the direct child URIs, looking for those which are in the annotation map
            for (String childUri : node.getChildUris()) {
                OntologyEntryBean child = annotationMap.get(childUri);
                if (child == null) {
                    continue;
                }
                if (ancestors.contains(childUri)) {
                    // The child is already on the current path - following it would never terminate
                    LOGGER.warn("[{}] Skipping cyclic child link from {} to {}", level, node.getUri(), childUri);
                    continue;
                }

                // Found a child of this node - recurse to build its facet tree
                LOGGER.trace("[{}] Building subAfe for {}", level, childUri);
                AccumulatedFacetEntry subAfe =
                        buildAccumulatedEntryTree(level + 1, child, facetCounts, annotationMap, ancestors);
                // childTotal is the total facet hits below the current level
                childTotal += subAfe.getTotalCount();
                childHierarchy.add(subAfe);
                LOGGER.trace("[{}] subAfe total: {} - child Total {}, child count {}",
                        level, subAfe.getTotalCount(), childTotal, childHierarchy.size());
            }
        }
        if (addedToPath) {
            // Path-based tracking: a node reachable by several routes is still built on each route
            ancestors.remove(node.getUri());
        }

        // Get the count and label for this entry
        long count = getFacetCount(node.getUri(), facetCounts);
        String label = getLabelForNode(node);

        // Build the accumulated facet entry
        LOGGER.trace("[{}] Building AFE for {}", level, node.getUri());
        return new AccumulatedFacetEntry(node.getUri(), label, count, childTotal, childHierarchy);
    }

    /**
     * Get the count for the facet with the given key.
     * @param key the key to look up.
     * @param facetCounts the map of facet counts.
     * @return the count, or <code>0</code> if the key does not exist in the map.
     */
    private long getFacetCount(String key, Map<String, Long> facetCounts) {
        long ret = 0;
        if (facetCounts.containsKey(key)) {
            ret = facetCounts.get(key);
        }
        return ret;
    }

    /**
     * Get the label for a node.
     * @param node the node whose label is required.
     * @return the label - either the first value in the label list, or the
     * first value in the shortForm list, or the URI if neither of those is
     * available.
     */
    private String getLabelForNode(OntologyEntryBean node) {
        if (node.getLabel() != null && !node.getLabel().isEmpty()) {
            return node.getLabel().get(0);
        }
        if (node.getShortForm() != null && !node.getShortForm().isEmpty()) {
            return node.getShortForm().get(0);
        }
        return node.getUri();
    }

}