/** * Copyright (c) 2015 Lemur Consulting Ltd. * <p/> * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * <p/> * http://www.apache.org/licenses/LICENSE-2.0 * <p/> * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package uk.co.flax.biosolr.builders; import java.io.IOException; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; import java.util.stream.Collectors; import org.apache.commons.lang.StringUtils; import org.apache.lucene.document.Document; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.search.DocIterator; import org.apache.solr.search.DocSet; import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SyntaxError; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import uk.co.flax.biosolr.FacetTreeParameters; import uk.co.flax.biosolr.TreeFacetField; /** * FacetTreeBuilder implementation that uses parent node IDs to build a * tree from the bottom node upwards. * * <p> * Minimum required parameters for this tree builder are the node field, * either passed in local parameters or taken from the key value, and * the parent node field. {@link #initialiseParameters(SolrParams)} will * throw a SyntaxError if these values are not defined. * </p> * * @author mlp */ public class ParentNodeFacetTreeBuilder extends AbstractFacetTreeBuilder { private static final Logger LOGGER = LoggerFactory.getLogger(ParentNodeFacetTreeBuilder.class); private String parentField; private int maxLevels; private final Set<String> docFields = new HashSet<>(); @Override public void initialiseParameters(SolrParams localParams) throws SyntaxError { // Initialise the common fields super.initialiseParameters(localParams); // Initialise the parent field - REQUIRED parentField = localParams.get(FacetTreeParameters.PARENT_FIELD_PARAM); if (StringUtils.isBlank(parentField)) { throw new SyntaxError("Missing parent field definition in " + localParams); } // Initialise the optional fields maxLevels = localParams.getInt(FacetTreeParameters.LEVELS_PARAM, 0); docFields.addAll(Arrays.asList(getNodeField(), parentField)); if (hasLabelField()) { docFields.add(getLabelField()); } } @Override public List<TreeFacetField> processFacetTree(SolrIndexSearcher searcher, Map<String, Integer> facetMap) throws IOException { checkFieldsInSchema(searcher, docFields); // Extract the facet keys to a volatile set Set<String> facetKeys = new HashSet<>(facetMap.keySet()); // Build a map of parent - child node IDs. This should contain the parents // of all our starting facet terms. Map<String, Set<String>> nodeChildren = findParentEntries(searcher, facetKeys); // Find the top nodes Set<String> topNodes = findTopLevelNodes(nodeChildren); LOGGER.debug("Found {} top level nodes", topNodes.size()); // Convert to a list of TreeFacetFields return topNodes.parallelStream() .map(node -> buildAccumulatedEntryTree(0, node, nodeChildren, facetMap)) .collect(Collectors.toList()); } /** * Find all parent nodes for the given set of items. * @param searcher the searcher for the collection being used. * @param facetValues the starting set of node IDs. * @return a map of nodes, keyed by their IDs. * @throws IOException */ private Map<String, Set<String>> findParentEntries(SolrIndexSearcher searcher, Collection<String> facetValues) throws IOException { Map<String, Set<String>> nodeParentIds = new HashMap<>(); Set<String> nodesFound = new HashSet<>(); Set<String> nodeIds = new HashSet<>(facetValues); int count = 0; while (nodeIds.size() > 0 && (maxLevels == 0 || maxLevels >= count)) { // Find the direct parents for the current node IDs Map<String, Set<String>> parents = findParentIdsForNodes(searcher, nodeIds); nodeParentIds.putAll(parents); nodesFound.addAll(nodeIds); // Get the parent IDs from all the retrieved nodes - these are the next set of // nodes whose parents should be found. nodeIds = parents.values().stream() .flatMap(v -> v.stream()) .filter(id -> !nodesFound.contains(id)) .collect(Collectors.toSet()); count ++; }; // Now, invert the map, so it's a map of parent->child IDs Map<String, Set<String>> parentChildIds = new HashMap<>(); for (Entry<String, Set<String>> entry : nodeParentIds.entrySet()) { for (String parentId : entry.getValue()) { if (!parentChildIds.containsKey(parentId)) { parentChildIds.put(parentId, new HashSet<String>()); } parentChildIds.get(parentId).add(entry.getKey()); } } return parentChildIds; } private Map<String, Set<String>> findParentIdsForNodes(SolrIndexSearcher searcher, Collection<String> nodeIds) throws IOException { Map<String, Set<String>> parentIds = new HashMap<>(); LOGGER.debug("Looking up parents for {} nodes", nodeIds.size()); Query filter = buildFilterQuery(getNodeField(), nodeIds); LOGGER.trace("Filter query: {}", filter); DocSet docs = searcher.getDocSet(filter); for (DocIterator it = docs.iterator(); it.hasNext(); ) { Document doc = searcher.doc(it.nextDoc(), docFields); String nodeId = doc.get(getNodeField()); Set<String> parentIdValues = new HashSet<>(Arrays.asList(doc.getValues(parentField))); parentIds.put(nodeId, parentIdValues); // Record the label, if required if (isLabelRequired(nodeId)) { recordLabel(nodeId, doc.getValues(getLabelField())); } } return parentIds; } /** * Build a filter query for a field using a set of values, taken from the keys * of a {@link NamedList}. * @param field * @param values * @return a filter string. */ private Query buildFilterQuery(String field, Collection<String> values) { BooleanQuery.Builder builder = new BooleanQuery.Builder().setDisableCoord(true); values.stream() .map(v -> new TermQuery(new Term(field, v))) .forEach(tq -> builder.add(tq, Occur.SHOULD)); return builder.build(); } /** * Recursively build an accumulated facet entry tree. * @param level current level in the tree (used for debugging/logging). * @param fieldValue the current node value. * @param hierarchyMap the map of nodes (either in the original facet set, * or parents of those entries). * @param facetCounts the facet counts, keyed by node ID. * @return a {@link TreeFacetField} containing details for the current node and all * sub-nodes down to the lowest leaf which has a facet count. */ private TreeFacetField buildAccumulatedEntryTree(int level, String fieldValue, Map<String, Set<String>> hierarchyMap, Map<String, Integer> facetCounts) { // Build the child hierarchy for this entry. // We use a reverse-ordered SortedSet so entries are returned in descending // order by their total count. SortedSet<TreeFacetField> childHierarchy = new TreeSet<>(Collections.reverseOrder()); // childTotal is the total number of facet hits below this node long childTotal = 0; if (hierarchyMap.containsKey(fieldValue)) { // Loop through all the direct child URIs, looking for those which are in the annotation map for (String childId : hierarchyMap.get(fieldValue)) { if (!childId.equals(fieldValue)) { // Found a child of this node - recurse to build its facet tree LOGGER.trace("[{}] Building child tree for {}, with {} children", level, childId, (hierarchyMap.containsKey(childId) ? hierarchyMap.get(childId).size(): 0)); TreeFacetField childTree = buildAccumulatedEntryTree(level + 1, childId, hierarchyMap, facetCounts); // Only add to the total count if this node isn't already in the child hierarchy if (childHierarchy.add(childTree)) { childTotal += childTree.getTotal(); } LOGGER.trace("[{}] child tree total: {} - child Total {}, child count {}", level, childTree.getTotal(), childTotal, childHierarchy.size()); } else { LOGGER.trace("[{}] found self-referring ID {}->{}", level, fieldValue, childId); } } } // Build the accumulated facet entry LOGGER.trace("[{}] Building facet tree for {}", level, fieldValue); return new TreeFacetField(getLabel(fieldValue), fieldValue, getFacetCount(fieldValue, facetCounts), childTotal, childHierarchy); } /** * Get the count for the facet with the given key. * @param key the key to look up. * @param facetCounts the map of facet counts. * @return the count, or <code>0</code> if the key does not exist in the map. */ private long getFacetCount(String key, Map<String, Integer> facetCounts) { if (facetCounts.containsKey(key)) { return facetCounts.get(key); } return 0; } @Override protected Logger getLogger() { return LOGGER; } }