/**
* Copyright (c) 2015 Lemur Consulting Ltd.
* <p/>
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package uk.co.flax.biosolr.builders;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.stream.Collectors;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SyntaxError;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import uk.co.flax.biosolr.FacetTreeParameters;
import uk.co.flax.biosolr.TreeFacetField;
/**
* Implementation of {@link FacetTreeBuilder} that uses a child node field
* to build a hierarchical facet tree from the bottom upwards.
*
* <p>
* Minimum required parameters for this tree builder are the node field,
* either passed in local parameters or taken from the key value, and
* the child node field. {@link #initialiseParameters(SolrParams)} will
* throw a SyntaxError if these values are not defined.
* </p>
*
* @author mlp
*/
public class ChildNodeFacetTreeBuilder extends AbstractFacetTreeBuilder {

    private static final Logger LOGGER = LoggerFactory.getLogger(ChildNodeFacetTreeBuilder.class);

    /** Name of the document field holding the IDs of a node's direct children. */
    private String childField;

    /** Value of the levels parameter; {@code 0} (the default) disables the limit on parent look-ups. */
    private int maxLevels;

    /** Stored fields loaded for every document: node field, child field and, if configured, label field. */
    private final Set<String> docFields = new HashSet<>();

    public ChildNodeFacetTreeBuilder() {
    }

    @Override
    protected Logger getLogger() {
        return LOGGER;
    }

    /**
     * Read the builder configuration from the local parameters.
     *
     * @param localParams the local parameters for the facet tree request.
     * @throws SyntaxError if the child field parameter is missing or blank
     * (the superclass may throw for its own required parameters).
     */
    @Override
    public void initialiseParameters(SolrParams localParams) throws SyntaxError {
        super.initialiseParameters(localParams);

        // Initialise the child field - REQUIRED
        childField = localParams.get(FacetTreeParameters.CHILD_FIELD_PARAM);
        if (StringUtils.isBlank(childField)) {
            throw new SyntaxError("Missing child field definition in " + localParams);
        }

        // Initialise the optional fields
        maxLevels = localParams.getInt(FacetTreeParameters.LEVELS_PARAM, 0);

        // Start from a clean slate so that a repeated initialisation does not
        // retain fields from a previous configuration.
        docFields.clear();
        docFields.addAll(Arrays.asList(getNodeField(), childField));
        if (hasLabelField()) {
            docFields.add(getLabelField());
        }
    }

    /**
     * Build the facet tree for the given facet counts, working upwards from
     * the facet terms to their ancestors.
     *
     * @param searcher the searcher for the collection being used.
     * @param facetMap the facet counts, keyed by node ID.
     * @return one {@link TreeFacetField} per top-level node.
     * @throws IOException if a problem occurs reading from the index.
     */
    @Override
    public List<TreeFacetField> processFacetTree(SolrIndexSearcher searcher, Map<String, Integer> facetMap)
            throws IOException {
        // Check that all of the given fields are in the searcher's schema
        checkFieldsInSchema(searcher, docFields);

        // Copy the facet keys to a mutable set, leaving the caller's map untouched
        Set<String> facetKeys = new HashSet<>(facetMap.keySet());

        // Build a map of parent - child node IDs. This should contain the parents
        // of all our starting facet terms.
        Map<String, Set<String>> nodeChildren = findParentEntries(searcher, facetKeys);

        // Find the details for the starting facet terms, if there are any which haven't
        // been found already.
        facetKeys.removeAll(nodeChildren.keySet());
        nodeChildren.putAll(filterEntriesByField(searcher, facetKeys, getNodeField()));

        // Find the top nodes
        Set<String> topNodes = findTopLevelNodes(nodeChildren);
        LOGGER.debug("Found {} top level nodes", topNodes.size());

        // Convert to a list of TreeFacetFields. Each top-level call tracks its own
        // ancestor path, so the parallel tasks share only the maps they read from.
        return topNodes.parallelStream()
                .map(node -> buildAccumulatedEntryTree(0, node, nodeChildren, facetMap))
                .collect(Collectors.toList());
    }

    /**
     * Find all parent nodes for the given set of items, by repeatedly looking
     * up the nodes whose child field contains the current set of IDs.
     *
     * @param searcher the searcher for the collection being used.
     * @param facetValues the starting set of node IDs.
     * @return a map of parent node ID to the IDs of that node's children.
     * @throws IOException if a problem occurs reading from the index.
     */
    private Map<String, Set<String>> findParentEntries(SolrIndexSearcher searcher, Collection<String> facetValues)
            throws IOException {
        Map<String, Set<String>> parentEntries = new HashMap<>();
        Set<String> childrenFound = new HashSet<>();
        Set<String> childIds = new HashSet<>(facetValues);

        int count = 0;
        // maxLevels == 0 means no limit.
        // NOTE(review): the bound is inclusive, so up to maxLevels + 1 look-ups
        // are made when a limit is set - confirm this is intended.
        while (!childIds.isEmpty() && (maxLevels == 0 || maxLevels >= count)) {
            // Find the direct parents for the current child IDs
            Map<String, Set<String>> parents = filterEntriesByField(searcher, childIds, childField);
            parentEntries.putAll(parents);
            childrenFound.addAll(childIds);

            // The retrieved nodes are the next set of nodes whose parents should
            // be found, minus any we have already looked up. Work on a copy:
            // keySet() is a live view, and removing from it would delete
            // entries from the parents map itself.
            Set<String> nextIds = new HashSet<>(parents.keySet());
            nextIds.removeAll(childrenFound);
            childIds = nextIds;

            count++;
        }

        return parentEntries;
    }

    /**
     * Fetch the entries for items whose filter field contains any of a set of values.
     *
     * @param searcher the searcher for the collection being used.
     * @param facetValues the incoming values to use as filters.
     * @param filterField the item field to filter against: either the child
     * field or the node field.
     * @return a map of node ID to child IDs for the matching items. The child
     * sets are empty when filtering on the node field.
     * @throws IOException if a problem occurs reading from the index.
     */
    private Map<String, Set<String>> filterEntriesByField(SolrIndexSearcher searcher, Collection<String> facetValues,
            String filterField) throws IOException {
        Map<String, Set<String>> filteredEntries = new HashMap<>();
        if (facetValues.isEmpty()) {
            // A disjunction with no clauses cannot match anything, so skip the search.
            return filteredEntries;
        }

        LOGGER.debug("Looking up {} entries in field {}", facetValues.size(), filterField);
        Query filter = buildFilterQuery(filterField, facetValues);
        LOGGER.trace("Filter query: {}", filter);

        // Filtering on the node field itself - child IDs are redundant
        boolean filteringOnNodeField = filterField.equals(getNodeField());

        DocSet docs = searcher.getDocSet(filter);
        for (DocIterator it = docs.iterator(); it.hasNext(); ) {
            Document doc = searcher.doc(it.nextDoc(), docFields);
            String nodeId = doc.get(getNodeField());

            // Get the children for the node, if necessary
            Set<String> childIds;
            if (filteringOnNodeField) {
                childIds = Collections.emptySet();
            } else {
                childIds = new HashSet<>(Arrays.asList(doc.getValues(filterField)));
                LOGGER.trace("Got {} children for node {}", childIds.size(), nodeId);
            }
            filteredEntries.put(nodeId, childIds);

            // Record the label, if required
            if (isLabelRequired(nodeId)) {
                recordLabel(nodeId, doc.getValues(getLabelField()));
            }
        }

        return filteredEntries;
    }

    /**
     * Build a filter query matching documents whose field contains at least
     * one of the given values.
     *
     * @param field the field to match against.
     * @param values the values to match; each becomes an optional term clause.
     * @return a boolean query with one SHOULD clause per value.
     */
    private Query buildFilterQuery(String field, Collection<String> values) {
        BooleanQuery.Builder builder = new BooleanQuery.Builder().setDisableCoord(true);
        for (String value : values) {
            builder.add(new TermQuery(new Term(field, value)), Occur.SHOULD);
        }
        return builder.build();
    }

    /**
     * Recursively build an accumulated facet entry tree.
     *
     * @param level current level in the tree (used for debugging/logging).
     * @param fieldValue the current node value.
     * @param hierarchyMap the map of nodes (either in the original facet set,
     * or parents of those entries).
     * @param facetCounts the facet counts, keyed by node ID.
     * @return a {@link TreeFacetField} containing details for the current node and all
     * sub-nodes down to the lowest leaf which has a facet count.
     */
    private TreeFacetField buildAccumulatedEntryTree(int level, String fieldValue, Map<String, Set<String>> hierarchyMap,
            Map<String, Integer> facetCounts) {
        return buildAccumulatedEntryTree(level, fieldValue, hierarchyMap, facetCounts, new HashSet<>());
    }

    /**
     * Recursive worker for the accumulated facet entry tree, which tracks the
     * chain of nodes currently being expanded so that cyclic child references
     * (including a node listing itself) are skipped rather than followed forever.
     *
     * @param level current level in the tree (used for debugging/logging).
     * @param fieldValue the current node value.
     * @param hierarchyMap the map of node ID to child IDs.
     * @param facetCounts the facet counts, keyed by node ID.
     * @param ancestors the IDs of the nodes on the path from the top-level node
     * to the current node's parent; restored to its incoming state on return.
     * @return a {@link TreeFacetField} for the current node and its sub-nodes.
     */
    private TreeFacetField buildAccumulatedEntryTree(int level, String fieldValue, Map<String, Set<String>> hierarchyMap,
            Map<String, Integer> facetCounts, Set<String> ancestors) {
        // Build the child hierarchy for this entry.
        // The set is reverse-ordered by the TreeFacetField natural ordering.
        SortedSet<TreeFacetField> childHierarchy = new TreeSet<>(Collections.reverseOrder());
        // childTotal is the total number of facet hits below this node
        long childTotal = 0;

        Set<String> children = hierarchyMap.get(fieldValue);
        if (children != null) {
            // Mark this node as being expanded for the duration of the loop
            boolean added = ancestors.add(fieldValue);
            try {
                for (String childId : children) {
                    if (ancestors.contains(childId)) {
                        // The child is this node or one of its ancestors - following it would never terminate
                        LOGGER.trace("[{}] skipping cyclic reference {}->{}", level, fieldValue, childId);
                    } else if (hierarchyMap.containsKey(childId)) {
                        // Found a child of this node - recurse to build its facet tree
                        LOGGER.trace("[{}] Building child tree for {}, with {} children", level, childId, hierarchyMap.get(childId).size());
                        TreeFacetField childTree = buildAccumulatedEntryTree(level + 1, childId, hierarchyMap, facetCounts, ancestors);

                        // Only add to the total count if this node isn't already in the child hierarchy
                        if (childHierarchy.add(childTree)) {
                            childTotal += childTree.getTotal();
                        }
                        LOGGER.trace("[{}] child tree total: {} - child Total {}, child count {}", level, childTree.getTotal(), childTotal, childHierarchy.size());
                    } else {
                        LOGGER.trace("[{}] no node entry for {}->{}", level, fieldValue, childId);
                    }
                }
            } finally {
                if (added) {
                    ancestors.remove(fieldValue);
                }
            }
        }

        // Build the accumulated facet entry
        LOGGER.trace("[{}] Building facet tree for {}", level, fieldValue);
        return new TreeFacetField(getLabel(fieldValue), fieldValue, getFacetCount(fieldValue, facetCounts), childTotal, childHierarchy);
    }

    /**
     * Get the count for the facet with the given key.
     *
     * @param key the key to look up.
     * @param facetCounts the map of facet counts.
     * @return the count, or <code>0</code> if the key does not exist in the map
     * or is mapped to <code>null</code>.
     */
    private long getFacetCount(String key, Map<String, Integer> facetCounts) {
        // Single look-up; also avoids unboxing a null value
        Integer count = facetCounts.get(key);
        return count == null ? 0 : count;
    }
}