package org.apache.lucene.facet.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest.SortOrder;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.search.results.MutableFacetResultNode;
import org.apache.lucene.facet.search.results.IntermediateFacetResult;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenArrays;
import org.apache.lucene.util.collections.IntIterator;
import org.apache.lucene.util.collections.IntToObjectMap;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Generates {@link FacetResult} from the count arrays aggregated for a particular
* {@link FacetRequest}.
* The generated {@link FacetResult} is a subtree of the taxonomy tree.
* Its root node, {@link FacetResult#getFacetResultNode()},
* is the facet specified by {@link FacetRequest#getCategoryPath()},
* and the enumerated children, {@link FacetResultNode#getSubResults()}, of each node in that
* {@link FacetResult} are the top K ( = {@link FacetRequest#getNumResults()}) among its children
* in the taxonomy.
* Top in the sense {@link FacetRequest#getSortBy()},
* which can be by the values aggregated in the count arrays, or by ordinal numbers;
* also specified is the sort order, {@link FacetRequest#getSortOrder()},
* ascending or descending, of these values or ordinals before their top K are selected.
* The depth (number of levels excluding the root) of the
* {@link FacetResult} tree is specified by {@link FacetRequest#getDepth()}.
* <p>
* Because the number of selected children of each node is restricted,
* and not the overall number of nodes in the {@link FacetResult}, facets not selected
* into {@link FacetResult} might have better values, or ordinals, (typically,
* higher counts), than facets that are selected into the {@link FacetResult}.
* <p>
* The generated {@link FacetResult} also provides
* {@link FacetResult#getNumValidDescendants()}, which returns the total number of facets
* that are descendants of the root node, no deeper than {@link FacetRequest#getDepth()}, and
* which have a valid value. The root node itself is not counted here.
* What counts as a valid value is determined by the {@link FacetResultsHandler}.
* {@link TopKInEachNodeHandler} defines valid as != 0.
* <p>
* <b>NOTE:</b> this code relies on the assumption that {@link TaxonomyReader#INVALID_ORDINAL} == -1, a smaller
* value than any valid ordinal.
*
* @lucene.experimental
*/
public class TopKInEachNodeHandler extends FacetResultsHandler {
/**
 * Creates a handler for one facet request; both arguments are passed unchanged to the
 * {@link FacetResultsHandler} constructor.
 *
 * @param taxonomyReader the taxonomy handed to the superclass (presumably the same reader
 *        this class later reads through its <code>taxonomyReader</code> field -- confirm
 *        against {@link FacetResultsHandler})
 * @param facetRequest the request handed to the superclass (presumably the one this class
 *        later reads through its <code>facetRequest</code> field -- confirm likewise)
 */
public TopKInEachNodeHandler(TaxonomyReader taxonomyReader,
FacetRequest facetRequest) {
super(taxonomyReader, facetRequest);
}
/**
 * Explores, for a single partition of the count arrays, all facets that can
 * potentially be included in the {@link FacetResult} to be generated, and for
 * each explored node gathers its top K children among those of its children
 * whose ordinals fall inside the given partition. K is
 * {@link FacetRequest#getNumResults()}, capped by the taxonomy size.
 * <p>
 * A child that is pushed out of its parent's top K inside this partition can
 * never make it into the final {@link FacetResult}: its value is added to the
 * parent's residue, and it and its descendants are only counted (see
 * <code>countOnly</code>), not ranked.
 * <p>
 * The traversal is iterative; it emulates recursion with explicit per-level
 * arrays, and relies on {@link TaxonomyReader#INVALID_ORDINAL} being -1 and on
 * the sibling chain (<code>olderSibling</code>) being walked from larger to
 * smaller ordinals.
 *
 * @param arrays the already filled in count arrays, potentially covering only
 *        one partition
 * @param offset the smallest ordinal covered by <code>arrays</code>; the
 *        partition spans <code>offset</code> (inclusive) to
 *        <code>offset</code> + <code>arrays.getArraysLength()</code> (exclusive)
 * @return <code>null</code> if the requested category path has no ordinal in
 *         the taxonomy; otherwise an {@link IntermediateFacetResultWithHash}
 *         whose {@link IntToObjectMap} maps potential {@link FacetResult}
 *         nodes to their top K children encountered in the current partition.
 *         Note that the mapped-from nodes need not belong to the given
 *         partition, only the top K children mapped to.
 * @throws IOException in case
 *         {@link TaxonomyReader#getOrdinal(org.apache.lucene.facet.taxonomy.CategoryPath)}
 *         does.
 * @see FacetResultsHandler#fetchPartitionResult(FacetArrays, int)
 */
@Override
public IntermediateFacetResult fetchPartitionResult(FacetArrays arrays, int offset) throws IOException {
// get the root of the result tree to be returned, and the depth of that result tree
// (depth means number of node levels excluding the root).
int rootNode = this.taxonomyReader.getOrdinal(this.facetRequest.getCategoryPath());
if (rootNode == TaxonomyReader.INVALID_ORDINAL) {
// the requested category is unknown to the taxonomy: nothing to report
return null;
}
int K = Math.min(facetRequest.getNumResults(),taxonomyReader.getSize()); // number of best results in each node
// this will grow into the returned IntermediateFacetResult
IntToObjectMap<AACO> AACOsOfOnePartition = new IntToObjectMap<AACO>();
int partitionSize = arrays.getArraysLength(); // all partitions, except, possibly, the last,
// have the same length. Hence modulo is OK.
int depth = facetRequest.getDepth();
if (depth == 0) {
// Need to only have root node: return an empty map, plus the root's own value
// when isSelfPartition (defined outside this file; presumably "rootNode lies in
// this partition" -- confirm) says it is available here.
IntermediateFacetResultWithHash tempFRWH = new IntermediateFacetResultWithHash(
facetRequest, AACOsOfOnePartition);
if (isSelfPartition(rootNode, arrays, offset)) {
tempFRWH.isRootNodeIncluded = true;
tempFRWH.rootNodeValue = this.facetRequest.getValueOf(arrays, rootNode % partitionSize);
}
return tempFRWH;
}
// cap the depth; this bounds the size of the per-level arrays allocated below
// (each has depth+2 entries). NOTE(review): the reason for this particular
// constant is not evident from this file.
if (depth > Short.MAX_VALUE - 3) {
depth = Short.MAX_VALUE -3;
}
int endOffset = offset + partitionSize; // one past the largest ordinal in the partition
ChildrenArrays childrenArray = taxonomyReader.getChildrenArrays();
int[] youngestChild = childrenArray.getYoungestChildArray();
int[] olderSibling = childrenArray.getOlderSiblingArray();
int totalNumOfDescendantsConsidered = 0; // total number of facets with value != 0,
// in the tree. These include those selected as top K in each node, and all the others that
// were not. Not including rootNode
// the following priority queue will be used again and again for each node recursed into
// to select its best K children among its children encountered in the given partition
PriorityQueue<AggregatedCategory> pq =
new AggregatedCategoryHeap(K, this.getSuitableACComparator());
// reusables will feed the priority queue in each use; pre-allocated once so that
// no AggregatedCategory is created inside the traversal loop
AggregatedCategory [] reusables = new AggregatedCategory[2+K];
for (int i = 0; i < reusables.length; i++) {
reusables[i] = new AggregatedCategory(1,0);
}
/*
 * The returned map is built by a recursive visit of potential tree nodes. Nodes
 * determined to be excluded from the FacetResult are not recursively explored as others,
 * they are only recursed in order to count the number of their descendants.
 * Also, nodes for which neither they nor any of their descendants can be mapped into facets
 * encountered in this partition are explored no further. These are facets whose ordinal
 * numbers are greater than the ordinals of the given partition. (recall that the Taxonomy
 * maintains that a parent ordinal is smaller than any of its descendants' ordinals).
 * So, when scanning over all children of a potential tree node n: (1) all children with ordinal number
 * greater than those in the given partition are skipped over, (2) among the children of n residing
 * in this partition, the best K children are selected (using pq) for usual further recursion
 * and the rest (those rejected out from the pq) are only recursed for counting total number
 * of descendants, and (3) all the children of ordinal numbers smaller than the given partition
 * are further explored in the usual way, since these may lead to descendants residing in this partition.
 *
 * ordinalStack drives the recursive descent.
 * Top of stack holds the current node which we recurse from.
 * ordinalStack[0] holds the root of the facetRequest, and
 * it is always maintained that parent(ordinalStack[i]) = ordinalStack[i-1].
 * localDepth points to the current top of ordinalStack.
 * Only top of ordinalStack can be TaxonomyReader.INVALID_ORDINAL, and this if and only if
 * the element below it explored all its relevant children.
 */
int[] ordinalStack = new int[depth+2]; // for 0 and for invalid on top
ordinalStack[0] = rootNode;
int localDepth = 0;
/*
 * bestSignlingsStack[i] (sic -- "best siblings") maintains the best K children of
 * ordinalStack[i-1], namely, the best K siblings of ordinalStack[i], best K among those
 * residing in the given partition.
 * Note that the residents of ordinalStack need not belong
 * to the current partition, only the residents of bestSignlingsStack.
 * When exploring the children of ordinalStack[i-1] that reside in the current partition
 * (after the top K of them have been determined and stored into bestSignlingsStack[i]),
 * siblingExplored[i] points into bestSignlingsStack[i], to the child now explored, hence
 * residing in ordinalStack[i], and firstToTheLeftOfPartition[i] holds the largest ordinal of
 * a sibling smaller than the ordinals in the partition.
 * When siblingExplored[i] == max int, the top K siblings of ordinalStack[i] among those siblings
 * that reside in this partition have not been determined yet.
 * if siblingExplored[i] < 0, the node in ordinalStack[i] is to the left of partition
 * (i.e. of a smaller ordinal than the current partition)
 * (step (3) above is executed for the children of ordinalStack[i-1])
 */
int[][] bestSignlingsStack = new int[depth+2][];
int[] siblingExplored = new int[depth+2];
int[] firstToTheLeftOfPartition = new int [depth+2];
int tosOrdinal; // top of stack element, the ordinal at the top of stack
/*
 * to start the loop, complete the data structures for root node:
 * push its youngest child to ordinalStack; make a note in siblingExplored[] that the children
 * of rootNode, which reside in the current partition have not been read yet to select the top
 * K of them. Also, make rootNode as if, related to its parent, rootNode belongs to the children
 * of ordinal numbers smaller than those of the current partition (this will ease on end condition --
 * we can continue to the older sibling of rootNode once the localDepth goes down, before we verify that
 * it went that down)
 */
ordinalStack[++localDepth] = youngestChild[rootNode];
siblingExplored[localDepth] = Integer.MAX_VALUE; // we have not verified position wrt current partition
siblingExplored[0] = -1; // as if rootNode resides to the left of current position
/*
 * now the whole recursion: loop as long as stack is not empty of elements descendants of
 * facetRequest's root.
 */
while (localDepth > 0) {
tosOrdinal = ordinalStack[localDepth];
if (tosOrdinal == TaxonomyReader.INVALID_ORDINAL) {
// the brotherhood that has been occupying the top of stack is all exhausted.
// Hence, element below tos, namely, father of tos, has all its children,
// and itself, all explored.
localDepth--;
// replace this father, now on top of stack, by this father's sibling:
// this parent's ordinal can not be greater than current partition, as otherwise
// its child, now just removed, would not have been pushed on it.
// so the father is either inside the partition, or smaller ordinal
if (siblingExplored[localDepth] < 0 ) {
// father is to the left of the partition: simply follow the sibling chain
ordinalStack[localDepth] = olderSibling[ordinalStack[localDepth]];
continue;
}
// in this point, siblingExplored[localDepth] between 0 and number of bestSiblings
// it can not be max int
siblingExplored[localDepth]--;
if (siblingExplored[localDepth] == -1 ) {
//siblings residing in the partition have been all processed, we now move
// to those of ordinal numbers smaller than the partition
ordinalStack[localDepth] = firstToTheLeftOfPartition[localDepth];
} else {
// still explore siblings residing in the partition
// just move to the next one
ordinalStack[localDepth] = bestSignlingsStack[localDepth][siblingExplored[localDepth]];
}
continue;
} // end of: tosOrdinal is invalid, and hence removed, and its parent was replaced by this
// parent's sibling
// now try to push a kid, but first look at tos whether it 'deserves' its kids explored:
// it is not to the right of current partition, and we know whether to only count or to
// select best K siblings.
if (siblingExplored[localDepth] == Integer.MAX_VALUE) {
//tosOrdinal was not examined yet for its position relative to current partition
// and the best K of current partition, among its siblings, have not been determined yet
// (this loop terminates because INVALID_ORDINAL == -1 < endOffset)
while (tosOrdinal >= endOffset) {
tosOrdinal = olderSibling[tosOrdinal];
}
// now it is inside (or already to the left). Run it and all its siblings inside the
// partition through a heap and in doing so, count them, find best K, and sum into residue
double residue = 0f; // the sum of all the siblings from this partition that do not make
// it to top K
pq.clear();
//reusables are consumed as from a stack. The stack starts full and returns full.
int tosReuslables = reusables.length -1;
while (tosOrdinal >= offset) { // while tosOrdinal belongs to the given partition; here, too, we use the fact
// that TaxonomyReader.INVALID_ORDINAL == -1 < offset
double value = facetRequest.getValueOf(arrays, tosOrdinal % partitionSize);
if (value != 0) { // the value of this sibling is not 0, it is to be considered.
totalNumOfDescendantsConsidered++;
// consume one reusable, and push to the priority queue
AggregatedCategory ac = reusables[tosReuslables--];
ac.ordinal = tosOrdinal;
ac.value = value;
// insertWithOverflow hands back the element that did not fit (possibly another
// instance than the one offered), or null if nothing was dropped
ac = pq.insertWithOverflow(ac);
if (null != ac) {
residue += ac.value;
// TODO (Facet): could it be that we need to do something
// else, not add, depending on the aggregator?
/* when a facet is excluded from top K, because already in this partition it has
 * K better siblings, it is only recursed for count only.
 */
// update totalNumOfDescendants by the now excluded node and all its descendants
totalNumOfDescendantsConsidered--; // reduce the 1 earned when the excluded node entered the heap
// and now return it and all its descendants. These will never make it to FacetResult
totalNumOfDescendantsConsidered += countOnly (ac.ordinal, youngestChild,
olderSibling, arrays, partitionSize, offset, endOffset, localDepth, depth);
// hand the rejected instance back to the pool of reusables
reusables[++tosReuslables] = ac;
}
}
tosOrdinal = olderSibling[tosOrdinal];
}
// now pq has best K children of ordinals that belong to the given partition.
// Populate a new AACO with them.
// tosOrdinal is now first sibling smaller than partition, make a note of that
firstToTheLeftOfPartition[localDepth] = tosOrdinal;
int aaci = pq.size();
int[] ords = new int[aaci];
double [] vals = new double [aaci];
// the heap pops its least element first, so fill the arrays from the back to obtain
// best-to-least order
while (aaci > 0) {
AggregatedCategory ac = pq.pop();
ords[--aaci] = ac.ordinal;
vals[aaci] = ac.value;
reusables[++tosReuslables] = ac;
}
// if more than 0 ordinals, add this AACO to the map to be returned,
// and add ords to sibling stack, and make a note in siblingExplored that these are to
// be visited now
if (ords.length > 0) {
AACOsOfOnePartition.put(ordinalStack[localDepth-1], new AACO(ords,vals,residue));
bestSignlingsStack[localDepth] = ords;
// the selected siblings are visited from the last entry of ords down to the first
siblingExplored[localDepth] = ords.length-1;
ordinalStack[localDepth] = ords[ords.length-1];
} else {
// no ordinals siblings of tosOrdinal in current partition, move to the left of it
// tosOrdinal is already there (to the left of partition).
// make a note of it in siblingExplored
ordinalStack[localDepth] = tosOrdinal;
siblingExplored[localDepth] = -1;
}
continue;
} // end of: we did not check the position of a valid ordinal wrt partition
// now tosOrdinal is a valid ordinal, inside partition or to the left of it, we need
// to push its kids on top of it, if not too deep.
// Make a note that we did not check them yet
if (localDepth >= depth) {
// localDepth == depth; current tos exhausted its possible children, mark this by pushing INVALID_ORDINAL
ordinalStack[++localDepth] = TaxonomyReader.INVALID_ORDINAL;
continue;
}
ordinalStack[++localDepth] = youngestChild[tosOrdinal];
siblingExplored[localDepth] = Integer.MAX_VALUE;
} // end of loop while stack is not empty
// now generate a TempFacetResult from AACOsOfOnePartition, and consider self.
IntermediateFacetResultWithHash tempFRWH = new IntermediateFacetResultWithHash(
facetRequest, AACOsOfOnePartition);
if (isSelfPartition(rootNode, arrays, offset)) {
tempFRWH.isRootNodeIncluded = true;
tempFRWH.rootNodeValue = this.facetRequest.getValueOf(arrays, rootNode % partitionSize);
}
tempFRWH.totalNumOfFacetsConsidered = totalNumOfDescendantsConsidered;
return tempFRWH;
}
/**
 * Counts, recursively, the facets with a non-zero value in the subtree rooted at
 * <code>ordinal</code>, restricted to the current partition and to nodes no deeper
 * than <code>maxDepth</code> (inclusive).
 * <p>
 * The count arrays cover only the current partition, i.e. the ordinals from
 * <code>offset</code> (inclusive) to <code>endOffset</code> (exclusive).
 * The caller must ensure <code>ordinal</code> &lt; <code>endOffset</code>; otherwise
 * neither <code>ordinal</code> nor any of its descendants resides in the current
 * partition. <code>ordinal</code> &lt; <code>offset</code> is allowed, since the
 * descendants of such an ordinal may still be &gt;= <code>offset</code>.
 *
 * @param ordinal a facet ordinal.
 * @param youngestChild maps an ordinal to its youngest child in the taxonomy (the one
 *        with the largest ordinal number), or to -1 if it has no children.
 * @param olderSibling maps an ordinal to its older sibling, or to -1
 * @param arrays values for the ordinals in the given partition
 * @param partitionSize number of ordinals in the given partition
 * @param offset the first (smallest) ordinal in the given partition
 * @param endOffset one larger than the largest ordinal that belongs to this partition
 * @param currentDepth the depth of <code>ordinal</code>, relative to the root node of the
 *        facetRequest
 * @param maxDepth maximal depth of descendants to be considered here (also relative to the
 *        root node of the facetRequest).
 *
 * @return the number of nodes, from <code>ordinal</code> down through its descendants of
 *         depth &lt;= maxDepth, which reside in the current partition and whose value != 0
 */
private int countOnly(int ordinal, int[] youngestChild, int[] olderSibling,
    FacetArrays arrays, int partitionSize, int offset,
    int endOffset, int currentDepth, int maxDepth) {
  // the node itself counts only when it lies inside the partition and carries a value
  int count = (ordinal >= offset
      && facetRequest.getValueOf(arrays, ordinal % partitionSize) != 0) ? 1 : 0;

  if (currentDepth < maxDepth) {
    // skip the children lying to the right of the partition ...
    int child = youngestChild[ordinal];
    while (child >= endOffset) {
      child = olderSibling[child];
    }
    // ... and descend into all the remaining ones; the chain ends at INVALID_ORDINAL,
    // which is assumed to be -1, smaller than any legal ordinal
    for (; child > TaxonomyReader.INVALID_ORDINAL; child = olderSibling[child]) {
      count += countOnly(child, youngestChild, olderSibling, arrays,
          partitionSize, offset, endOffset, currentDepth + 1, maxDepth);
    }
  }
  return count;
}
/**
 * Merges the {@link IntermediateFacetResult}-s of several partitions into one of the
 * same format.
 * <p>
 * The first non-null input serves as the accumulator: it is modified in place and
 * returned. <code>null</code> inputs are skipped wherever they appear in
 * <code>tmpResults</code>.
 *
 * @param tmpResults the per-partition results to merge; each non-null entry must be an
 *        {@link IntermediateFacetResultWithHash}
 * @return the merged result, or <code>null</code> if there are no non-null inputs
 * @throws ClassCastException if a non-null input is not an
 *         {@link IntermediateFacetResultWithHash}
 * @see FacetResultsHandler#mergeResults(IntermediateFacetResult...)
 */
@Override
public IntermediateFacetResult mergeResults(IntermediateFacetResult... tmpResults)
    throws ClassCastException, IllegalArgumentException {
  // skip over leading null tmpResults (this also covers an empty input)
  int i = 0;
  while (i < tmpResults.length && tmpResults[i] == null) {
    i++;
  }
  if (i == tmpResults.length) {
    // no input at all, or all inputs are null
    return null;
  }

  // i points to the first non-null input
  int K = this.facetRequest.getNumResults(); // number of best results in each node
  IntermediateFacetResultWithHash tmpToReturn = (IntermediateFacetResultWithHash) tmpResults[i++];
  IntToObjectMap<AACO> tmpToReturnMapToACCOs = tmpToReturn.mapToAACOs;
  ACComparator merger = getSuitableACComparator(); // by facet request; same for every merge

  // now loop over the rest of tmpResults and merge each into tmpToReturn
  for (; i < tmpResults.length; i++) {
    IntermediateFacetResultWithHash tfr = (IntermediateFacetResultWithHash) tmpResults[i];
    if (tfr == null) {
      // a null may also appear after the first non-null input; nothing to merge from it
      continue;
    }
    tmpToReturn.totalNumOfFacetsConsidered += tfr.totalNumOfFacetsConsidered;
    if (tfr.isRootNodeIncluded) {
      tmpToReturn.isRootNodeIncluded = true;
      tmpToReturn.rootNodeValue = tfr.rootNodeValue;
    }

    // now merge the map of tfr into that of tmpToReturn: iterate over all ordinals in tfr
    // that are mapped to their children (and the residue over non included children)
    IntToObjectMap<AACO> tfrMapToACCOs = tfr.mapToAACOs;
    IntIterator tfrIntIterator = tfrMapToACCOs.keyIterator();
    while (tfrIntIterator.hasNext()) {
      int tfrkey = tfrIntIterator.next();
      AACO tfrAACO = tfrMapToACCOs.get(tfrkey);
      AACO tmpToReturnAACO = tmpToReturnMapToACCOs.get(tfrkey);
      if (tmpToReturnAACO == null) {
        // tmpToReturn does not have any kids of tfrkey: map all the kids
        // from tfr to it as one package, along with their residue
        tmpToReturnMapToACCOs.put(tfrkey, tfrAACO);
      } else {
        // both know kids of tfrkey: keep the best K of the union
        tmpToReturnMapToACCOs.put(tfrkey, mergeTopChildren(tmpToReturnAACO, tfrAACO, K, merger));
      }
    }
  }
  return tmpToReturn;
}

/**
 * Merges two best-first child lists of the same parent into one best-first list of at
 * most <code>K</code> children. The values of the children that do not fit are added to
 * the combined residue.
 */
private static AACO mergeTopChildren(AACO left, AACO right, int K, ACComparator merger) {
  int resLength = Math.min(K, left.ordinals.length + right.ordinals.length);
  int[] resOrds = new int[resLength];
  double[] resVals = new double[resLength];
  double resResidue = left.residue + right.residue;

  int indexIntoLeft = 0;
  int indexIntoRight = 0;
  for (int indexIntoRes = 0; indexIntoRes < resLength; indexIntoRes++) {
    boolean takeLeft;
    if (indexIntoLeft >= left.ordinals.length) {
      takeLeft = false; // left ran out of entries
    } else if (indexIntoRight >= right.ordinals.length) {
      takeLeft = true; // right ran out of entries
    } else {
      // both still have entries: the comparator decides which one goes now
      takeLeft = merger.leftGoesNow(left.ordinals[indexIntoLeft], left.values[indexIntoLeft],
          right.ordinals[indexIntoRight], right.values[indexIntoRight]);
    }
    if (takeLeft) {
      resOrds[indexIntoRes] = left.ordinals[indexIntoLeft];
      resVals[indexIntoRes] = left.values[indexIntoLeft];
      indexIntoLeft++;
    } else {
      resOrds[indexIntoRes] = right.ordinals[indexIntoRight];
      resVals[indexIntoRes] = right.values[indexIntoRight];
      indexIntoRight++;
    }
  }

  // whatever did not make it into the best K is accounted for in the residue
  while (indexIntoLeft < left.ordinals.length) {
    resResidue += left.values[indexIntoLeft++];
  }
  while (indexIntoRight < right.ordinals.length) {
    resResidue += right.values[indexIntoRight++];
  }
  return new AACO(resOrds, resVals, resResidue);
}
/**
 * A bounded heap of {@link AggregatedCategory} whose ordering is delegated to an
 * {@link ACComparator}: an element is "less than" another exactly when the comparator
 * lets that other one go first.
 */
private static class AggregatedCategoryHeap extends PriorityQueue<AggregatedCategory> {

  private final ACComparator merger;

  public AggregatedCategoryHeap(int size, ACComparator merger) {
    super(size);
    this.merger = merger;
  }

  @Override
  protected boolean lessThan(AggregatedCategory candidate, AggregatedCategory other) {
    // note the swapped roles: the comparator is asked whether 'other' goes before 'candidate'
    return merger.leftGoesNow(other.ordinal, other.value, candidate.ordinal, candidate.value);
  }
}
/**
 * A bounded heap of {@link FacetResultNode} whose ordering is delegated to an
 * {@link ACComparator}: a node is "less than" another exactly when the comparator
 * lets that other one go first.
 */
private static class ResultNodeHeap extends PriorityQueue<FacetResultNode> {

  private final ACComparator merger;

  public ResultNodeHeap(int size, ACComparator merger) {
    super(size);
    this.merger = merger;
  }

  @Override
  protected boolean lessThan(FacetResultNode candidate, FacetResultNode other) {
    // note the swapped roles: the comparator is asked whether 'other' goes before 'candidate'
    return merger.leftGoesNow(
        other.getOrdinal(), other.getValue(),
        candidate.getOrdinal(), candidate.getValue());
  }
}
/**
 * Picks the comparator matching the sort order (ascending / descending) and the sort
 * key (value / ordinal) of the {@link FacetRequest}.
 *
 * @return the {@link ACComparator} that reflects the order, expressed in the
 *         {@link FacetRequest}, of facets in the {@link FacetResult}; <code>null</code>
 *         if the sort key is neither VALUE nor ORDINAL.
 */
private ACComparator getSuitableACComparator() {
  final boolean ascending = facetRequest.getSortOrder() == SortOrder.ASCENDING;
  switch (facetRequest.getSortBy()) {
    case VALUE:
      return ascending ? new AscValueACComparator() : new DescValueACComparator();
    case ORDINAL:
      return ascending ? new AscOrdACComparator() : new DescOrdACComparator();
    default:
      return null;
  }
}
/**
 * Decides which of two aggregated categories comes first, according to the order
 * (ascending / descending) and the item (ordinal or value) specified in the
 * FacetRequest for the FacetResult to be generated.
 */
private abstract static class ACComparator {

  ACComparator() { }

  /**
   * @return <code>true</code> if the category (<code>ord1</code>, <code>val1</code>)
   *         should be placed before the category (<code>ord2</code>, <code>val2</code>)
   */
  protected abstract boolean leftGoesNow(int ord1, double val1, int ord2, double val2);
}
/** Ascending by value: the left category goes first only if its value is strictly smaller. */
private static final class AscValueACComparator extends ACComparator {

  AscValueACComparator() { }

  @Override
  protected boolean leftGoesNow(int ord1, double val1, int ord2, double val2) {
    return val2 > val1;
  }
}
/** Descending by value: the left category goes first only if its value is strictly larger. */
private static final class DescValueACComparator extends ACComparator {

  DescValueACComparator() { }

  @Override
  protected boolean leftGoesNow(int ord1, double val1, int ord2, double val2) {
    return val2 < val1;
  }
}
/** Ascending by ordinal: the left category goes first only if its ordinal is strictly smaller. */
private static final class AscOrdACComparator extends ACComparator {

  AscOrdACComparator() { }

  @Override
  protected boolean leftGoesNow(int ord1, double val1, int ord2, double val2) {
    return ord2 > ord1;
  }
}
/** Descending by ordinal: the left category goes first only if its ordinal is strictly larger. */
private static final class DescOrdACComparator extends ACComparator {

  DescOrdACComparator() { }

  @Override
  protected boolean leftGoesNow(int ord1, double val1, int ord2, double val2) {
    return ord2 < ord1;
  }
}
/**
 * Intermediate result holding what was gathered from the partitions processed thus
 * far. Its main field, constructor parameter <i>mapToAACOs</i>, maps ordinals to
 * AACOs. Each AACO holds ordinals and values encountered in the count arrays of the
 * processed partitions; the ordinal it is mapped from is the parent of those ordinals,
 * and need not itself belong to a processed partition. All nodes belong to the taxonomy
 * subtree defined by the root and the depth of the facet request, constructor parameter
 * <i>facetReq</i>.
 */
public static class IntermediateFacetResultWithHash implements IntermediateFacetResult {

  /** Maps a parent ordinal to the best children found for it so far. */
  protected IntToObjectMap<AACO> mapToAACOs;

  /** The request this result is being built for. */
  FacetRequest facetRequest;

  /** Whether the root node was among the ordinals of the partitions processed thus far. */
  boolean isRootNodeIncluded = false;

  /** The value of the root node, in case it was encountered. */
  double rootNodeValue = 0.0;

  /**
   * Total number of facets which belong to the facetRequest subtree, have value != 0,
   * and have been encountered thus far in the partitions processed.
   * The root node of the result tree is not included in this count.
   */
  int totalNumOfFacetsConsidered = 0;

  public IntermediateFacetResultWithHash(FacetRequest facetReq,
      IntToObjectMap<AACO> mapToAACOs) {
    this.facetRequest = facetReq;
    this.mapToAACOs = mapToAACOs;
  }

  public FacetRequest getFacetRequest() {
    return this.facetRequest;
  }
} // end of IntermediateFacetResultWithHash
/**
 * One entry of the filled up count array: the ordinal number of a category together
 * with the value aggregated for it (typically, the count for that ordinal).
 * Both fields are mutable, so that instances can be re-used.
 */
private static final class AggregatedCategory {

  /** Ordinal of the category. */
  int ordinal;

  /** Value aggregated for {@link #ordinal}. */
  double value;

  AggregatedCategory(int ord, double val) {
    ordinal = ord;
    value = val;
  }
}
/**
 * An array of <code>AggregatedCategory</code>, kept -- for space consideration -- as a
 * pair of parallel arrays, <i>ordinals</i> and <i>values</i>, rather than one array of
 * pairs. The <i>ordinals</i> are siblings, potential nodes of the {@link FacetResult}
 * tree (i.e., descendants of the root node, no deeper than the specified depth).
 * No more than K ( = {@link FacetRequest#getNumResults()}) siblings are enumerated, and
 * <i>residue</i> holds the sum of values of the siblings rejected from the enumerated
 * top K.
 * @lucene.internal
 */
protected static final class AACO {

  /** Ordinals of the best K children, sorted from best to least. */
  int[] ordinals;

  /** The respective values for these children. */
  double[] values;

  /** Sum of values of all other children, that did not get into top K. */
  double residue;

  AACO(int[] ords, double[] vals, double r) {
    ordinals = ords;
    values = vals;
    residue = r;
  }
}
/**
 * Recursively labels the first facetRequest.getNumLabel() sub results
 * of the root of a given {@link FacetResult}, or of an already labeled node in it.
 * I.e., a node is labeled only if it is the root or all its ancestors are labeled.
 *
 * @param facetResult the result to label; <code>null</code> is silently ignored
 * @throws IOException propagated from the recursive labeling
 */
@Override
public void labelResult(FacetResult facetResult) throws IOException {
  if (facetResult != null) {
    recursivelyLabel(facetResult.getFacetResultNode(), facetRequest.getNumLabel());
  }
}
/**
 * Labels <code>node</code> and then, recursively, the first <code>numToLabel</code> of
 * its sub results.
 * <p>
 * The node itself is always labeled. When <code>numToLabel</code> is not positive none of
 * its sub results is labeled.
 *
 * @param node the node to label; <code>null</code> is silently ignored
 * @param numToLabel maximal number of sub results to label under each labeled node
 * @throws IOException declared for the label lookup via
 *         {@link FacetResultNode#getLabel(TaxonomyReader)}
 */
private void recursivelyLabel(FacetResultNode node, int numToLabel) throws IOException {
  if (node == null) {
    return;
  }
  node.getLabel(this.taxonomyReader); // attach a label -- category path -- to the node
  if (null == node.getSubResults()) {
    return; // if node has no children -- done
  }
  if (numToLabel <= 0) {
    // the limit below is tested only after a child was labeled, so without this guard
    // the first child would be labeled even though no sub result was asked for
    return;
  }
  // otherwise, label the first numToLabel of these children, and recursively -- their children.
  int numLabeled = 0;
  for (FacetResultNode frn : node.getSubResults()) {
    // go over the children of node from first to last, no more than numToLabel of them
    recursivelyLabel(frn, numToLabel);
    if (++numLabeled >= numToLabel) {
      return;
    }
  }
}
/**
 * Verifies that the children of each node are sorted by the order specified by the
 * facetRequest, and re-orders them if necessary. The values in these nodes may have
 * changed due to a re-count, for example following the accumulation by Sampling.
 *
 * @param facetResult the result to rearrange in place; may be <code>null</code>
 * @return the given <code>facetResult</code>, rearranged; <code>null</code> if it was
 *         <code>null</code>
 */
@Override
public FacetResult rearrangeFacetResult(FacetResult facetResult) {
  if (facetResult == null) {
    return null; // any result to rearrange?
  }
  // Size the heap like fetchPartitionResult does: no node can have more children than
  // there are ordinals in the taxonomy, and a heap of the raw number of requested
  // results may be needlessly huge when the request asks for "all" results.
  int heapSize = Math.min(this.facetRequest.getNumResults(), this.taxonomyReader.getSize());
  PriorityQueue<FacetResultNode> nodesHeap =
      new ResultNodeHeap(heapSize, this.getSuitableACComparator());
  MutableFacetResultNode topFrn = (MutableFacetResultNode) facetResult.getFacetResultNode(); // safe cast
  if (topFrn != null) {
    rearrangeChilrenOfNode(topFrn, nodesHeap);
  }
  return facetResult;
}
/**
 * Re-sorts the sub results of <code>node</code>, and recursively of each of its
 * descendants, best first, by running them through <code>nodesHeap</code>.
 *
 * @param node the node whose sub results are to be re-ordered; must be a
 *        {@link MutableFacetResultNode}. <code>null</code>, or a node whose sub results
 *        are <code>null</code>, is silently ignored.
 * @param nodesHeap a scratch heap, cleared before use and left empty; it must be able
 *        to hold all the sub results of any single node
 */
private void rearrangeChilrenOfNode(FacetResultNode node,
    PriorityQueue<FacetResultNode> nodesHeap) {
  if (node == null || node.getSubResults() == null) {
    return; // nothing to rearrange
  }
  nodesHeap.clear(); // just to be safe
  for (FacetResultNode frn : node.getSubResults()) {
    nodesHeap.add(frn);
  }
  // The heap pops the least node first: fill an array from its end, so that it reads
  // best first. (Repeatedly inserting at the head of an ArrayList would be quadratic.)
  FacetResultNode[] bestFirst = new FacetResultNode[nodesHeap.size()];
  for (int i = bestFirst.length - 1; i >= 0; i--) {
    bestFirst[i] = nodesHeap.pop();
  }
  List<FacetResultNode> subResults = new ArrayList<FacetResultNode>(bestFirst.length);
  for (FacetResultNode frn : bestFirst) {
    subResults.add(frn);
  }
  ((MutableFacetResultNode) node).setSubResults(subResults);
  // the heap is empty again, hence it can serve the children, too
  for (FacetResultNode frn : node.getSubResults()) {
    rearrangeChilrenOfNode(frn, nodesHeap);
  }
}
/**
 * Turns the intermediate result gathered over the processed partitions into the
 * final {@link FacetResult} tree.
 *
 * @param tmpResult the intermediate result; must be an
 *        {@link IntermediateFacetResultWithHash} or <code>null</code>
 * @return the rendered result, or <code>null</code> if <code>tmpResult</code> is
 *         <code>null</code> or the requested category has no ordinal in the taxonomy.
 *         The value of the root node is NaN if the root was not encountered.
 * @throws IOException if looking up the ordinal of the requested category fails
 */
@Override
public FacetResult renderFacetResult(IntermediateFacetResult tmpResult) throws IOException {
  IntermediateFacetResultWithHash gathered = (IntermediateFacetResultWithHash) tmpResult;
  int rootOrdinal = this.taxonomyReader.getOrdinal(this.facetRequest.getCategoryPath());
  if (gathered == null || rootOrdinal == TaxonomyReader.INVALID_ORDINAL) {
    return null;
  }
  double rootValue = gathered.isRootNodeIncluded ? gathered.rootNodeValue : Double.NaN;
  MutableFacetResultNode root = generateNode(rootOrdinal, rootValue, gathered.mapToAACOs);
  return new FacetResult(gathered.facetRequest, root, gathered.totalNumOfFacetsConsidered);
}
/**
 * Builds, recursively, the result node of <code>ordinal</code> together with the
 * subtree below it, as recorded in <code>mapToAACOs</code>.
 *
 * @param ordinal the ordinal of the node to build
 * @param val the value of that node
 * @param mapToAACOs maps an ordinal to its selected children; an ordinal missing from
 *        the map yields a node without sub results
 * @return the node of <code>ordinal</code>, with its sub results and residue set if
 *         the map has an entry for it
 */
private MutableFacetResultNode generateNode(int ordinal, double val, IntToObjectMap<AACO> mapToAACOs) {
  MutableFacetResultNode node = new MutableFacetResultNode(ordinal, val);
  AACO aaco = mapToAACOs.get(ordinal);
  if (aaco != null) {
    int[] childOrdinals = aaco.ordinals;
    double[] childValues = aaco.values;
    List<FacetResultNode> children = new ArrayList<FacetResultNode>();
    for (int i = 0; i < childOrdinals.length; i++) {
      children.add(generateNode(childOrdinals[i], childValues[i], mapToAACOs));
    }
    node.setSubResults(children);
    node.setResidue(aaco.residue);
  }
  return node;
}
}