/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.search.join;

import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.LongValues;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.DocValuesFacets;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.SolrIndexSearcher;

/**
 * This class is responsible for collecting block join facet counts for a particular field.
 *
 * <p>Counts are kept per parent (block) document: a facet value is counted at most once per
 * parent, no matter how many child documents in that block carry the value (see
 * {@link #accumulateTermOrd(int, int)} for the dedup mechanism). Counting happens in
 * per-segment ordinal space via {@link #updateCountsWithMatchedBlock(AggregatableDocIter)};
 * on each segment change {@link #migrateGlobal()} folds the finished segment's counts into
 * global ordinal space, and {@link #getFacetValue()} labels the final counts.
 *
 * <p>NOTE: not thread-safe — all state is per-instance mutable and unsynchronized.
 */
class BlockJoinFieldFacetAccumulator {
  private String fieldName;
  private FieldType fieldType;
  // ord of the segment currently being accumulated; -1 until setNextReader() is called
  private int currentSegment = -1;
  // for term lookups only
  private SortedSetDocValues topSSDV;
  // counts in global ordinal space; [0] reserved for "missing"; null until first migration
  private int[] globalCounts;
  // doc values of the current segment (single-valued fields are wrapped as a singleton set)
  private SortedSetDocValues segmentSSDV;
  // elems are : facet value counter<<32 | last parent doc num
  // slot [0] holds the "missing" accumulator; slot [1+ord] holds segment ord's accumulator
  private long[] segmentAccums = new long[0];
  // for mapping per-segment ords to global ones; null for single-segment indexes
  private MultiDocValues.OrdinalMap ordinalMap;
  private SchemaField schemaField;
  // non-null only when the codec exposes the segment field as truly single-valued
  private SortedDocValues segmentSDV;

  /**
   * Looks up the field in the schema and pulls top-level doc values (and, for multi-segment
   * indexes, the ordinal map) from the searcher.
   *
   * @param fieldName the facet field to accumulate counts for
   * @param searcher source of schema and top-level doc values
   * @throws IOException on doc-values access failure
   */
  BlockJoinFieldFacetAccumulator(String fieldName, SolrIndexSearcher searcher) throws IOException {
    this.fieldName = fieldName;
    schemaField = searcher.getSchema().getField(fieldName);
    fieldType = schemaField.getType();
    ordinalMap = null;
    if (schemaField.multiValued()) {
      topSSDV = searcher.getSlowAtomicReader().getSortedSetDocValues(fieldName);
      if (topSSDV instanceof MultiDocValues.MultiSortedSetDocValues) {
        ordinalMap = ((MultiDocValues.MultiSortedSetDocValues) topSSDV).mapping;
      }
    } else {
      SortedDocValues single = searcher.getSlowAtomicReader().getSortedDocValues(fieldName);
      if (single instanceof MultiDocValues.MultiSortedDocValues) {
        ordinalMap = ((MultiDocValues.MultiSortedDocValues) single).mapping;
      }
      if (single != null) {
        // present single-valued values through the same SORTED_SET view used everywhere else
        topSSDV = DocValues.singleton(single);
      }
    }
  }

  /**
   * (Re)initializes per-segment state: fetches the segment's doc values and resets the
   * accumulator array to "count 0, last parent -1" for every slot.
   *
   * @return true if the segment has any values for this field
   * @throws IOException on doc-values access failure
   */
  private boolean initSegmentData(String fieldName, LeafReaderContext leaf) throws IOException {
    segmentSSDV = DocValues.getSortedSet(leaf.reader(), fieldName);
    segmentAccums = ArrayUtil.grow(segmentAccums, (int)segmentSSDV.getValueCount()+1);//+1
    // zero counts, -1 parent
    Arrays.fill(segmentAccums, 0, (int)segmentSSDV.getValueCount()+1, 0x00000000ffffffffL);
    segmentSDV = DocValues.unwrapSingleton(segmentSSDV);
    return segmentSSDV.getValueCount()!=0;// perhaps we need to count "missings"??
  }

  /** A doc iterator that also exposes an aggregation key (here: the parent doc number). */
  interface AggregatableDocIter extends DocIterator {
    /** rewinds the iterator so the same block can be re-iterated per field */
    void reset();
    /** a key to aggregate the current document */
    int getAggKey();
  }

  /** Iterates a sorted array of child doc numbers belonging to a single parent block. */
  static class SortedIntsAggDocIterator implements AggregatableDocIter {
    private int[] childDocs;
    private int childCount;
    private int parentDoc;
    // -1 until reset() is called; callers are expected to reset() before iterating
    private int pos = -1;

    public SortedIntsAggDocIterator(int[] childDocs, int childCount, int parentDoc) {
      this.childDocs = childDocs;
      this.childCount = childCount;
      this.parentDoc = parentDoc;
    }

    @Override
    public boolean hasNext() {
      return pos < childCount;
    }

    @Override
    public Integer next() {
      return nextDoc();
    }

    @Override
    public int nextDoc() {
      return childDocs[pos++];
    }

    @Override
    public float score() {
      // scores are irrelevant for faceting
      return 0;
    }

    @Override
    public void reset() {
      pos = 0;
    }

    @Override
    public int getAggKey() {
      return parentDoc;
    }
  }

  /**
   * Accumulates this block's facet values: for every child doc from {@code iter}, each term
   * ordinal is folded into {@link #segmentAccums} keyed by {@code iter.getAggKey()} (the
   * parent doc), so a term is counted at most once per block. Requires child doc numbers in
   * ascending order, since the doc-values iterators are only advanced forward.
   *
   * <p>NOTE(review): in the multi-valued branch a doc without a value is skipped entirely,
   * while the single-valued branch records it as "missing" (term=-1) — confirm this
   * asymmetry is intended; the missing slot is never surfaced by {@link #getFacetValue()}
   * anyway.
   *
   * @throws IOException on doc-values advance failure
   */
  void updateCountsWithMatchedBlock(AggregatableDocIter iter) throws IOException {
    if (segmentSDV != null) {
      // some codecs may optimize SORTED_SET storage for single-valued fields
      for (iter.reset(); iter.hasNext(); ) {
        final int docNum = iter.nextDoc();
        if (docNum > segmentSDV.docID()) {
          segmentSDV.advance(docNum);
        }
        int term;
        if (docNum == segmentSDV.docID()) {
          term = segmentSDV.ordValue();
        } else {
          // doc has no value for this field
          term = -1;
        }
        accumulateTermOrd(term, iter.getAggKey());
        //System.out.println("doc# "+docNum+" "+fieldName+" term# "+term+" tick "+Long.toHexString(segmentAccums[1+term]));
      }
    } else {
      for (iter.reset(); iter.hasNext(); ) {
        final int docNum = iter.nextDoc();
        if (docNum > segmentSSDV.docID()) {
          segmentSSDV.advance(docNum);
        }
        if (docNum == segmentSSDV.docID()) {
          int term = (int) segmentSSDV.nextOrd();
          do { // absent values are designated by term=-1, first iteration counts [0] as "missing", and exit, otherwise it spins
            accumulateTermOrd(term, iter.getAggKey());
          } while (term >= 0 && (term = (int) segmentSSDV.nextOrd()) >= 0);
        }
      }
    }
  }

  String getFieldName() {
    return fieldName;
  }

  /**
   * Labels the accumulated counts: resolves each counted ordinal to its readable term text
   * and returns (term, count) pairs. Uses {@link #globalCounts} when segment counts were
   * migrated, otherwise reads the single segment's accumulators directly. The loop starts
   * at index 1, so the "missing" slot [0] is never reported.
   *
   * <p>copy paste from {@link DocValuesFacets}
   *
   * @throws IOException on term lookup failure
   */
  NamedList<Integer> getFacetValue() throws IOException {
    NamedList<Integer> facetValue = new NamedList<>();
    final CharsRefBuilder charsRef = new CharsRefBuilder();
    // if there is no globs, take segment's ones
    for (int i = 1; i < (globalCounts != null ? globalCounts.length : segmentAccums.length); i++) {
      // accumulator slots keep the count in the upper 32 bits
      int count = globalCounts != null ? globalCounts[i] : (int)(segmentAccums[i] >> 32);
      if (count > 0) {
        // slot i corresponds to ordinal i-1 ([0] is the missing slot)
        BytesRef term = topSSDV.lookupOrd(-1 + i);
        fieldType.indexedToReadable(term, charsRef);
        facetValue.add(charsRef.toString(), count);
      }
    }
    return facetValue;
  }

  // @todo we can track in max term nums to loop only changed range while migrating and labeling
  /**
   * Folds one (term, parent) observation into {@link #segmentAccums}. Each slot packs
   * {@code count<<32 | lastParentDoc}; the count is bumped only when the stored parent
   * differs from {@code parentDoc}, which deduplicates repeats of a term within one block.
   * {@code term} may be -1 (no value), which lands in the "missing" slot [0].
   */
  private void accumulateTermOrd(int term, int parentDoc) {
    long accum = segmentAccums[1 + term];
    if (((int)(accum & 0xffffffffL)) != parentDoc) {// incrementing older 32, reset smaller 32, set them to the new parent
      segmentAccums[1 + term] = ((accum + (0x1L << 32)) & 0xffffffffL << 32) | parentDoc;
    }
  }

  /**
   * Switches accumulation to the given segment. Callers are expected to migrate the previous
   * segment's counts (via {@link #migrateGlobal()}) before advancing.
   *
   * @throws IOException on doc-values access failure
   */
  void setNextReader(LeafReaderContext context) throws IOException {
    initSegmentData(fieldName, context);
    currentSegment = context.ord;
  }

  /**
   * Folds the current segment's accumulators into {@link #globalCounts}, allocating the
   * global array on first use. No-op when nothing was collected or the index has a single
   * segment (no {@link #ordinalMap}).
   */
  void migrateGlobal() {
    if (currentSegment < 0 // no hits
        || segmentAccums.length == 0
        || ordinalMap == null) { // single segment
      return;
    }
    if (globalCounts == null) {
      // it might be just a single segment
      globalCounts = new int[(int) ordinalMap.getValueCount() + /*[0] for missing*/1];
    } else {
      assert currentSegment >= 0;
    }
    migrateGlobal(globalCounts, segmentAccums, currentSegment, ordinalMap);
  }

  /**
   * folds counts in segment ordinal space (segCounts) into global ordinal space (counts)
   * copy paste-from {@link DocValuesFacets#migrateGlobal(int[], int[], int, OrdinalMap)}
   *
   * <p>Both arrays are shifted by one slot: index 0 is the "missing" count, index 1+ord the
   * per-ordinal count (counts in the upper 32 bits of each segCounts element).
   */
  void migrateGlobal(int counts[], long segCounts[], int subIndex, OrdinalMap map) {
    final LongValues ordMap = map.getGlobalOrds(subIndex);
    // missing count
    counts[0] += (int) (segCounts[0] >> 32);
    // migrate actual ordinals
    for (int ord = 1; ord <= segmentSSDV.getValueCount(); ord++) {
      int count = (int) (segCounts[ord] >> 32);
      if (count != 0) {
        counts[1 + (int) ordMap.get(ord - 1)] += count;
      }
    }
  }
}