/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.solr.handler.component; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.util.BytesRef; import org.apache.solr.schema.SchemaField; import org.apache.solr.search.SolrIndexSearcher; /** * FieldFacetStats is a utility to accumulate statistics on a set of values in one field, * for facet values present in another field. * <p> * 9/10/2009 - Moved out of StatsComponent to allow open access to UnInvertedField * @see org.apache.solr.handler.component.StatsComponent * */ public class FieldFacetStats { public final String name; final StatsField statsField; final SchemaField facet_sf; public final Map<String, StatsValues> facetStatsValues; private final Map<Integer, Integer> missingStats; List<HashMap<String, Integer>> facetStatsTerms; final LeafReader topLevelReader; LeafReaderContext leave; final ValueSource valueSource; LeafReaderContext context; FunctionValues values; SortedDocValues topLevelSortedValues = null; public FieldFacetStats(SolrIndexSearcher searcher, SchemaField facet_sf, StatsField statsField) { this.statsField = statsField; this.facet_sf = facet_sf; this.name = facet_sf.getName(); topLevelReader = searcher.getSlowAtomicReader(); valueSource = facet_sf.getType().getValueSource(facet_sf, null); facetStatsValues = new HashMap<>(); facetStatsTerms = new ArrayList<>(); missingStats = new HashMap<>(); } private StatsValues getStatsValues(String key) throws IOException { StatsValues stats = facetStatsValues.get(key); if (stats == null) { stats = StatsValuesFactory.createStatsValues(statsField); facetStatsValues.put(key, stats); stats.setNextReader(context); } return stats; } // docID is relative to the context public void facet(int docID) throws IOException { final String key = values.exists(docID) ? values.strVal(docID) : null; final StatsValues stats = getStatsValues(key); stats.accumulate(docID); } // Function to keep track of facet counts for term number. // Currently only used by UnInvertedField stats public boolean facetTermNum(int docID, int statsTermNum) throws IOException { if (topLevelSortedValues == null) { topLevelSortedValues = DocValues.getSorted(topLevelReader, name); } if (docID > topLevelSortedValues.docID()) { topLevelSortedValues.advance(docID); } int term; if (docID == topLevelSortedValues.docID()) { term = topLevelSortedValues.ordValue(); } else { term = -1; } int arrIdx = term; if (arrIdx >= 0 && arrIdx < topLevelSortedValues.getValueCount()) { final String key; if (term == -1) { key = null; } else { key = topLevelSortedValues.lookupOrd(term).utf8ToString(); } while (facetStatsTerms.size() <= statsTermNum) { facetStatsTerms.add(new HashMap<String, Integer>()); } final Map<String, Integer> statsTermCounts = facetStatsTerms.get(statsTermNum); Integer statsTermCount = statsTermCounts.get(key); if (statsTermCount == null) { statsTermCounts.put(key, 1); } else { statsTermCounts.put(key, statsTermCount + 1); } return true; } return false; } //function to accumulate counts for statsTermNum to specified value public boolean accumulateTermNum(int statsTermNum, BytesRef value) throws IOException { if (value == null) return false; while (facetStatsTerms.size() <= statsTermNum) { facetStatsTerms.add(new HashMap<String, Integer>()); } for (Map.Entry<String, Integer> pairs : facetStatsTerms.get(statsTermNum).entrySet()) { String key = (String) pairs.getKey(); StatsValues facetStats = facetStatsValues.get(key); if (facetStats == null) { facetStats = StatsValuesFactory.createStatsValues(statsField); facetStatsValues.put(key, facetStats); } Integer count = (Integer) pairs.getValue(); if (count != null) { facetStats.accumulate(value, count); } } return true; } public void setNextReader(LeafReaderContext ctx) throws IOException { this.context = ctx; values = valueSource.getValues(Collections.emptyMap(), ctx); for (StatsValues stats : facetStatsValues.values()) { stats.setNextReader(ctx); } } public void facetMissingNum(int docID) throws IOException { if (topLevelSortedValues == null) { topLevelSortedValues = DocValues.getSorted(topLevelReader, name); } if (docID > topLevelSortedValues.docID()) { topLevelSortedValues.advance(docID); } if (docID == topLevelSortedValues.docID()) { int ord = topLevelSortedValues.ordValue(); Integer missingCount = missingStats.get(ord); if (missingCount == null) { missingStats.put(ord, 1); } else { missingStats.put(ord, missingCount + 1); } } } public void accumulateMissing() throws IOException { StatsValues statsValue; for (Map.Entry<Integer, Integer> entry : missingStats.entrySet()) { if (entry.getKey() >= 0) { String key = topLevelSortedValues.lookupOrd(entry.getKey()).utf8ToString(); if ((statsValue = facetStatsValues.get(key)) != null) { statsValue.addMissing(entry.getValue()); } } } return; } }