/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.facet.sortedset; import java.io.IOException; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.Map; import org.apache.lucene.facet.FacetsConfig; import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState.OrdRange; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues; import org.apache.lucene.index.MultiDocValues.OrdinalMap; import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.util.Accountable; import org.apache.lucene.util.Accountables; import org.apache.lucene.util.BytesRef; /** * Default implementation of {@link SortedSetDocValuesFacetCounts}. You must ensure the original * {@link IndexReader} passed to the constructor is not closed whenever you use this class! */ public class DefaultSortedSetDocValuesReaderState extends SortedSetDocValuesReaderState { private final String field; private final int valueCount; /** {@link IndexReader} passed to the constructor. */ public final IndexReader reader; private final Map<String,OrdinalMap> cachedOrdMaps = new HashMap<>(); private final Map<String,OrdRange> prefixToOrdRange = new HashMap<>(); /** Creates this, pulling doc values from the default {@link * FacetsConfig#DEFAULT_INDEX_FIELD_NAME}. */ public DefaultSortedSetDocValuesReaderState(IndexReader reader) throws IOException { this(reader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME); } /** Creates this, pulling doc values from the specified * field. */ public DefaultSortedSetDocValuesReaderState(IndexReader reader, String field) throws IOException { this.field = field; this.reader = reader; // We need this to create thread-safe MultiSortedSetDV // per collector: SortedSetDocValues dv = getDocValues(); if (dv == null) { throw new IllegalArgumentException("field \"" + field + "\" was not indexed with SortedSetDocValues"); } if (dv.getValueCount() > Integer.MAX_VALUE) { throw new IllegalArgumentException("can only handle valueCount < Integer.MAX_VALUE; got " + dv.getValueCount()); } valueCount = (int) dv.getValueCount(); // TODO: we can make this more efficient if eg we can be // "involved" when OrdinalMap is being created? Ie see // each term/ord it's assigning as it goes... String lastDim = null; int startOrd = -1; // TODO: this approach can work for full hierarchy?; // TaxoReader can't do this since ords are not in // "sorted order" ... but we should generalize this to // support arbitrary hierarchy: for(int ord=0;ord<valueCount;ord++) { final BytesRef term = dv.lookupOrd(ord); String[] components = FacetsConfig.stringToPath(term.utf8ToString()); if (components.length != 2) { throw new IllegalArgumentException("this class can only handle 2 level hierarchy (dim/value); got: " + Arrays.toString(components) + " " + term.utf8ToString()); } if (!components[0].equals(lastDim)) { if (lastDim != null) { prefixToOrdRange.put(lastDim, new OrdRange(startOrd, ord-1)); } startOrd = ord; lastDim = components[0]; } } if (lastDim != null) { prefixToOrdRange.put(lastDim, new OrdRange(startOrd, valueCount-1)); } } /** * Return the memory usage of this object in bytes. Negative values are illegal. */ @Override public long ramBytesUsed() { synchronized (cachedOrdMaps) { long bytes = 0; for (OrdinalMap map : cachedOrdMaps.values()) { bytes += map.ramBytesUsed(); } return bytes; } } /** * Returns nested resources of this class. * The result should be a point-in-time snapshot (to avoid race conditions). * @see Accountables */ @Override public Collection<Accountable> getChildResources() { synchronized (cachedOrdMaps) { return Accountables.namedAccountables("DefaultSortedSetDocValuesReaderState", cachedOrdMaps); } } @Override public String toString() { return "DefaultSortedSetDocValuesReaderState(field=" + field + " reader=" + reader + ")"; } /** Return top-level doc values. */ @Override public SortedSetDocValues getDocValues() throws IOException { // TODO: this is dup'd from slow composite reader wrapper ... can we factor it out to share? OrdinalMap map = null; // TODO: why are we lazy about this? It's better if ctor pays the cost, not first query? Oh, but we // call this method from ctor, ok. Also, we only ever store one entry in the map (for key=field) so // why are we using a map? synchronized (cachedOrdMaps) { map = cachedOrdMaps.get(field); if (map == null) { // uncached, or not a multi dv SortedSetDocValues dv = MultiDocValues.getSortedSetValues(reader, field); if (dv instanceof MultiDocValues.MultiSortedSetDocValues) { map = ((MultiDocValues.MultiSortedSetDocValues)dv).mapping; IndexReader.CacheHelper cacheHelper = reader.getReaderCacheHelper(); if (cacheHelper != null && map.owner == cacheHelper.getKey()) { cachedOrdMaps.put(field, map); } } return dv; } } assert map != null; int size = reader.leaves().size(); final SortedSetDocValues[] values = new SortedSetDocValues[size]; final int[] starts = new int[size+1]; long cost = 0; for (int i = 0; i < size; i++) { LeafReaderContext context = reader.leaves().get(i); final LeafReader reader = context.reader(); final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field); if (fieldInfo != null && fieldInfo.getDocValuesType() != DocValuesType.SORTED_SET) { return null; } SortedSetDocValues v = reader.getSortedSetDocValues(field); if (v == null) { v = DocValues.emptySortedSet(); } values[i] = v; starts[i] = context.docBase; cost += v.cost(); } starts[size] = reader.maxDoc(); return new MultiSortedSetDocValues(values, starts, map, cost); } /** Returns mapping from prefix to {@link OrdRange}. */ @Override public Map<String,OrdRange> getPrefixToOrdRange() { return prefixToOrdRange; } /** Returns the {@link OrdRange} for this dimension. */ @Override public OrdRange getOrdRange(String dim) { return prefixToOrdRange.get(dim); } /** Indexed field we are reading. */ @Override public String getField() { return field; } @Override public IndexReader getReader() { return reader; } /** Number of unique labels. */ @Override public int getSize() { return valueCount; } }