/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.facet.taxonomy;

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.facet.FacetsConfig.DimConfig;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.OrdinalsReader.OrdinalsSegmentReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FilterBinaryDocValues;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/**
 * A {@link org.apache.lucene.index.FilterLeafReader} for updating facets ordinal references,
 * based on an ordinal map. You should use this code in conjunction with merging
 * taxonomies - after you merge taxonomies, you receive an {@link OrdinalMap}
 * which maps the 'old' ordinals to the 'new' ones. You can use that map to
 * re-map the doc values which contain the facets information (ordinals) either
 * before or while merging the indexes.
 * <p>
 * For re-mapping the ordinals during index merge, do the following:
 *
 * <pre class="prettyprint">
 * // merge the old taxonomy with the new one.
 * OrdinalMap map = new MemoryOrdinalMap();
 * DirectoryTaxonomyWriter.addTaxonomy(srcTaxoDir, map);
 * int[] ordmap = map.getMap();
 *
 * // Add the index and re-map ordinals on the go
 * // (facetsConfig is the FacetsConfig that was used to build oldDir)
 * DirectoryReader reader = DirectoryReader.open(oldDir);
 * IndexWriterConfig conf = new IndexWriterConfig(VER, ANALYZER);
 * IndexWriter writer = new IndexWriter(newDir, conf);
 * List&lt;LeafReaderContext&gt; leaves = reader.leaves();
 * LeafReader wrappedLeaves[] = new LeafReader[leaves.size()];
 * for (int i = 0; i &lt; leaves.size(); i++) {
 *   wrappedLeaves[i] = new OrdinalMappingLeafReader(leaves.get(i).reader(), ordmap, facetsConfig);
 * }
 * writer.addIndexes(new MultiReader(wrappedLeaves));
 * writer.commit();
 * </pre>
 *
 * @lucene.experimental
 */
public class OrdinalMappingLeafReader extends FilterLeafReader {

  /**
   * Exposes {@link FacetsConfig#dedupAndEncode(IntsRef)} to this class.
   * Silly way, but we need to use dedupAndEncode and it's protected on FacetsConfig.
   */
  private static class InnerFacetsConfig extends FacetsConfig {

    InnerFacetsConfig() {}

    @Override
    public BytesRef dedupAndEncode(IntsRef ordinals) {
      return super.dedupAndEncode(ordinals);
    }
  }

  /**
   * Binary doc values view that, for the current document, reads the category
   * ordinals through an {@link OrdinalsSegmentReader}, translates each of them
   * through {@link #ordinalMap} and returns the re-encoded result.
   */
  private class OrdinalMappingBinaryDocValues extends FilterBinaryDocValues {

    /** Scratch buffer reused across calls to {@link #binaryValue()}. */
    private final IntsRef ordinals = new IntsRef(32);

    /** Decodes the ordinals stored for a document of the wrapped segment. */
    private final OrdinalsSegmentReader ordsReader;

    OrdinalMappingBinaryDocValues(OrdinalsSegmentReader ordsReader, BinaryDocValues in) throws IOException {
      super(in);
      this.ordsReader = ordsReader;
    }

    /**
     * Returns the encoded, re-mapped ordinals of the current document.
     *
     * @throws RuntimeException wrapping the {@link IOException} if the ordinals cannot be read
     */
    @SuppressWarnings("synthetic-access")
    @Override
    public BytesRef binaryValue() {
      try {
        // NOTE: this isn't quite kosher, because in general
        // multiple threads can call BinaryDV.get which would
        // then conflict on the single ordinals instance, but
        // because this impl is only used for merging, we know
        // only 1 thread calls us:
        ordsReader.get(docID(), ordinals);

        // map the ordinals in place: old ordinal -> new ordinal
        for (int i = 0; i < ordinals.length; i++) {
          ordinals.ints[i] = ordinalMap[ordinals.ints[i]];
        }

        return encode(ordinals);
      } catch (IOException e) {
        throw new RuntimeException("error reading category ordinals for doc " + docID(), e);
      }
    }
  }

  /** Translation table: the index is an old ordinal, the value is its new ordinal. */
  private final int[] ordinalMap;

  /** Used by the default {@link #encode(IntsRef)} implementation. */
  private final InnerFacetsConfig facetsConfig;

  /** Names of the index fields whose binary doc values are re-mapped. */
  private final Set<String> facetFields;

  /**
   * Wraps a LeafReader, mapping ordinals according to the ordinalMap, using
   * the provided {@link FacetsConfig} which was used to build the wrapped
   * reader.
   *
   * @param in the reader to wrap
   * @param ordinalMap maps each old ordinal (array index) to its new ordinal (array value);
   *        the array is kept by reference, not copied
   * @param srcConfig the configuration whose per-dimension index field names select the
   *        fields to re-map
   */
  public OrdinalMappingLeafReader(LeafReader in, int[] ordinalMap, FacetsConfig srcConfig) {
    super(in);
    this.ordinalMap = ordinalMap;
    facetsConfig = new InnerFacetsConfig();
    facetFields = new HashSet<>();
    for (DimConfig dc : srcConfig.getDimConfigs().values()) {
      facetFields.add(dc.indexFieldName);
    }
    // always add the default indexFieldName. This is because FacetsConfig does
    // not explicitly record dimensions that were indexed under the default
    // DimConfig, unless they have a custom DimConfig.
    facetFields.add(FacetsConfig.DEFAULT_DIM_CONFIG.indexFieldName);
  }

  /**
   * Expert: encodes category ordinals into a BytesRef. Override in case you use
   * custom encoding, other than the default done by FacetsConfig.
   *
   * @param ordinals the (already re-mapped) ordinals to encode
   * @return the encoded ordinals
   */
  protected BytesRef encode(IntsRef ordinals) {
    return facetsConfig.dedupAndEncode(ordinals);
  }

  /**
   * Expert: override in case you used custom encoding for the categories under
   * this field.
   *
   * @param field the facet index field to read ordinals from
   * @return the reader used to decode that field's ordinals
   */
  protected OrdinalsReader getOrdinalsReader(String field) {
    return new DocValuesOrdinalsReader(field);
  }

  /**
   * Returns the binary doc values of {@code field}. For facet fields the values
   * are wrapped so that the ordinals they contain are re-mapped; all other
   * fields are returned from the wrapped reader untouched.
   *
   * @return the (possibly wrapped) doc values, or {@code null} if the wrapped
   *         reader returns {@code null} for this field
   */
  @Override
  public BinaryDocValues getBinaryDocValues(String field) throws IOException {
    final BinaryDocValues original = in.getBinaryDocValues(field);
    // The default facet field is always registered, so it may well be absent
    // from this reader: pass the null through instead of wrapping it.
    if (original == null || !facetFields.contains(field)) {
      return original;
    }
    final OrdinalsReader ordsReader = getOrdinalsReader(field);
    return new OrdinalMappingBinaryDocValues(ordsReader.getReader(in.getContext()), original);
  }

  /** Always returns {@code null}: this reader does not expose a core cache helper. */
  @Override
  public CacheHelper getCoreCacheHelper() {
    return null;
  }

  /** Always returns {@code null}: this reader does not expose a reader cache helper. */
  @Override
  public CacheHelper getReaderCacheHelper() {
    return null;
  }
}