OrdinalMappingAtomicReader.java example

Explorer
solr-analytics-master
- lucene
- solr
package org.apache.lucene.facet.index;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterAtomicReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.encoding.IntDecoder;
import org.apache.lucene.util.encoding.IntEncoder;

/**
 * A {@link FilterAtomicReader} for updating facets ordinal references,
 * based on an ordinal map. You should use this code in conjunction with merging
 * taxonomies - after you merge taxonomies, you receive an {@link OrdinalMap}
 * which maps the 'old' ordinals to the 'new' ones. You can use that map to
 * re-map the payloads which contain the facets information (ordinals) either
 * before or while merging the indexes.
 * <p>
 * For re-mapping the ordinals during index merge, do the following:
 * 
 * <pre class="prettyprint">
 * // merge the old taxonomy with the new one.
 * OrdinalMap map = DirectoryTaxonomyWriter.addTaxonomies();
 * int[] ordmap = map.getMap();
 * 
 * // Add the index and re-map ordinals on the go
 * DirectoryReader reader = DirectoryReader.open(oldDir);
 * IndexWriterConfig conf = new IndexWriterConfig(VER, ANALYZER);
 * IndexWriter writer = new IndexWriter(newDir, conf);
 * List&lt;AtomicReaderContext&gt; leaves = reader.leaves();
 * AtomicReader wrappedLeaves[] = new AtomicReader[leaves.size()];
 * for (int i = 0; i &lt; leaves.size(); i++) {
 *   wrappedLeaves[i] = new OrdinalMappingAtomicReader(leaves.get(i).reader(), ordmap);
 * }
 * writer.addIndexes(new MultiReader(wrappedLeaves));
 * writer.commit();
 * </pre>
 * 
 * @lucene.experimental
 */
public class OrdinalMappingAtomicReader extends FilterAtomicReader {
  /** Lookup table: indexed by an ordinal read from a payload, yields its replacement. */
  private final int[] ordinalMap;

  // Category-list parameters keyed first by field name and then by term bytes,
  // so that lookups never have to allocate a Term object.
  private final Map<String,Map<BytesRef,CategoryListParams>> termMap =
      new HashMap<String,Map<BytesRef,CategoryListParams>>(1);

  /**
   * Wraps an AtomicReader, mapping ordinals according to the ordinalMap.
   * Delegates to {@link #OrdinalMappingAtomicReader(AtomicReader, int[], FacetIndexingParams)}
   * with a freshly created {@link DefaultFacetIndexingParams}.
   */
  public OrdinalMappingAtomicReader(AtomicReader in, int[] ordinalMap) {
    this(in, ordinalMap, new DefaultFacetIndexingParams());
  }

  /**
   * Wraps an AtomicReader, mapping ordinals according to the ordinalMap.
   * Every {@link CategoryListParams} reported by the given indexingParams is
   * registered under its term's field and bytes; only payloads of those terms
   * are rewritten.
   */
  public OrdinalMappingAtomicReader(AtomicReader in, int[] ordinalMap, FacetIndexingParams indexingParams) {
    super(in);
    this.ordinalMap = ordinalMap;
    for (CategoryListParams clp : indexingParams.getAllCategoryListParams()) {
      final Term listTerm = clp.getTerm();
      final String fieldName = listTerm.field();
      Map<BytesRef,CategoryListParams> byTermBytes = termMap.get(fieldName);
      if (byTermBytes == null) {
        // first category list seen for this field
        byTermBytes = new HashMap<BytesRef,CategoryListParams>(1);
        termMap.put(fieldName, byTermBytes);
      }
      byTermBytes.put(listTerm.bytes(), clp);
    }
  }

  /** Returns the wrapped reader's term vectors behind an ordinal-mapping view, or null if it has none. */
  @Override
  public Fields getTermVectors(int docID) throws IOException {
    final Fields vectors = super.getTermVectors(docID);
    return vectors == null ? null : new OrdinalMappingFields(vectors);
  }

  /** Returns the wrapped reader's fields behind an ordinal-mapping view, or null if it has none. */
  @Override
  public Fields fields() throws IOException {
    final Fields delegate = super.fields();
    return delegate == null ? null : new OrdinalMappingFields(delegate);
  }

  /** Fields view that wraps the Terms of every field which has registered category-list terms. */
  private class OrdinalMappingFields extends FilterFields {

    public OrdinalMappingFields(Fields in) {
      super(in);
    }

    @Override
    public Terms terms(String field) throws IOException {
      final Terms wrapped = super.terms(field);
      if (wrapped != null) {
        final Map<BytesRef,CategoryListParams> listsInField = termMap.get(field);
        if (listsInField != null) {
          return new OrdinalMappingTerms(wrapped, listsInField);
        }
      }
      // either the field is absent or nothing is registered for it: pass through as-is
      return wrapped;
    }
  }

  /** Terms view whose iterator hands out {@link OrdinalMappingTermsEnum} instances. */
  private class OrdinalMappingTerms extends FilterTerms {
    private final Map<BytesRef,CategoryListParams> termsMap;

    public OrdinalMappingTerms(Terms in, Map<BytesRef,CategoryListParams> termsMap) {
      super(in);
      this.termsMap = termsMap;
    }

    @Override
    public TermsEnum iterator(TermsEnum reuse) throws IOException {
      // TODO: should we reuse the inner termsenum?
      final TermsEnum delegate = super.iterator(reuse);
      return new OrdinalMappingTermsEnum(delegate, termsMap);
    }
  }

  /** TermsEnum view that wraps the positions enum of terms registered as category lists. */
  private class OrdinalMappingTermsEnum extends FilterTermsEnum {
    private final Map<BytesRef,CategoryListParams> termsMap;

    public OrdinalMappingTermsEnum(TermsEnum in, Map<BytesRef,CategoryListParams> termsMap) {
      super(in);
      this.termsMap = termsMap;
    }

    @Override
    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
      // TODO: we could reuse our D&P enum if we need
      final DocsAndPositionsEnum postings = super.docsAndPositions(liveDocs, reuse, flags);
      if (postings != null) {
        final CategoryListParams clp = termsMap.get(term());
        if (clp != null) {
          return new OrdinalMappingDocsAndPositionsEnum(postings, clp);
        }
      }
      // no positions available, or the current term is not a category-list term
      return postings;
    }
  }

  /**
   * Positions enum that decodes each payload into ordinals, translates every
   * ordinal through {@code ordinalMap} and re-encodes the result.
   */
  private class OrdinalMappingDocsAndPositionsEnum extends FilterDocsAndPositionsEnum {
    private final IntEncoder encoder;
    private final IntDecoder decoder;
    private final ByteArrayOutputStream os = new ByteArrayOutputStream();
    // single instance handed back by every getPayload() call; its contents are overwritten each time
    private final BytesRef payloadOut = new BytesRef();

    public OrdinalMappingDocsAndPositionsEnum(DocsAndPositionsEnum in, CategoryListParams params) {
      super(in);
      encoder = params.createEncoder();
      decoder = encoder.createMatchingDecoder();
    }

    @Override
    public BytesRef getPayload() throws IOException {
      final BytesRef original = super.getPayload();
      if (original == null) {
        return null;
      }

      // point the decoder at the stored bytes and the encoder at an emptied buffer
      final InputStream source = new ByteArrayInputStream(original.bytes, original.offset, original.length);
      decoder.reInit(source);
      os.reset();
      encoder.reInit(os);

      // translate ordinals one by one until the decoder signals end of stream
      for (long ord = decoder.decode(); ord != IntDecoder.EOS; ord = decoder.decode()) {
        encoder.encode(ordinalMap[(int) ord]);
      }
      encoder.close();

      // TODO (Facet): avoid copy?
      final byte[] remapped = os.toByteArray();
      payloadOut.bytes = remapped;
      payloadOut.offset = 0;
      payloadOut.length = remapped.length;
      return payloadOut;
    }
  }
}