FacetsPayloadProcessorProvider.java example

Explorer
pylucene-master
package org.apache.lucene.facet.index;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.index.PayloadProcessorProvider;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;

import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap;
import org.apache.lucene.util.encoding.IntDecoder;
import org.apache.lucene.util.encoding.IntEncoder;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * A {@link PayloadProcessorProvider} for updating facets ordinal references,
 * based on an ordinal map. You should use this code in conjunction with merging
 * taxonomies - after you merge taxonomies, you receive an {@link OrdinalMap}
 * which maps the 'old' payloads to the 'new' ones. You can use that map to
 * re-map the payloads which contain the facets information (ordinals) either
 * before or while merging the indexes.
 * <p>
 * For re-mapping the ordinals before you merge the indexes, do the following:
 * 
 * <pre>
 * // merge the old taxonomy with the new one.
 * OrdinalMap map = LuceneTaxonomyWriter.addTaxonomies();
 * int[] ordmap = map.getMap();
 * 
 * // re-map the ordinals on the old directory.
 * Directory oldDir;
 * FacetsPayloadProcessorProvider fppp = new FacetsPayloadProcessorProvider(
 *     oldDir, ordmap);
 * IndexWriterConfig conf = new IndexWriterConfig(VER, ANALYZER);
 * conf.setMergePolicy(new ForceOptimizeMergePolicy());
 * IndexWriter writer = new IndexWriter(oldDir, conf);
 * writer.setPayloadProcessorProvider(fppp);
 * writer.forceMerge(1);
 * writer.close();
 * 
 * // merge that directory with the new index.
 * IndexWriter newWriter; // opened on the 'new' Directory
 * newWriter.addIndexes(oldDir);
 * newWriter.commit();
 * </pre>
 * 
 * For re-mapping the ordinals during index merge, do the following:
 * 
 * <pre>
 * // merge the old taxonomy with the new one.
 * OrdinalMap map = LuceneTaxonomyWriter.addTaxonomies();
 * int[] ordmap = map.getMap();
 * 
 * // Add the index and re-map ordinals on the go
 * IndexReader r = IndexReader.open(oldDir);
 * IndexWriterConfig conf = new IndexWriterConfig(VER, ANALYZER);
 * IndexWriter writer = new IndexWriter(newDir, conf);
 * writer.setPayloadProcessorProvider(fppp);
 * writer.addIndexes(r);
 * writer.commit();
 * </pre>
 * <p>
 * <b>NOTE:</b> while the second example looks simpler, IndexWriter may trigger
 * a long merge due to addIndexes. The first example avoids this perhaps
 * unneeded merge, as well as can be done separately (e.g. on another node)
 * before the index is merged.
 * 
 * @lucene.experimental
 */
public class FacetsPayloadProcessorProvider extends PayloadProcessorProvider {
  
  private final Directory workDir;
  
  private final DirPayloadProcessor dirProcessor;

  /**
   * Construct FacetsPayloadProcessorProvider with FacetIndexingParams
   * 
   * @param dir the {@link Directory} containing the segments to update
   * @param ordinalMap an array mapping previous facets ordinals to new ones
   * @param indexingParams the facets indexing parameters
   */
  public FacetsPayloadProcessorProvider(Directory dir, int[] ordinalMap,
                                        FacetIndexingParams indexingParams) {
    workDir = dir;
    dirProcessor = new FacetsDirPayloadProcessor(indexingParams, ordinalMap);
  }
  
  @Override
  public DirPayloadProcessor getDirProcessor(Directory dir) throws IOException {
    if (workDir != dir) {
      return null;
    }
    return dirProcessor;
  }
  
  public static class FacetsDirPayloadProcessor extends DirPayloadProcessor {
    
    private final Map<Term, CategoryListParams> termMap = new HashMap<Term, CategoryListParams>(1);
    
    private final int[] ordinalMap;
    
    /**
     * Construct FacetsDirPayloadProcessor with custom FacetIndexingParams
     * @param ordinalMap an array mapping previous facets ordinals to new ones
     * @param indexingParams the facets indexing parameters
     */
    protected FacetsDirPayloadProcessor(FacetIndexingParams indexingParams, int[] ordinalMap) {
      this.ordinalMap = ordinalMap;
      for (CategoryListParams params: indexingParams.getAllCategoryListParams()) {
        termMap.put(params.getTerm(), params);
      }
    }
    
    @Override
    public PayloadProcessor getProcessor(Term term) throws IOException {
      CategoryListParams params = termMap.get(term);
      if (params == null) {
        return null;
      }
      return new FacetsPayloadProcessor(params, ordinalMap);
    }

  }
  
  /** A PayloadProcessor for updating facets ordinal references, based on an ordinal map */
  public static class FacetsPayloadProcessor extends PayloadProcessor {
    
    private final IntEncoder encoder;
    private final IntDecoder decoder;
    private final int[] ordinalMap;
    private final ByteArrayOutputStream os = new ByteArrayOutputStream();
    
    /**
     * @param params defines the encoding of facet ordinals as payload
     * @param ordinalMap an array mapping previous facets ordinals to new ones
     */
    protected FacetsPayloadProcessor(CategoryListParams params, int[] ordinalMap) {
      encoder = params.createEncoder();
      decoder = encoder.createMatchingDecoder();
      this.ordinalMap = ordinalMap;
    }
    
    @Override
    public int payloadLength() throws IOException {
      return os.size();
    }

    @Override
    public byte[] processPayload(byte[] payload, int start, int length) throws IOException {
      InputStream is = new ByteArrayInputStream(payload, start, length);
      decoder.reInit(is);
      os.reset();
      encoder.reInit(os);
      long ordinal;
      while ((ordinal = decoder.decode()) != IntDecoder.EOS) {
        int newOrdinal = ordinalMap[(int)ordinal];
        encoder.encode(newOrdinal);      
      }
      encoder.close();
      return os.toByteArray();
    }
  }
  
}