package org.apache.lucene.facet.index;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.index.PayloadProcessorProvider;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap;
import org.apache.lucene.util.encoding.IntDecoder;
import org.apache.lucene.util.encoding.IntEncoder;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A {@link PayloadProcessorProvider} for updating facets ordinal references,
* based on an ordinal map. You should use this code in conjunction with merging
* taxonomies - after you merge taxonomies, you receive an {@link OrdinalMap}
* which maps the 'old' ordinals to the 'new' ones. You can use that map to
* re-map the payloads which contain the facets information (ordinals) either
* before or while merging the indexes.
* <p>
* For re-mapping the ordinals before you merge the indexes, do the following:
*
* <pre>
* // merge the old taxonomy with the new one.
* OrdinalMap map = DirectoryTaxonomyWriter.addTaxonomies();
* int[] ordmap = map.getMap();
*
* // re-map the ordinals on the old directory.
* Directory oldDir;
* FacetIndexingParams indexingParams; // the facets indexing parameters
* FacetsPayloadProcessorProvider fppp = new FacetsPayloadProcessorProvider(
* oldDir, ordmap, indexingParams);
* IndexWriterConfig conf = new IndexWriterConfig(VER, ANALYZER);
* conf.setMergePolicy(new ForceOptimizeMergePolicy());
* IndexWriter writer = new IndexWriter(oldDir, conf);
* writer.setPayloadProcessorProvider(fppp);
* writer.forceMerge(1);
* writer.close();
*
* // merge that directory with the new index.
* IndexWriter newWriter; // opened on the 'new' Directory
* newWriter.addIndexes(oldDir);
* newWriter.commit();
* </pre>
*
* For re-mapping the ordinals during index merge, do the following:
*
* <pre>
* // merge the old taxonomy with the new one.
* OrdinalMap map = DirectoryTaxonomyWriter.addTaxonomies();
* int[] ordmap = map.getMap();
*
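* // Add the index and re-map ordinals on the go
* FacetsPayloadProcessorProvider fppp = new FacetsPayloadProcessorProvider(
* oldDir, ordmap, indexingParams); // indexingParams: the facets indexing parameters
* IndexReader r = IndexReader.open(oldDir);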
* IndexWriterConfig conf = new IndexWriterConfig(VER, ANALYZER);
* IndexWriter writer = new IndexWriter(newDir, conf);
* writer.setPayloadProcessorProvider(fppp);
* writer.addIndexes(r);
* writer.commit();
* </pre>
* <p>
* <b>NOTE:</b> while the second example looks simpler, IndexWriter may trigger
* a long merge due to addIndexes. The first example avoids this possibly
* unneeded merge, and it can also be run separately (e.g. on another node)
* before the index is merged.
*
* @lucene.experimental
*/
public class FacetsPayloadProcessorProvider extends PayloadProcessorProvider {

  /** The single directory whose payloads this provider rewrites. */
  private final Directory workDir;

  /** Processor returned for {@link #workDir}; created once by the constructor. */
  private final DirPayloadProcessor dirProcessor;

  /**
   * Creates a provider that re-maps the facet ordinals stored in the payloads
   * of one directory.
   *
   * @param dir the {@link Directory} containing the segments to update
   * @param ordinalMap an array mapping previous facets ordinals to new ones
   * @param indexingParams the facets indexing parameters
   */
  public FacetsPayloadProcessorProvider(Directory dir, int[] ordinalMap,
      FacetIndexingParams indexingParams) {
    this.workDir = dir;
    this.dirProcessor = new FacetsDirPayloadProcessor(indexingParams, ordinalMap);
  }

  /**
   * Returns the facets processor when {@code dir} is the very instance given
   * to the constructor, and {@code null} for every other directory.
   *
   * @param dir the directory being processed
   * @return the processor for the work directory, or {@code null}
   */
  @Override
  public DirPayloadProcessor getDirProcessor(Directory dir) throws IOException {
    // Reference comparison on purpose: only the exact instance is matched.
    return workDir == dir ? dirProcessor : null;
  }

  /**
   * A {@link DirPayloadProcessor} that hands out a {@link FacetsPayloadProcessor}
   * for every term which belongs to one of the category lists of the given
   * {@link FacetIndexingParams}.
   */
  public static class FacetsDirPayloadProcessor extends DirPayloadProcessor {

    /** Category list parameters, keyed by the term each list is stored under. */
    private final Map<Term, CategoryListParams> paramsByTerm =
        new HashMap<Term, CategoryListParams>(1);

    /** Maps previous facets ordinals (index) to new ones (value). */
    private final int[] ordinalMap;

    /**
     * Registers the term of every category list found in {@code indexingParams}.
     *
     * @param indexingParams the facets indexing parameters
     * @param ordinalMap an array mapping previous facets ordinals to new ones
     */
    protected FacetsDirPayloadProcessor(FacetIndexingParams indexingParams, int[] ordinalMap) {
      this.ordinalMap = ordinalMap;
      for (CategoryListParams clp : indexingParams.getAllCategoryListParams()) {
        final Term listTerm = clp.getTerm();
        paramsByTerm.put(listTerm, clp);
      }
    }

    /**
     * Looks up the category list registered for {@code term}.
     *
     * @param term the term whose payloads are about to be processed
     * @return a new {@link FacetsPayloadProcessor} for a registered term, or
     *         {@code null} when the term is not a category list term
     */
    @Override
    public PayloadProcessor getProcessor(Term term) throws IOException {
      final CategoryListParams clp = paramsByTerm.get(term);
      return clp == null ? null : new FacetsPayloadProcessor(clp, ordinalMap);
    }
  }

  /** A PayloadProcessor for updating facets ordinal references, based on an ordinal map */
  public static class FacetsPayloadProcessor extends PayloadProcessor {

    private final IntEncoder encoder;
    private final IntDecoder decoder;
    private final int[] ordinalMap;

    /** Reused across calls; holds the bytes of the most recently re-encoded payload. */
    private final ByteArrayOutputStream buffer = new ByteArrayOutputStream();

    /**
     * @param params defines the encoding of facet ordinals as payload
     * @param ordinalMap an array mapping previous facets ordinals to new ones
     */
    protected FacetsPayloadProcessor(CategoryListParams params, int[] ordinalMap) {
      this.ordinalMap = ordinalMap;
      this.encoder = params.createEncoder();
      this.decoder = this.encoder.createMatchingDecoder();
    }

    /**
     * @return the current size of the internal buffer, i.e. the length of the
     *         payload produced by the latest {@link #processPayload} call
     */
    @Override
    public int payloadLength() throws IOException {
      return buffer.size();
    }

    /**
     * Decodes every ordinal in the given payload slice, replaces it with its
     * entry in the ordinal map and encodes the result into a new payload.
     * The decoded ordinal is used directly as an array index, so it must lie
     * within the bounds of the ordinal map.
     *
     * @param payload the bytes holding the original payload
     * @param start offset of the first payload byte
     * @param length number of payload bytes
     * @return a copy of the re-encoded payload bytes
     */
    @Override
    public byte[] processPayload(byte[] payload, int start, int length) throws IOException {
      final InputStream in = new ByteArrayInputStream(payload, start, length);
      decoder.reInit(in);
      buffer.reset();
      encoder.reInit(buffer);
      for (long oldOrdinal = decoder.decode();
          oldOrdinal != IntDecoder.EOS;
          oldOrdinal = decoder.decode()) {
        encoder.encode(ordinalMap[(int) oldOrdinal]);
      }
      encoder.close();
      return buffer.toByteArray();
    }
  }
}