package org.apache.lucene.facet.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterAtomicReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.encoding.IntDecoder;
import org.apache.lucene.util.encoding.IntEncoder;
/**
* A {@link FilterAtomicReader} for updating facets ordinal references,
* based on an ordinal map. You should use this code in conjunction with merging
* taxonomies - after you merge taxonomies, you receive an {@link OrdinalMap}
* which maps the 'old' ordinals to the 'new' ones. You can use that map to
* re-map the payloads which contain the facets information (ordinals) either
* before or while merging the indexes.
* <p>
* For re-mapping the ordinals during index merge, do the following:
*
* <pre class="prettyprint">
* // merge the old taxonomy with the new one.
* OrdinalMap map = DirectoryTaxonomyWriter.addTaxonomies();
* int[] ordmap = map.getMap();
*
* // Add the index and re-map ordinals on the go
* DirectoryReader reader = DirectoryReader.open(oldDir);
* IndexWriterConfig conf = new IndexWriterConfig(VER, ANALYZER);
* IndexWriter writer = new IndexWriter(newDir, conf);
* List&lt;AtomicReaderContext&gt; leaves = reader.leaves();
* AtomicReader wrappedLeaves[] = new AtomicReader[leaves.size()];
* for (int i = 0; i < leaves.size(); i++) {
* wrappedLeaves[i] = new OrdinalMappingAtomicReader(leaves.get(i).reader(), ordmap);
* }
* writer.addIndexes(new MultiReader(wrappedLeaves));
* writer.commit();
* </pre>
*
* @lucene.experimental
*/
public class OrdinalMappingAtomicReader extends FilterAtomicReader {

  /** Lookup table indexed by an ordinal decoded from a payload; the entry is the ordinal written back. */
  private final int[] ordinalMap;

  /**
   * Category list params, keyed first by term field and then by term bytes.
   * The two-level layout allows lookups by field name and term bytes directly,
   * so no Term object has to be created for a lookup.
   */
  private final Map<String,Map<BytesRef,CategoryListParams>> termMap =
      new HashMap<String,Map<BytesRef,CategoryListParams>>(1);

  /**
   * Wraps an AtomicReader, mapping ordinals according to the ordinalMap.
   * Calls {@link #OrdinalMappingAtomicReader(AtomicReader, int[], FacetIndexingParams)
   * OrdinalMappingAtomicReader(in, ordinalMap, new DefaultFacetIndexingParams())}
   */
  public OrdinalMappingAtomicReader(AtomicReader in, int[] ordinalMap) {
    this(in, ordinalMap, new DefaultFacetIndexingParams());
  }

  /**
   * Wraps an AtomicReader, mapping ordinals according to the ordinalMap,
   * using the provided indexingParams.
   * <p>
   * Every {@link CategoryListParams} returned by
   * {@code indexingParams.getAllCategoryListParams()} is registered under the
   * field and bytes of its term, so that payloads of those terms can be
   * recognized while the wrapped reader is traversed.
   */
  public OrdinalMappingAtomicReader(AtomicReader in, int[] ordinalMap, FacetIndexingParams indexingParams) {
    super(in);
    this.ordinalMap = ordinalMap;
    for (CategoryListParams clp : indexingParams.getAllCategoryListParams()) {
      Term clpTerm = clp.getTerm();
      String field = clpTerm.field();
      Map<BytesRef,CategoryListParams> byTermBytes = termMap.get(field);
      if (byTermBytes == null) {
        // first category list seen for this field: create its per-term table
        byTermBytes = new HashMap<BytesRef,CategoryListParams>(1);
        termMap.put(field, byTermBytes);
      }
      byTermBytes.put(clpTerm.bytes(), clp);
    }
  }

  /**
   * Returns the term vectors of the wrapped reader, wrapped in an
   * ordinal-mapping view, or null if the wrapped reader returns null.
   */
  @Override
  public Fields getTermVectors(int docID) throws IOException {
    Fields vectors = super.getTermVectors(docID);
    return vectors == null ? null : new OrdinalMappingFields(vectors);
  }

  /**
   * Returns the fields of the wrapped reader, wrapped in an ordinal-mapping
   * view, or null if the wrapped reader returns null.
   */
  @Override
  public Fields fields() throws IOException {
    Fields delegate = super.fields();
    return delegate == null ? null : new OrdinalMappingFields(delegate);
  }

  /** Fields view that wraps the terms of every field registered in the term map. */
  private class OrdinalMappingFields extends FilterFields {

    public OrdinalMappingFields(Fields in) {
      super(in);
    }

    /**
     * Returns the terms of the given field. Terms of a field that has no entry
     * in the term map (and null terms) are returned as-is.
     */
    @Override
    public Terms terms(String field) throws IOException {
      Terms delegate = super.terms(field);
      if (delegate == null) {
        return null;
      }
      Map<BytesRef,CategoryListParams> paramsByTerm = termMap.get(field);
      return paramsByTerm == null ? delegate : new OrdinalMappingTerms(delegate, paramsByTerm);
    }
  }

  /** Terms view whose iterator is always an {@link OrdinalMappingTermsEnum}. */
  private class OrdinalMappingTerms extends FilterTerms {

    /** Term bytes to category list params, for the field these terms belong to. */
    private final Map<BytesRef,CategoryListParams> paramsByTerm;

    public OrdinalMappingTerms(Terms in, Map<BytesRef,CategoryListParams> termsMap) {
      super(in);
      this.paramsByTerm = termsMap;
    }

    @Override
    public TermsEnum iterator(TermsEnum reuse) throws IOException {
      // TODO: should we reuse the inner termsenum?
      TermsEnum delegate = super.iterator(reuse);
      return new OrdinalMappingTermsEnum(delegate, paramsByTerm);
    }
  }

  /** TermsEnum view that wraps the postings of terms registered in the per-field table. */
  private class OrdinalMappingTermsEnum extends FilterTermsEnum {

    /** Term bytes to category list params, for the field being enumerated. */
    private final Map<BytesRef,CategoryListParams> paramsByTerm;

    public OrdinalMappingTermsEnum(TermsEnum in, Map<BytesRef,CategoryListParams> termsMap) {
      super(in);
      this.paramsByTerm = termsMap;
    }

    /**
     * Returns the postings of the current term. They are wrapped in an
     * {@link OrdinalMappingDocsAndPositionsEnum} only when the current term has
     * category list params registered; otherwise (or when the wrapped enum
     * returns null) the wrapped result is returned unchanged.
     */
    @Override
    public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) throws IOException {
      // TODO: we could reuse our D&P enum if we need
      DocsAndPositionsEnum delegate = super.docsAndPositions(liveDocs, reuse, flags);
      if (delegate == null) {
        return null;
      }
      CategoryListParams clp = paramsByTerm.get(term());
      return clp == null ? delegate : new OrdinalMappingDocsAndPositionsEnum(delegate, clp);
    }
  }

  /** Postings view that rewrites each payload by passing its ordinals through the ordinal map. */
  private class OrdinalMappingDocsAndPositionsEnum extends FilterDocsAndPositionsEnum {

    private final IntEncoder encoder;
    private final IntDecoder decoder;

    /** Scratch buffer the encoder writes into; reset at the start of every payload. */
    private final ByteArrayOutputStream os = new ByteArrayOutputStream();

    /** The single BytesRef instance handed back from every non-null {@link #getPayload()} call. */
    private final BytesRef payloadOut = new BytesRef();

    public OrdinalMappingDocsAndPositionsEnum(DocsAndPositionsEnum in, CategoryListParams params) {
      super(in);
      encoder = params.createEncoder();
      // decode with the decoder that matches the encoder used for re-encoding
      decoder = encoder.createMatchingDecoder();
    }

    /**
     * Returns the payload of the wrapped enum with every decoded ordinal
     * replaced by its entry in the ordinal map, or null if the wrapped enum has
     * no payload. The returned BytesRef is the same instance on every call; its
     * contents are overwritten each time.
     */
    @Override
    public BytesRef getPayload() throws IOException {
      BytesRef source = super.getPayload();
      if (source == null) {
        return null;
      }

      InputStream encodedIn = new ByteArrayInputStream(source.bytes, source.offset, source.length);
      decoder.reInit(encodedIn);
      os.reset();
      encoder.reInit(os);

      // decode -> remap -> encode, one ordinal at a time, until the decoder is exhausted
      for (long oldOrdinal = decoder.decode(); oldOrdinal != IntDecoder.EOS; oldOrdinal = decoder.decode()) {
        encoder.encode(ordinalMap[(int) oldOrdinal]);
      }
      encoder.close();

      // TODO (Facet): avoid copy?
      byte[] remapped = os.toByteArray();
      payloadOut.bytes = remapped;
      payloadOut.offset = 0;
      payloadOut.length = remapped.length;
      return payloadOut;
    }
  }
}