package org.apache.lucene.index.codecs.pulsing;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.standard.StandardPostingsWriter;
import org.apache.lucene.index.codecs.standard.StandardPostingsWriterImpl;
import org.apache.lucene.index.codecs.standard.StandardPostingsReader;
import org.apache.lucene.index.codecs.standard.StandardPostingsReaderImpl;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.FieldsProducer;
import org.apache.lucene.index.codecs.standard.SimpleStandardTermsIndexReader;
import org.apache.lucene.index.codecs.standard.SimpleStandardTermsIndexWriter;
import org.apache.lucene.index.codecs.standard.StandardTermsDictReader;
import org.apache.lucene.index.codecs.standard.StandardTermsDictWriter;
import org.apache.lucene.index.codecs.standard.StandardTermsIndexReader;
import org.apache.lucene.index.codecs.standard.StandardTermsIndexWriter;
import org.apache.lucene.index.codecs.standard.StandardCodec;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
/** This codec "inlines" the postings for terms that have
* low docFreq. It wraps another codec, which is used for
* writing the non-inlined terms.
*
* Currently it inlines only terms whose docFreq is at most the
* configured freqCutoff, and otherwise uses the normal "standard" codec.
* @lucene.experimental */
public class PulsingCodec extends Codec {

  /** Cutoff handed to the pulsing postings writer; see the constructor. */
  private final int freqCutoff;

  /**
   * Creates the codec and sets its name to "Pulsing".
   *
   * @param freqCutoff terms with freq <= freqCutoff are inlined into the
   *        terms dict
   */
  public PulsingCodec(int freqCutoff) {
    name = "Pulsing";
    this.freqCutoff = freqCutoff;
  }

  /** Returns the codec name followed by the configured cutoff, e.g. "Pulsing(freqCutoff=1)". */
  @Override
  public String toString() {
    return name + "(freqCutoff=" + freqCutoff + ")";
  }

  /**
   * Builds the write-side chain: a standard postings writer, wrapped by the
   * pulsing postings writer, plus a terms index writer, all handed to a
   * terms dict writer which is returned.
   *
   * <p>Construction is staged; if any stage fails, everything opened by the
   * earlier stages is closed before the exception propagates.
   *
   * @param state segment write state passed to the underlying writers
   * @return the terms dict writer that drives the pulsing postings writer
   * @throws IOException if constructing (or, on failure, closing) a component fails
   */
  @Override
  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
    // We wrap StandardPostingsWriterImpl, but any StandardPostingsWriter
    // will work:
    StandardPostingsWriter docsWriter = new StandardPostingsWriterImpl(state);

    // Terms that have <= freqCutoff number of docs are
    // "pulsed" (inlined):
    StandardPostingsWriter pulsingWriter;
    boolean success = false;
    try {
      pulsingWriter = new PulsingPostingsWriterImpl(freqCutoff, docsWriter);
      success = true;
    } finally {
      if (!success) {
        // The wrapper never came into existence, so nothing else would
        // close the wrapped writer.
        docsWriter.close();
      }
    }

    // From here on cleanup goes through the wrapper only.
    // NOTE(review): presumes pulsingWriter.close() also closes docsWriter --
    // confirm in PulsingPostingsWriterImpl.

    // Terms dict index
    StandardTermsIndexWriter indexWriter;
    success = false;
    try {
      indexWriter = new SimpleStandardTermsIndexWriter(state);
      success = true;
    } finally {
      if (!success) {
        pulsingWriter.close();
      }
    }

    // Terms dict
    success = false;
    try {
      FieldsConsumer ret = new StandardTermsDictWriter(indexWriter, state, pulsingWriter, BytesRef.getUTF8SortedAsUnicodeComparator());
      success = true;
      return ret;
    } finally {
      if (!success) {
        // Nested finally so indexWriter is closed even if closing the
        // postings writer throws.
        try {
          pulsingWriter.close();
        } finally {
          indexWriter.close();
        }
      }
    }
  }

  /**
   * Builds the read-side chain: a standard postings reader, wrapped by the
   * pulsing postings reader, plus a terms index reader, all handed to a
   * terms dict reader which is returned.
   *
   * <p>Construction is staged; if any stage fails, everything opened by the
   * earlier stages is closed before the exception propagates.
   *
   * @param state segment read state supplying the directory, segment info,
   *        field infos, read buffer size and terms index divisor
   * @return the terms dict reader that drives the pulsing postings reader
   * @throws IOException if constructing (or, on failure, closing) a component fails
   */
  @Override
  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
    // We wrap StandardPostingsReaderImpl, but any StandardPostingsReader
    // will work:
    StandardPostingsReader docsReader = new StandardPostingsReaderImpl(state.dir, state.segmentInfo, state.readBufferSize);

    StandardPostingsReader pulsingReader;
    boolean success = false;
    try {
      pulsingReader = new PulsingPostingsReaderImpl(docsReader);
      success = true;
    } finally {
      if (!success) {
        // The wrapper never came into existence, so nothing else would
        // close the wrapped reader.
        docsReader.close();
      }
    }

    // From here on cleanup goes through the wrapper only.
    // NOTE(review): presumes pulsingReader.close() also closes docsReader --
    // confirm in PulsingPostingsReaderImpl.

    // Terms dict index reader
    StandardTermsIndexReader indexReader;
    success = false;
    try {
      indexReader = new SimpleStandardTermsIndexReader(state.dir,
                                                       state.fieldInfos,
                                                       state.segmentInfo.name,
                                                       state.termsIndexDivisor,
                                                       BytesRef.getUTF8SortedAsUnicodeComparator());
      success = true;
    } finally {
      if (!success) {
        pulsingReader.close();
      }
    }

    // Terms dict reader
    success = false;
    try {
      FieldsProducer ret = new StandardTermsDictReader(indexReader,
                                                       state.dir, state.fieldInfos, state.segmentInfo.name,
                                                       pulsingReader,
                                                       state.readBufferSize,
                                                       BytesRef.getUTF8SortedAsUnicodeComparator(),
                                                       StandardCodec.TERMS_CACHE_SIZE);
      success = true;
      return ret;
    } finally {
      if (!success) {
        // Nested finally so indexReader is closed even if closing the
        // postings reader throws.
        try {
          pulsingReader.close();
        } finally {
          indexReader.close();
        }
      }
    }
  }

  /**
   * Collects this codec's files for a segment by delegating to the standard
   * postings reader, the standard terms dict reader and the simple standard
   * terms index reader, in that order.
   *
   * @param dir directory passed through to each delegate
   * @param segmentInfo segment passed through to each delegate
   * @param files set passed to each delegate to populate
   * @throws IOException if a delegate fails
   */
  @Override
  public void files(Directory dir, SegmentInfo segmentInfo, Set<String> files) throws IOException {
    StandardPostingsReaderImpl.files(dir, segmentInfo, files);
    StandardTermsDictReader.files(dir, segmentInfo, files);
    SimpleStandardTermsIndexReader.files(dir, segmentInfo, files);
  }

  /**
   * Delegates to {@code StandardCodec.getStandardExtensions}, passing the
   * given set through.
   *
   * @param extensions set handed to the standard codec to populate
   */
  @Override
  public void getExtensions(Set<String> extensions) {
    StandardCodec.getStandardExtensions(extensions);
  }
}