package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Closeable;
import java.io.IOException;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.DoubleBarrelLRUCache;
import org.apache.lucene.util.CloseableThreadLocal;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.alimama.mdrill.buffer.BlockBufferInput;
import com.alimama.mdrill.buffer.SmallBufferedInput;
import com.alimama.mdrill.hdfsDirectory.FileSystemDirectory;
/** This stores a monotonically increasing set of {@code <Term, TermInfo>} pairs in a
* Directory. Pairs are accessed either by Term or by ordinal position in the
* set. */
public class TermInfosReader implements Closeable {
// Class-wide SLF4J logger (public and non-final as declared).
public static Logger log = LoggerFactory.getLogger(TermInfosReader.class);
// Directory the segment files are opened from; assigned from the constructor's dir argument.
private final Directory directory;
// Segment name used to derive every file name this reader opens.
private final String segment;
// Field metadata handed to each SegmentTermEnum created by the constructor.
private final FieldInfos fieldInfos;
// Per-thread holder of a private SegmentTermEnum; populated lazily by getThreadResources().
private final CloseableThreadLocal<ThreadResources> threadResources = new CloseableThreadLocal<ThreadResources>();
// Enumerator created over tisInput; terms() hands out clones of it.
private final SegmentTermEnum origEnum;
// Copy of origEnum.size; returned by size().
private final long size;
// Terms index; null when the constructor was given indexDivisor == -1.
private final TermInfoReaderIndexInterface index;
// index.length(), or -1 when the terms index was not loaded.
private final int indexLength;
// origEnum.indexInterval * indexDivisor, or -1 when the terms index was not loaded.
private final int totalIndexInterval;
// Size argument passed to the termsCache constructor.
private final static int DEFAULT_CACHE_SIZE = 1024;
/** A {@link TermInfo} copy that additionally carries the ordinal of its term. */
private static final class TermInfoAndOrd extends TermInfo {

  /** Ordinal of the term; asserted to be non-negative at construction. */
  final long termOrd;

  /**
   * @param ti      term info passed to the superclass constructor
   * @param termOrd ordinal of the term, expected to be {@code >= 0}
   */
  public TermInfoAndOrd(TermInfo ti, long termOrd) {
    super(ti);
    assert termOrd >= 0;
    this.termOrd = termOrd;
  }
}
/**
 * Cache key for {@code termsCache}: wraps a private copy of a {@link Term} and
 * delegates equality and hashing to it.
 */
private static class CloneableTerm extends DoubleBarrelLRUCache.CloneableKey {

  /** Copy of the term this key stands for. */
  private final Term term;

  /**
   * @param t term to wrap; its field and text are copied into a new Term
   */
  public CloneableTerm(Term t) {
    this.term = new Term(t.field(), t.text());
  }

  /** Returns a new key wrapping a fresh copy of the same term. */
  @Override
  public Object clone() {
    return new CloneableTerm(term);
  }

  /**
   * Two keys are equal when their wrapped terms are equal. Returns {@code false}
   * for {@code null} or for an object of any other type instead of throwing.
   */
  @Override
  public boolean equals(Object _other) {
    if (this == _other) {
      return true;
    }
    if (!(_other instanceof CloneableTerm)) {
      // Covers null as well as foreign types.
      return false;
    }
    return term.equals(((CloneableTerm) _other).term);
  }

  /** Hash of the wrapped term, consistent with {@link #equals(Object)}. */
  @Override
  public int hashCode() {
    return term.hashCode();
  }
}
// Cache from term key to its TermInfo plus ordinal; consulted and filled by get(Term, boolean, BytesRef).
private final DoubleBarrelLRUCache<CloneableTerm,TermInfoAndOrd> termsCache = new DoubleBarrelLRUCache<CloneableTerm,TermInfoAndOrd>(DEFAULT_CACHE_SIZE);
/**
* Per-thread resources managed by ThreadLocal
*/
private static final class ThreadResources {
// Enumerator private to the owning thread; set to a clone of origEnum by getThreadResources().
SegmentTermEnum termEnum;
}
// Input over the terms file (TERMS_EXTENSION); opened by the constructor and passed to origEnum.
IndexInput tisInput=null;
// Input over the terms index file (TERMS_INDEX_EXTENSION); opened by the constructor.
IndexInput tiiInput=null;
// Input over the TERMS_INDEX_EXTENSION_QUICK file; stays null when that file does not exist.
public IndexInput tiiInputquick=null;
// Set to true by the constructor when the quick terms-index file exists.
AtomicBoolean isQuickMode=new AtomicBoolean(false);
// True once the constructor has loaded docValues; reset to false by close().
public AtomicBoolean supportquick=new AtomicBoolean(false);
// Doc-values reader; stays null unless both the tis size file and the quick tis file exist.
DocValuesReader docValues=null;
/**
 * Returns the doc-values reader set up by the constructor.
 *
 * @return the reader, or {@code null} when none was loaded or after {@link #close()}
 * @throws CloneNotSupportedException declared in the signature; this body never throws it
 */
public DocValuesReader getDocValues() throws CloneNotSupportedException {
  return docValues;
}
/**
 * Opens the term dictionary of one segment and, unless disabled, loads its terms index.
 *
 * <p>If the tis size file exists its first long is read and passed to the term enumerator;
 * if the quick tis file exists as well, a {@link DocValuesReader} is set up and
 * {@code supportquick} is switched on. If the quick terms-index file exists the reader
 * runs in quick mode and builds a {@code TermInfosReaderIndexQuick}; otherwise it builds
 * a {@code TermInfosReaderIndex} from a temporary enumerator over the tii input.
 *
 * @param dir            directory holding the segment files
 * @param seg            segment name used to derive the file names
 * @param fis            field infos handed to the term enumerators
 * @param readBufferSize buffer size passed to {@code Directory.openInput}
 * @param indexDivisor   {@code -1} to skip loading the terms index, otherwise a value
 *                       {@code >= 1} that multiplies the index interval
 * @throws IllegalArgumentException if {@code indexDivisor} is neither -1 nor positive
 * @throws CorruptIndexException    declared for the index readers used here
 * @throws IOException              if a file cannot be opened or read
 */
TermInfosReader(Directory dir, String seg, FieldInfos fis, int readBufferSize, int indexDivisor)
    throws CorruptIndexException, IOException {
  boolean success = false;
  docValues = null;
  supportquick.set(false);

  if (indexDivisor < 1 && indexDivisor != -1) {
    throw new IllegalArgumentException("indexDivisor must be -1 (don't load terms index) or greater than 0: got " + indexDivisor);
  }

  try {
    directory = dir;
    segment = seg;
    fieldInfos = fis;

    long tisfilesize = -1;
    String tisFileSize = IndexFileNames.segmentFileName(segment, IndexFileNames.TERMS_EXTENSION_SIZE);
    String quickTis = IndexFileNames.segmentFileName(segment, IndexFileNames.TERMS_EXTENSION_QUICK);
    String quickTisTxt = IndexFileNames.segmentFileName(segment, IndexFileNames.TERMS_EXTENSION_QUICK_TXT);
    String quickTisVal = IndexFileNames.segmentFileName(segment, IndexFileNames.TERMS_EXTENSION_QUICK_VAL);
    if (directory.fileExists(tisFileSize)) {
      IndexInput sizebuff = directory.openInput(tisFileSize, readBufferSize);
      try {
        tisfilesize = sizebuff.readLong();
        if (directory.fileExists(quickTis)) {
          // Assign the field before opening its inputs so that close() can release
          // whatever was opened if a later step fails.
          docValues = new DocValuesReader();
          docValues.quicktisInput = new SmallBufferedInput(directory.openInput(quickTis, 8), 8);
          docValues.quicktisInputTxt = new SmallBufferedInput(directory.openInput(quickTisTxt, 1024), 1024);
          docValues.quicktisInputVal = new SmallBufferedInput(directory.openInput(quickTisVal, 8), 8);
          docValues.readPosForm(sizebuff);
          supportquick.set(true);
        }
      } finally {
        // Always release the size file, including when reading it fails.
        sizebuff.close();
      }
    }

    String filename = IndexFileNames.segmentFileName(segment, IndexFileNames.TERMS_EXTENSION);
    final String indexFileName = IndexFileNames.segmentFileName(segment, IndexFileNames.TERMS_INDEX_EXTENSION);
    final String indexFileNamequick = IndexFileNames.segmentFileName(segment, IndexFileNames.TERMS_INDEX_EXTENSION_QUICK);

    // Local-FS and HDFS directories get the block-buffered wrapper for the terms file;
    // every other directory type reads it directly.
    if (directory instanceof FSDirectory || directory instanceof FileSystemDirectory) {
      tisInput = BlockBufferInput.MaybeInstance(
          directory.openInput(filename, readBufferSize),
          directory, filename, directory.getP());
    } else {
      tisInput = directory.openInput(filename, readBufferSize);
    }
    tiiInput = directory.openInput(indexFileName, readBufferSize);

    if (directory.fileExists(indexFileNamequick)) {
      tiiInputquick = directory.openInput(indexFileNamequick, readBufferSize);
      this.isQuickMode.set(true);
    }

    origEnum = new SegmentTermEnum(tisInput, fieldInfos, false, tisfilesize);
    size = origEnum.size;

    if (indexDivisor != -1) {
      long tiifilesize = -1;
      String tiiFileSize = IndexFileNames.segmentFileName(segment, IndexFileNames.TERMS_INDEX_EXTENSION_SIZE);
      if (directory.fileExists(tiiFileSize)) {
        IndexInput sizebuff = directory.openInput(tiiFileSize, readBufferSize);
        try {
          tiifilesize = sizebuff.readLong();
        } finally {
          sizebuff.close();
        }
      }

      // Load terms index
      totalIndexInterval = origEnum.indexInterval * indexDivisor;
      SegmentTermEnum indexEnum = null;
      try {
        if (this.isQuickMode.get()) {
          index = new TermInfosReaderIndexQuick(tiiInput, tiiInputquick, fieldInfos, tiifilesize, indexDivisor, dir.fileLength(indexFileName), totalIndexInterval);
        } else {
          indexEnum = new SegmentTermEnum(tiiInput, fieldInfos, true, tiifilesize);
          index = new TermInfosReaderIndex(indexEnum, indexDivisor, dir.fileLength(indexFileName), totalIndexInterval);
        }
        indexLength = index.length();
      } finally {
        // The temporary enumerator is only needed while the index is being built.
        if (indexEnum != null) {
          indexEnum.close();
        }
      }
    } else {
      // Do not load terms index:
      totalIndexInterval = -1;
      index = null;
      indexLength = -1;
      // Outside quick mode nothing else will ever close the tii input (close() only
      // handles it in quick mode), so release it here instead of leaking the handle.
      if (!this.isQuickMode.get() && tiiInput != null) {
        try {
          tiiInput.close();
        } finally {
          tiiInput = null;
        }
      }
    }
    success = true;
  } finally {
    // With lock-less commits, it's entirely possible (and
    // fine) to hit a FileNotFound exception above. In
    // this case, we want to explicitly close any subset
    // of things that were opened so that we don't have to
    // wait for a GC to do so.
    if (!success) {
      close();
    }
  }
}
/**
 * Returns the skip interval recorded by the underlying term enumerator.
 *
 * @return {@code origEnum.skipInterval}
 */
public int getSkipInterval() {
  final int interval = origEnum.skipInterval;
  return interval;
}
/**
 * Returns the maximum number of skip levels recorded by the underlying term enumerator.
 *
 * @return {@code origEnum.maxSkipLevels}
 */
public int getMaxSkipLevels() {
  final int levels = origEnum.maxSkipLevels;
  return levels;
}
/**
 * Releases everything this reader opened: the doc-values reader, the terms-index
 * inputs, the term dictionary and the per-thread enumerators.
 *
 * <p>Each resource is closed even if closing an earlier one throws, and the method also
 * cleans up after a constructor that failed part-way (for example before
 * {@code origEnum} existed).
 *
 * @throws IOException if closing any underlying input fails
 */
public final void close() throws IOException {
  supportquick.set(false);
  try {
    closeDocValues();
  } finally {
    try {
      closeTermsIndexInputs();
    } finally {
      try {
        closeTermsDictionary();
      } finally {
        threadResources.close();
      }
    }
  }
}

/** Closes and forgets the doc-values reader, if one was loaded. */
private void closeDocValues() throws IOException {
  if (this.docValues != null) {
    try {
      docValues.close();
    } finally {
      docValues = null;
    }
  }
}

/** Closes the tii inputs that no term enumerator is responsible for. */
private void closeTermsIndexInputs() throws IOException {
  final boolean quick = this.isQuickMode.get();
  try {
    // Outside quick mode the tii input is closed through the temporary index enumerator
    // once the index has been built. When no index exists (index not requested, or
    // construction failed) the input may still be open, so close it here. A repeated
    // close is harmless under the java.io.Closeable contract.
    if (tiiInput != null && (quick || index == null)) {
      try {
        tiiInput.close();
      } finally {
        tiiInput = null;
      }
    }
  } finally {
    if (quick && tiiInputquick != null) {
      try {
        tiiInputquick.close();
      } finally {
        tiiInputquick = null;
      }
    }
  }
}

/** Closes the term dictionary enumerator, or the raw tis input if no enumerator was created. */
private void closeTermsDictionary() throws IOException {
  if (origEnum != null) {
    // The enumerator was built over tisInput and is presumably responsible for closing it.
    origEnum.close();
  } else if (tisInput != null) {
    // Construction failed before the enumerator existed; close the raw input directly.
    try {
      tisInput.close();
    } finally {
      tisInput = null;
    }
  }
}
/**
 * Reports how many term/value pairs the set holds.
 *
 * @return the size captured from the term enumerator at construction
 */
final long size() {
  return this.size;
}
/**
 * Returns the calling thread's resources, creating them on first use with a fresh
 * enumerator obtained from {@link #terms()}.
 */
private ThreadResources getThreadResources() {
  final ThreadResources existing = threadResources.get();
  if (existing != null) {
    return existing;
  }
  final ThreadResources created = new ThreadResources();
  created.termEnum = terms();
  threadResources.set(created);
  return created;
}
/**
 * Looks up the TermInfo for a Term in the set.
 *
 * @param term the term to look up
 * @return the matching TermInfo, or null if the term is not in the set
 * @throws IOException if reading the term dictionary fails
 */
TermInfo get(Term term) throws IOException {
  // A cached entry may be returned without repositioning the enumerator.
  return get(term, false, new BytesRef(term.text));
}
/**
 * Returns the TermInfo for a Term in the set, or null.
 *
 * <p>The terms cache is consulted first. Unless {@code mustSeekEnum} is set, a cached
 * entry is returned immediately. Otherwise the calling thread's enumerator is either
 * scanned forward (when the term lies at or after its current position and inside the
 * current index block) or re-positioned through the terms index and then scanned.
 *
 * @param term         the term to look up
 * @param mustSeekEnum when true, the thread's enumerator is positioned even on a cache hit
 * @param termBytesRef bytes of the term text, passed through to the terms index
 * @return the matching TermInfo, or null if the set is empty or the term is absent
 * @throws IllegalStateException if the terms index was not loaded
 * @throws IOException           if reading the term dictionary fails
 */
private TermInfo get(Term term, boolean mustSeekEnum, BytesRef termBytesRef) throws IOException {
  if (size == 0) return null;

  ensureIndexIsRead();

  final CloneableTerm cacheKey = new CloneableTerm(term);
  TermInfoAndOrd tiOrd = termsCache.get(cacheKey);
  if (!mustSeekEnum && tiOrd != null) {
    return tiOrd;
  }

  ThreadResources resources = getThreadResources();
  SegmentTermEnum enumerator = resources.termEnum;
  if (enumerator.term() != null // term is at or past current
      && ((enumerator.prev() != null && term.compareTo(enumerator.prev()) > 0)
          || term.compareTo(enumerator.term()) >= 0)) {
    int enumOffset = (int) (enumerator.position / totalIndexInterval) + 1;
    if (indexLength == enumOffset // but before end of block
        || index.compareTo(term, termBytesRef, enumOffset) < 0) {
      // no need to seek
      final TermInfo ti;
      int numScans = enumerator.scanTo(term);
      if (enumerator.term() != null && term.compareTo(enumerator.term()) == 0) {
        ti = enumerator.termInfo();
        // Only cache when the scan moved more than one entry; presumably this keeps
        // in-order iteration from flooding the cache (TODO confirm).
        if (numScans > 1) {
          if (tiOrd == null) {
            termsCache.put(cacheKey, new TermInfoAndOrd(ti, enumerator.position));
          } else {
            assert sameTermInfo(ti, tiOrd, enumerator);
            // Compare as long: termOrd is a long, so truncating the position to int
            // would make this assertion fail spuriously for ordinals beyond 2^31.
            assert enumerator.position == tiOrd.termOrd;
          }
        }
      } else {
        ti = null;
      }
      return ti;
    }
  }

  // random-access: must seek
  final int indexPos;
  if (tiOrd != null) {
    // The cached ordinal tells us which index block to start from.
    indexPos = (int) (tiOrd.termOrd / totalIndexInterval);
  } else {
    // Must do binary search:
    indexPos = index.getIndexOffset(term, termBytesRef);
  }

  index.seekEnum(enumerator, indexPos);
  enumerator.scanTo(term);
  final TermInfo ti;
  if (enumerator.term() != null && term.compareTo(enumerator.term()) == 0) {
    ti = enumerator.termInfo();
    if (tiOrd == null) {
      termsCache.put(cacheKey, new TermInfoAndOrd(ti, enumerator.position));
    } else {
      assert sameTermInfo(ti, tiOrd, enumerator);
      assert enumerator.position == tiOrd.termOrd;
    }
  } else {
    ti = null;
  }
  return ti;
}
/**
 * Checks that two TermInfo instances describe the same postings. Called only from asserts.
 *
 * @param ti1        first term info
 * @param ti2        second term info
 * @param enumerator supplies the skip interval that decides whether skipOffset is compared
 * @return true when docFreq, freqPointer and proxPointer match and, where relevant,
 *         skipOffset matches too
 */
private final boolean sameTermInfo(TermInfo ti1, TermInfo ti2, SegmentTermEnum enumerator) {
  return ti1.docFreq == ti2.docFreq
      && ti1.freqPointer == ti2.freqPointer
      && ti1.proxPointer == ti2.proxPointer
      // skipOffset is only valid when docFreq >= skipInterval:
      && (ti1.docFreq < enumerator.skipInterval || ti1.skipOffset == ti2.skipOffset);
}
/**
 * Guards operations that need the terms index.
 *
 * @throws IllegalStateException if the reader was created without loading the index
 */
private void ensureIndexIsRead() {
  if (index != null) {
    return;
  }
  throw new IllegalStateException("terms index was not loaded when this reader was created");
}
/**
 * Returns the position of a Term in the set or -1.
 *
 * <p>The thread's enumerator is positioned through the terms index and then advanced
 * until it reaches a term that is not smaller than the requested one.
 *
 * @param term the term to locate
 * @return the ordinal of the term, or -1 if the set is empty or the term is absent
 * @throws IllegalStateException if the terms index was not loaded
 * @throws IOException           if reading the term dictionary fails
 */
final long getPosition(Term term) throws IOException {
  if (size == 0) return -1;

  ensureIndexIsRead();
  BytesRef termBytesRef = new BytesRef(term.text);
  int indexOffset = index.getIndexOffset(term, termBytesRef);

  SegmentTermEnum enumerator = getThreadResources().termEnum;
  index.seekEnum(enumerator, indexOffset);

  // Track the current term locally and treat null as "no term", mirroring the null
  // checks in get(); this avoids comparing against a null term when the requested
  // term sorts after the last entry and the enumerator runs out.
  Term current = enumerator.term();
  while (current != null && term.compareTo(current) > 0 && enumerator.next()) {
    current = enumerator.term();
  }

  if (current != null && term.compareTo(current) == 0) {
    return enumerator.position;
  }
  return -1;
}
/**
 * Creates an enumeration over all the Terms and TermInfos in the set.
 *
 * @return a clone of the reader's original term enumerator
 */
public SegmentTermEnum terms() {
  final Object copy = origEnum.clone();
  return (SegmentTermEnum) copy;
}
/**
 * Creates an enumeration of terms starting at or after the named term.
 *
 * @param term the term to start from
 * @return a clone of the calling thread's enumerator after it has been positioned
 * @throws IOException if positioning the enumerator fails
 */
public SegmentTermEnum terms(Term term) throws IOException {
  // The lookup result is ignored; the call is made to position the thread's enumerator.
  get(term, true, new BytesRef(term.text));
  final SegmentTermEnum positioned = getThreadResources().termEnum;
  return (SegmentTermEnum) positioned.clone();
}
}