/*
* Copyright 2013 Future Systems
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.krakenapps.logstorage.index;
import java.io.File;
import java.io.IOException;
import java.util.NoSuchElementException;
import org.krakenapps.logstorage.file.BufferedRandomAccessFileReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* @since 0.9
* @author xeraph
*/
public class InvertedIndexReader {
private final Logger logger = LoggerFactory.getLogger(InvertedIndexReader.class.getName());
private boolean closed;
private InvertedIndexFileSet files;
private BufferedRandomAccessFileReader indexReader;
private BufferedRandomAccessFileReader dataReader;
private int posBodyOffset;
public InvertedIndexReader(InvertedIndexFileSet files) throws IOException {
this(files.getIndexFile(), files.getDataFile());
}
public InvertedIndexReader(File indexFile, File dataFile) throws IOException {
// validate index file headers
posBodyOffset = InvertedIndexUtil.readHeader(indexFile).getBodyOffset();
InvertedIndexUtil.readHeader(dataFile);
this.files = new InvertedIndexFileSet(indexFile, dataFile);
this.indexReader = new BufferedRandomAccessFileReader(files.getIndexFile());
this.dataReader = new BufferedRandomAccessFileReader(files.getDataFile());
}
public File getIndexFile() {
return files.getIndexFile();
}
public File getDataFile() {
return files.getDataFile();
}
public InvertedIndexCursor openCursor(String term) throws IOException {
return new Cursor(term);
}
public class Cursor implements InvertedIndexCursor {
// end position of segment, also recorded in index file
private long dataEndOffset;
// current data read position
private long dataPos;
// current loaded segment index (numbering from 0)
private long currentSegmentIndex;
// total posting length of current loaded segment
private long currentPostingCount;
// remaining posting count of current loaded segment (descreasing)
private long remaining;
// next will return this item
private Long prefetch = null;
// last item for delta decoding
private long last;
// search target term
private String term;
public Cursor(String term) throws IOException {
this.term = term;
// align
currentSegmentIndex = ((files.getIndexFile().length() - posBodyOffset) >> 3) - 1;
// backward segment traverse until term matches
Long postingCount = null;
while (currentSegmentIndex >= 0) {
postingCount = loadSegment(currentSegmentIndex);
if (postingCount != null) {
currentPostingCount = postingCount;
remaining = postingCount;
break;
}
currentSegmentIndex--;
}
}
private boolean loadNextSegment() throws IOException {
Long postingCount = null;
while (currentSegmentIndex > 0) {
postingCount = loadSegment(--currentSegmentIndex);
if (postingCount != null) {
currentPostingCount = postingCount;
remaining = postingCount;
return true;
}
}
return false;
}
@Override
public boolean hasNext() {
if (closed) {
return false;
}
try {
if (prefetch != null)
return true;
if (remaining <= 0) {
if (!loadNextSegment())
return false;
}
if (remaining == currentPostingCount) {
prefetch = nextId();
} else {
prefetch = last - nextId();
}
last = prefetch;
remaining--;
return prefetch != null;
} catch (IOException e) {
return false;
}
}
private Long loadSegment(long segmentIndex) throws IOException {
indexReader.seek(posBodyOffset + (segmentIndex << 3));
dataEndOffset = indexReader.readLong();
dataPos = dataEndOffset;
logger.debug("kraken logstorage: index data end offset [{}]", dataEndOffset);
dataReader.seek(dataEndOffset);
long version = nextLong();
if (version != 1)
throw new IllegalStateException("block version is not supported " + version);
long termBlockLength = nextLong();
logger.debug("kraken logstorage: index term block length [{}]", termBlockLength);
long postingBlockLength = nextLong();
logger.debug("kraken logstorage: posting block length [{}]", postingBlockLength);
// find target term while iterate
long beginOfTermBlock = dataPos - termBlockLength;
logger.debug("kraken logstorage: begin [{}], data pos [{}]", beginOfTermBlock, dataPos);
Long postingOffset = null;
Long postingCount = null;
while (dataPos > beginOfTermBlock) {
long termLen = nextLong();
String t = nextString(termLen);
long termCount = nextLong();
long offset = nextLong();
int diff = t.compareTo(term);
if (diff == 0) {
postingOffset = offset;
postingCount = termCount;
break;
} else if (diff < 0)
break;
if (logger.isDebugEnabled())
logger.debug("kraken logstorage: term {}, count {}, offset {}", new Object[] { t, termCount, offset });
}
if (postingOffset == null)
return null;
// relative to absolute position
long size = 1 + InvertedIndexWriter.lengthOfRawNumber(long.class, termBlockLength)
+ InvertedIndexWriter.lengthOfRawNumber(long.class, postingBlockLength) + termBlockLength
+ postingBlockLength;
dataPos = dataEndOffset - size + 1 + postingOffset;
logger.debug("kraken logstorage: data [{}~{}] term [{}] posting block length [{}] posting offset [{}]", new Object[] {
dataPos, dataEndOffset, termBlockLength, postingBlockLength, postingOffset });
dataReader.seek(dataPos);
return postingCount;
}
@Override
public long next() throws IOException {
if (closed) {
String msg = "index reader is already closed, index=" + files.getIndexFile().getAbsolutePath() + ", data="
+ files.getDataFile().getAbsolutePath();
throw new IOException(msg);
}
if (!hasNext())
throw new NoSuchElementException();
long n = prefetch;
prefetch = null;
return n;
}
private long nextId() throws IOException {
long value = 0L;
byte b;
do {
value = value << 7;
b = dataReader.readByte();
value |= b & 0x7F;
} while ((b & 0x80) == 0x80);
return value;
}
private long nextLong() throws IOException {
long l = 0;
byte b;
do {
dataReader.seek(dataPos);
l <<= 7;
b = dataReader.readByte();
dataPos--;
l |= b & 0x7f;
} while ((b & 0x80) != 0);
return l;
}
private String nextString(long len) throws IOException {
dataPos -= len - 1;
dataReader.seek(dataPos);
byte[] b = new byte[(int) len];
dataReader.readFully(b);
dataReader.seek(dataPos);
dataPos--;
return new String(b, "utf-8");
}
}
public void close() {
if (closed)
return;
closed = true;
try {
indexReader.close();
} catch (IOException e) {
}
try {
dataReader.close();
} catch (IOException e) {
}
}
}