/*
* Copyright (C) 2014 Indeed Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the
* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.indeed.flamdex.simple;
import com.google.common.base.Charsets;
import com.indeed.util.core.reference.SharedReference;
import com.indeed.util.serialization.StringSerializer;
import com.indeed.imhotep.io.caching.CachedFile;
import com.indeed.lsmtree.core.Generation;
import com.indeed.lsmtree.core.ImmutableBTreeIndex;
import com.indeed.util.mmap.DirectMemory;
import com.indeed.util.mmap.MMapBuffer;
import org.apache.log4j.Logger;
import java.io.File;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CharsetDecoder;
import java.util.Arrays;
/**
* @author jsgroth
*/
final class SimpleStringTermIteratorImpl implements SimpleStringTermIterator {
private static final Logger log = Logger.getLogger(SimpleStringTermIteratorImpl.class);
private static final int BUFFER_SIZE = 8192;
private final byte[] buffer;
private int bufferLen;
private long bufferOffset;
private int bufferPtr;
private final String docsFilename;
private ImmutableBTreeIndex.Reader<String, LongPair> index;
private final File indexFile;
private final CharsetDecoder decoder = Charsets.UTF_8.newDecoder();
private final SharedReference<MMapBuffer> file;
private final DirectMemory memory;
private byte[] lastTermBytes = new byte[100];
private ByteBuffer lastTermByteBuffer = ByteBuffer.wrap(lastTermBytes);
private int lastTermLength = 0;
private long lastTermOffset = 0L;
private int lastTermDocFreq = 0;
private String lastString = null;
private boolean done = false;
private boolean bufferNext = false;
private boolean closed = false;
SimpleStringTermIteratorImpl(MapCache mapCache, String filename, String docsFilename, String indexFilename) throws IOException {
buffer = new byte[BUFFER_SIZE];
this.docsFilename = docsFilename;
final CachedFile cf = CachedFile.create(indexFilename);
if (cf.exists()) {
indexFile = cf.loadDirectory();
} else {
indexFile = null;
}
file = mapCache.copyOrOpen(filename);
memory = file.get().memory();
done = false;
bufferLen = 0;
bufferOffset = 0L;
bufferPtr = 0;
}
@Override
public void reset(String term) {
try {
internalReset(term);
} catch (IOException e) {
close();
throw new RuntimeException(e);
}
}
private void internalReset(String term) throws IOException {
if (indexFile != null) {
if (index == null) {
index = new ImmutableBTreeIndex.Reader<String,LongPair>(
indexFile,
new StringSerializer(),
new LongPairSerializer(),
false
);
}
Generation.Entry<String, LongPair> e = index.floor(term);
if (e == null) {
e = index.first();
}
lastTermBytes = e.getKey().getBytes(Charsets.UTF_8);
lastTermByteBuffer = ByteBuffer.wrap(lastTermBytes);
lastTermLength = lastTermBytes.length;
lastString = null;
final LongPair p = e.getValue();
refillBuffer(p.getFirst());
lastTermOffset = p.getSecond();
lastTermDocFreq = (int)readVLong();
done = false;
while (decoder.decode((ByteBuffer)lastTermByteBuffer.position(0).limit(lastTermLength)).toString().compareTo(term) < 0 && next()) {}
bufferNext = true;
} else {
lastTermLength = 0;
lastTermOffset = 0L;
lastTermDocFreq = 0;
lastString = null;
bufferLen = 0;
bufferOffset = 0L;
bufferPtr = 0;
done = false;
while (next() && new String(lastTermBytes, 0, lastTermLength, Charsets.UTF_8).compareTo(term) < 0) {}
bufferNext = true;
}
}
@Override
public String term() {
if (lastString == null) {
try {
lastString = decoder.decode((ByteBuffer)lastTermByteBuffer.position(0).limit(lastTermLength)).toString();
} catch (CharacterCodingException e) {
throw new RuntimeException(e);
}
}
return lastString;
}
@Override
public byte[] termStringBytes() {
return lastTermBytes;
}
@Override
public int termStringLength() {
return lastTermLength;
}
@Override
public boolean next() {
try {
return internalNext();
} catch (IOException e) {
close();
throw new RuntimeException(e);
}
}
private boolean internalNext() throws IOException {
if (done) return false;
if (bufferNext) {
bufferNext = false;
return true;
}
final int firstByte = read();
if (firstByte == -1) {
done = true;
return false;
}
final int removeLen = (int)readVLong(firstByte);
final int newLen = (int)readVLong();
ensureCapacity(lastTermLength - removeLen + newLen);
readFully(lastTermBytes, lastTermLength - removeLen, newLen);
lastTermLength = lastTermLength - removeLen + newLen;
lastString = null;
final long offsetDelta = readVLong();
lastTermOffset += offsetDelta;
lastTermDocFreq = (int)readVLong();
return true;
}
private void ensureCapacity(int len) {
// TODO is > sufficient here? I think yes, verify later
if (len >= lastTermBytes.length) {
lastTermBytes = Arrays.copyOf(lastTermBytes, Math.max(len, 2*lastTermBytes.length));
lastTermByteBuffer = ByteBuffer.wrap(lastTermBytes);
}
}
@Override
public int docFreq() {
return lastTermDocFreq;
}
@Override
public void close() {
if (!closed) {
try {
if (index != null) {
index.close();
}
} catch (IOException e) {
log.error("error closing index", e);
}
try {
file.close();
} catch (IOException e) {
log.error("error closing file", e);
}
closed = true;
}
}
@Override
public String getFilename() {
return docsFilename;
}
@Override
public long getOffset() {
return lastTermOffset;
}
private int read() throws IOException {
if (bufferPtr == bufferLen) {
refillBuffer(bufferOffset + bufferLen);
if (bufferLen == 0) return -1;
}
return buffer[bufferPtr++] & 0xFF;
}
private void readFully(final byte[] b, int off, int len) throws IOException {
while (true) {
final int available = bufferLen - bufferPtr;
if (available >= len) {
System.arraycopy(buffer, bufferPtr, b, off, len);
bufferPtr += len;
return;
} else {
System.arraycopy(buffer, bufferPtr, b, off, available);
off += available;
len -= available;
refillBuffer(bufferOffset + bufferLen);
}
}
}
private void refillBuffer(final long offset) throws IOException {
bufferLen = (int)Math.min(buffer.length, memory.length() - offset);
if (bufferLen > 0) {
memory.getBytes(offset, buffer, 0, bufferLen);
}
bufferOffset = offset;
bufferPtr = 0;
}
private long readVLong(int b) throws IOException {
long ret = 0L;
int shift = 0;
do {
ret |= ((b & 0x7F) << shift);
if (b < 0x80) return ret;
shift += 7;
b = read();
} while (true);
}
private long readVLong() throws IOException {
long ret = 0L;
int shift = 0;
do {
int b = read();
ret |= ((b & 0x7F) << shift);
if (b < 0x80) return ret;
shift += 7;
} while (true);
}
}