/*
* Copyright (C) 2014 Indeed Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the
* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.indeed.imhotep.local;
import com.indeed.flamdex.api.FlamdexReader;
import com.indeed.flamdex.datastruct.FastBitSet;
import com.indeed.imhotep.BitTree;
import com.indeed.imhotep.api.FTGSIterator;
import com.indeed.util.core.Pair;
import com.indeed.util.core.reference.SharedReference;
import org.apache.log4j.Logger;
/**
* @author jplaisance
*/
public abstract class AbstractFlamdexFTGSIterator implements FTGSIterator {
private static final Logger log = Logger.getLogger(AbstractFlamdexFTGSIterator.class);
/**
*
*/
protected final ImhotepLocalSession session;
protected final int[] groupsSeen;
protected final BitTree bitTree;
protected final long[][] termGrpStats;
protected boolean currentFieldIsIntType;
protected SharedReference<FlamdexReader> flamdexReader;
long intTermsTime = 0;
long stringTermsTime = 0;
long docsTime = 0;
long lookupsTime = 0;
long timingErrorTime = 0;
protected String currentField;
private int groupPointer;
private int groupsSeenCount;
protected boolean resetGroupStats = false;
protected FastBitSet fieldZeroDocBitset;
protected int termIndex;
public AbstractFlamdexFTGSIterator(ImhotepLocalSession imhotepLocalSession, SharedReference<FlamdexReader> flamdexReader) {
this.session = imhotepLocalSession;
this.termGrpStats = new long[session.numStats][session.docIdToGroup.getNumGroups()];
this.groupsSeen = new int[session.docIdToGroup.getNumGroups()];
this.bitTree = new BitTree(session.docIdToGroup.getNumGroups());
this.flamdexReader = flamdexReader;
}
@Override
public abstract boolean nextField();
@Override
public abstract void close();
@Override
public final String fieldName() {
return currentField;
}
@Override
public final boolean fieldIsIntType() {
return currentFieldIsIntType;
}
@Override
public abstract boolean nextTerm();
@Override
public abstract long termDocFreq();
@Override
public abstract long termIntVal();
@Override
public abstract String termStringVal();
@Override
public final boolean nextGroup() {
if (!resetGroupStats) {
if (groupPointer >= groupsSeenCount) return false;
groupPointer++;
return groupPointer < groupsSeenCount;
}
return calculateTermGroupStats();
}
private boolean calculateTermGroupStats() {
// clear out ram from previous iterations if necessary
for (final long[] x : termGrpStats) ImhotepLocalSession.clear(x, groupsSeen, groupsSeenCount);
groupsSeenCount = 0;
if (fieldZeroDocBitset != null) {
if (termIndex == fieldZeroDocBitset.size()) expandFieldNonZeroDocBitset();
final boolean skip = fieldZeroDocBitset.get(termIndex);
termIndex++;
if (skip) return false;
}
// this is the critical loop of all of imhotep, making this loop faster is very good....
synchronized (session) {
while (true) {
if (ImhotepLocalSession.logTiming) docsTime -= System.nanoTime();
final int n = fillDocIdBuffer();
if (ImhotepLocalSession.logTiming) {
docsTime += System.nanoTime();
lookupsTime -= System.nanoTime();
}
session.docIdToGroup.nextGroupCallback(n, termGrpStats, bitTree);
if (ImhotepLocalSession.logTiming) {
lookupsTime += System.nanoTime();
timingErrorTime -= System.nanoTime();
timingErrorTime += System.nanoTime();
}
if (n < ImhotepLocalSession.BUFFER_SIZE) break;
}
}
groupsSeenCount = bitTree.dump(groupsSeen);
if (fieldZeroDocBitset != null && groupsSeenCount == 0) {
fieldZeroDocBitset.set(termIndex - 1);
}
groupPointer = 0;
resetGroupStats = false;
return groupsSeenCount > 0;
}
protected abstract int fillDocIdBuffer();
private void expandFieldNonZeroDocBitset() {
synchronized (session) {
if (fieldZeroDocBitset == null) return;
if(session.memory.claimMemory(FastBitSet.calculateMemoryUsage(fieldZeroDocBitset.size() * 2))) {
final FastBitSet tmpBitset = new FastBitSet(fieldZeroDocBitset.size() * 2);
tmpBitset.or(fieldZeroDocBitset);
final long oldSize = fieldZeroDocBitset.memoryUsage();
fieldZeroDocBitset = tmpBitset;
session.fieldZeroDocBitsets.put(Pair.of(currentField, currentFieldIsIntType), fieldZeroDocBitset);
session.memory.releaseMemory(oldSize);
} else {
ImhotepLocalSession.log.warn("Insufficient expansion memory, disabling ftgs zero group bitset optimization");
session.clearZeroDocBitsets();
fieldZeroDocBitset = null;
session.fieldZeroDocBitsets = null;
}
}
}
@Override
public final int group() {
return groupsSeen[groupPointer];
}
@Override
public final void groupStats(long[] stats) {
final int group = group();
for (int i = 0; i < session.numStats; i++) {
stats[i] = termGrpStats[i][group];
}
}
}