package com.browseengine.bobo.facets.filter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;

import com.browseengine.bobo.api.BoboSegmentReader;
import com.browseengine.bobo.docidset.EmptyDocIdSet;
import com.browseengine.bobo.docidset.OrDocIdSet;
import com.browseengine.bobo.docidset.RandomAccessDocIdSet;
import com.browseengine.bobo.facets.data.FacetDataCache;
import com.browseengine.bobo.facets.data.TermValueList;
/**
 * A {@link RandomAccessFilter} that adaptively chooses between the wrapped facet filter's
 * doc id set and a postings-based (term list) doc id set, depending on how selective the
 * requested facet values are within the current segment.
 */
public class AdaptiveFacetFilter extends RandomAccessFilter {

  private final RandomAccessFilter _facetFilter;
  private final FacetDataCacheBuilder _facetDataCacheBuilder;
  private final Set<String> _valSet;
  private final boolean _takeComplement;

  /** Builds the per-segment facet data cache used to look up value frequencies. */
  public interface FacetDataCacheBuilder {
    FacetDataCache<?> build(BoboSegmentReader reader);

    String getName();

    String getIndexFieldName();
  }

  /**
   * If takeComplement is true, we still return the filter for NotValues. Therefore, the
   * calling function of this class needs to apply NotFilter on top of this filter if
   * takeComplement is true.
   *
   * @param facetDataCacheBuilder builds the per-segment facet data cache
   * @param facetFilter the underlying facet filter to delegate to
   * @param val the selected facet values
   * @param takeComplement whether the caller intends to negate this filter (see note above)
   */
  public AdaptiveFacetFilter(FacetDataCacheBuilder facetDataCacheBuilder,
      RandomAccessFilter facetFilter, String[] val, boolean takeComplement) {
    _facetFilter = facetFilter;
    _facetDataCacheBuilder = facetDataCacheBuilder;
    _valSet = new HashSet<String>(Arrays.asList(val));
    _takeComplement = takeComplement;
  }

  @Override
  public double getFacetSelectivity(BoboSegmentReader reader) {
    double selectivity = _facetFilter.getFacetSelectivity(reader);
    // The complemented filter matches exactly the docs the inner filter does not.
    return _takeComplement ? 1.0 - selectivity : selectivity;
  }

  @Override
  public RandomAccessDocIdSet getRandomAccessDocIdSet(BoboSegmentReader reader) throws IOException {
    RandomAccessDocIdSet innerDocSet = _facetFilter.getRandomAccessDocIdSet(reader);
    if (innerDocSet == EmptyDocIdSet.getInstance()) {
      return innerDocSet;
    }

    FacetDataCache<?> dataCache = _facetDataCacheBuilder.build(reader);
    int totalCount = reader.maxDoc();
    TermValueList<?> valArray = dataCache.valArray;
    int freqCount = 0;

    // Keep only the values that actually occur in this segment, accumulating their doc
    // frequencies so we can estimate the size of the result.
    List<String> validVals = new ArrayList<String>(_valSet.size());
    for (String val : _valSet) {
      int idx = valArray.indexOf(val);
      if (idx >= 0) {
        validVals.add(valArray.get(idx)); // get and format the value
        freqCount += dataCache.freqs[idx];
      }
    }

    if (validVals.isEmpty()) {
      return EmptyDocIdSet.getInstance();
    }

    // takeComplement is only used to choose between TermListRandomAccessDocIdSet and innerDocSet
    int validFreqCount = _takeComplement ? (totalCount - freqCount) : freqCount;

    // Use the postings-based set only when matching docs are less than half the segment.
    // (validFreqCount < totalCount - validFreqCount) is equivalent to
    // (2 * validFreqCount < totalCount) but, unlike the former (validFreqCount << 1),
    // cannot overflow int when validFreqCount exceeds 2^30.
    if (_facetDataCacheBuilder.getIndexFieldName() != null
        && validFreqCount < totalCount - validFreqCount) {
      return new TermListRandomAccessDocIdSet(_facetDataCacheBuilder.getIndexFieldName(),
          innerDocSet, validVals, reader);
    }
    return innerDocSet;
  }

  /**
   * Doc id set that iterates via per-term postings when the selected value list is small,
   * falling back to the inner set's iterator otherwise. Random access ({@link #get(int)})
   * always delegates to the inner set.
   */
  public static class TermListRandomAccessDocIdSet extends RandomAccessDocIdSet {

    private final RandomAccessDocIdSet _innerSet;
    private final List<String> _vals;
    private final AtomicReader _reader;
    private final String _name;

    // Above this many values, OR-ing per-term postings iterators is assumed to be slower
    // than simply iterating the inner set.
    private final static int OR_THRESHOLD = 5;

    TermListRandomAccessDocIdSet(String name, RandomAccessDocIdSet innerSet, List<String> vals,
        AtomicReader reader) {
      _name = name;
      _innerSet = innerSet;
      _vals = vals;
      _reader = reader;
    }

    /** Doc id set backed by the postings of a single term. */
    public static class TermDocIdSet extends DocIdSet {
      final Term term;
      private final AtomicReader reader;

      public TermDocIdSet(AtomicReader reader, String name, String val) {
        this.reader = reader;
        term = new Term(name, val);
      }

      @Override
      public DocIdSetIterator iterator() throws IOException {
        // termDocsEnum returns null when the term does not exist in this segment.
        final DocsEnum docsEnum = reader.termDocsEnum(term);
        if (docsEnum == null) {
          return EmptyDocIdSet.getInstance().iterator();
        }
        return docsEnum;
      }
    }

    @Override
    public boolean get(int docId) {
      return _innerSet.get(docId);
    }

    @Override
    public DocIdSetIterator iterator() throws IOException {
      if (_vals.isEmpty()) {
        return EmptyDocIdSet.getInstance().iterator();
      }
      if (_vals.size() == 1) {
        // Single term: its postings iterator is the result directly.
        return new TermDocIdSet(_reader, _name, _vals.get(0)).iterator();
      }
      if (_vals.size() < OR_THRESHOLD) {
        // Few terms: union their postings.
        ArrayList<DocIdSet> docSetList = new ArrayList<DocIdSet>(_vals.size());
        for (String val : _vals) {
          docSetList.add(new TermDocIdSet(_reader, _name, val));
        }
        return new OrDocIdSet(docSetList).iterator();
      }
      // Many terms: the inner set's iterator is expected to be cheaper.
      return _innerSet.iterator();
    }
  }
}