package com.browseengine.bobo.search;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;

import com.browseengine.bobo.api.BoboSegmentReader;
import com.browseengine.bobo.docidset.RandomAccessDocIdSet;
import com.browseengine.bobo.facets.FacetCountCollector;
import com.browseengine.bobo.facets.FacetCountCollectorSource;
import com.browseengine.bobo.mapred.BoboMapFunctionWrapper;

/**
 * Executes a query against the single segment wrapped by an {@link AtomicReaderContext},
 * passing every scored document through a {@link FacetValidator} before handing it to the
 * hit {@link Collector}.
 *
 * <p>The context's reader is cast to {@link BoboSegmentReader} in the constructor, so a
 * context backed by any other reader type fails with a {@link ClassCastException}.
 */
public class BoboSearcher {
  private List<FacetHitCollector> _facetCollectors;
  private final BoboSegmentReader _boboSegmentReader;
  private final AtomicReaderContext _atomicReaderContext;

  /**
   * @param ctx segment context to search; {@code ctx.reader()} must be a
   *        {@link BoboSegmentReader}
   */
  public BoboSearcher(AtomicReaderContext ctx) {
    _atomicReaderContext = ctx;
    _facetCollectors = new LinkedList<FacetHitCollector>();
    _boboSegmentReader = (BoboSegmentReader) ctx.reader();
  }

  /**
   * Replaces the facet hit collectors used by subsequent searches.
   *
   * @param facetHitCollectors the new list; a {@code null} argument is ignored and the
   *        current list is kept. The list is stored by reference, not copied.
   */
  public void setFacetHitCollectorList(List<FacetHitCollector> facetHitCollectors) {
    if (facetHitCollectors != null) {
      _facetCollectors = facetHitCollectors;
    }
  }

  /**
   * Decides, per document, whether a hit survives the post filters and feeds the facet
   * count collectors accordingly.
   *
   * <p>The first {@code _numPostFilters} entries of {@code _collectors} are the ones that
   * carry a filter; the remaining entries only contribute count collectors.
   */
  abstract static class FacetValidator {
    protected final FacetHitCollector[] _collectors;
    protected final int _numPostFilters;
    protected FacetCountCollector[] _countCollectors;

    /** Smallest doc id worth trying next; written by every call to {@code validate}. */
    public int _nextTarget;

    /**
     * Orders the post-filter prefix of {@code _collectors} by ascending facet selectivity
     * as reported by each collector's filter for the given reader.
     */
    private void sortPostCollectors(final BoboSegmentReader reader) {
      Comparator<FacetHitCollector> bySelectivity = new Comparator<FacetHitCollector>() {
        @Override
        public int compare(FacetHitCollector fhc1, FacetHitCollector fhc2) {
          // Double.compare is a total order (NaN included), so the Comparator contract
          // holds even if a filter reports a NaN selectivity.
          return Double.compare(fhc1._filter.getFacetSelectivity(reader),
            fhc2._filter.getFacetSelectivity(reader));
        }
      };
      Arrays.sort(_collectors, 0, _numPostFilters, bySelectivity);
    }

    /**
     * @param collectors all facet hit collectors, with the filtered ones first; the array
     *        is kept by reference and reordered in place later on
     * @param numPostFilters number of leading entries of {@code collectors} that carry a
     *        filter
     */
    public FacetValidator(FacetHitCollector[] collectors, int numPostFilters) throws IOException {
      _collectors = collectors;
      _numPostFilters = numPostFilters;
      _countCollectors = new FacetCountCollector[collectors.length];
    }

    /**
     * This method validates the doc against any multi-select enabled fields.
     * Implementations also set {@link #_nextTarget}.
     *
     * @param docid document to check
     * @return true if all fields matched
     */
    public abstract boolean validate(final int docid) throws IOException;

    /**
     * Prepares the validator for a segment: sorts the post-filter collectors, forwards
     * the reader to every collector and rebuilds {@code _countCollectors} from the
     * collectors' current, non-null count collectors.
     *
     * @param reader segment reader about to be searched
     * @param docBase value forwarded unchanged to each collector's {@code setNextReader}
     */
    public void setNextReader(BoboSegmentReader reader, int docBase) throws IOException {
      List<FacetCountCollector> active = new ArrayList<FacetCountCollector>();
      sortPostCollectors(reader);
      for (int i = 0; i < _collectors.length; ++i) {
        _collectors[i].setNextReader(reader, docBase);
        FacetCountCollector collector = _collectors[i]._currentPointers.facetCountCollector;
        if (collector != null) {
          active.add(collector);
        }
      }
      _countCollectors = active.toArray(new FacetCountCollector[active.size()]);
    }

    /**
     * @return a new array holding the current per-segment count collectors followed by,
     *         for each hit collector, its collect-all list and then its count list
     */
    public FacetCountCollector[] getCountCollectors() {
      List<FacetCountCollector> all = new ArrayList<FacetCountCollector>();
      all.addAll(Arrays.asList(_countCollectors));
      for (FacetHitCollector facetHitCollector : _collectors) {
        all.addAll(facetHitCollector._collectAllCollectorList);
        all.addAll(facetHitCollector._countCollectorList);
      }
      return all.toArray(new FacetCountCollector[all.size()]);
    }

    /** Counts {@code docid} on every active count collector. */
    protected final void collectOnAll(int docid) throws IOException {
      for (FacetCountCollector collector : _countCollectors) {
        collector.collect(docid);
      }
    }

    /**
     * Counts {@code docid} on the count collector of the single field that rejected it.
     * The collector is optional (setNextReader treats it as nullable), so a missing one
     * is skipped instead of being dereferenced.
     */
    protected final void collectOnMiss(FacetHitCollector.CurrentPointers miss, int docid)
        throws IOException {
      FacetCountCollector collector = miss.facetCountCollector;
      if (collector != null) {
        collector.collect(docid);
      }
    }
  }

  /** Validator for two or more post filters; walks each filter's doc id iterator. */
  private final static class DefaultFacetValidator extends FacetValidator {

    public DefaultFacetValidator(FacetHitCollector[] collectors, int numPostFilters)
        throws IOException {
      super(collectors, numPostFilters);
    }

    /**
     * This method validates the doc against any multi-select enabled fields.
     *
     * <p>If exactly one post filter misses, the doc is rejected but still counted on that
     * filter's own count collector. If a second filter misses, the doc is rejected without
     * counting and {@code _nextTarget} becomes the smaller of the two missing iterators'
     * positions. In all other cases {@code _nextTarget} is {@code docid + 1}.
     *
     * @param docid document to check
     * @return true if all fields matched
     */
    @Override
    public final boolean validate(final int docid) throws IOException {
      FacetHitCollector.CurrentPointers miss = null;

      for (int i = 0; i < _numPostFilters; i++) {
        FacetHitCollector.CurrentPointers cur = _collectors[i]._currentPointers;
        int sid = cur.doc;

        if (sid < docid) {
          sid = cur.postDocIDSetIterator.advance(docid);
          cur.doc = sid;
          if (sid == DocIdSetIterator.NO_MORE_DOCS) {
            // move this to front so that the call can find the failure faster
            FacetHitCollector tmp = _collectors[0];
            _collectors[0] = _collectors[i];
            _collectors[i] = tmp;
          }
        }

        if (sid > docid) { // mismatch
          if (miss != null) {
            // failed because we already have a mismatch
            _nextTarget = (miss.doc < cur.doc ? miss.doc : cur.doc);
            return false;
          }
          miss = cur;
        }
      }

      _nextTarget = docid + 1;

      if (miss != null) {
        collectOnMiss(miss, docid);
        return false;
      }
      collectOnAll(docid);
      return true;
    }
  }

  /** Validator for exactly one post filter; uses random access instead of an iterator. */
  private final static class OnePostFilterFacetValidator extends FacetValidator {
    private final FacetHitCollector _firsttime;

    OnePostFilterFacetValidator(FacetHitCollector[] collectors) throws IOException {
      super(collectors, 1);
      _firsttime = _collectors[0];
    }

    /**
     * Rejects the doc when the single post filter has a doc id set that does not contain
     * it; a rejected doc is still counted on that filter's own count collector. A filter
     * without a doc id set accepts every doc. {@code _nextTarget} is always
     * {@code docid + 1}.
     */
    @Override
    public final boolean validate(int docid) throws IOException {
      FacetHitCollector.CurrentPointers pointers = _firsttime._currentPointers;
      RandomAccessDocIdSet set = pointers.docidSet;
      boolean missed = set != null && !set.get(docid);

      _nextTarget = docid + 1;

      if (missed) {
        collectOnMiss(pointers, docid);
        return false;
      }
      collectOnAll(docid);
      return true;
    }
  }

  /** Validator used when no collector carries a filter: every doc passes and is counted. */
  private final static class NoNeedFacetValidator extends FacetValidator {

    NoNeedFacetValidator(FacetHitCollector[] collectors) throws IOException {
      super(collectors, 0);
    }

    /** Counts the doc on all count collectors; does not touch {@code _nextTarget}. */
    @Override
    public final boolean validate(int docid) throws IOException {
      collectOnAll(docid);
      return true;
    }
  }

  /**
   * Builds the validator matching the current facet hit collectors.
   *
   * <p>Collectors with a filter are placed at the front of the array in list order;
   * collectors without one are filled in from the back. The number of filtered collectors
   * selects the implementation (none, exactly one, or several).
   */
  protected FacetValidator createFacetValidator() throws IOException {
    FacetHitCollector[] collectors = new FacetHitCollector[_facetCollectors.size()];
    FacetCountCollectorSource[] countCollectors =
        new FacetCountCollectorSource[collectors.length];
    int front = 0;
    int back = collectors.length;

    for (FacetHitCollector facetCollector : _facetCollectors) {
      int slot = (facetCollector._filter != null) ? front++ : --back;
      collectors[slot] = facetCollector;
      countCollectors[slot] = facetCollector._facetCountCollectorSource;
    }

    int numPostFilters = front;
    if (numPostFilters == 0) {
      return new NoNeedFacetValidator(collectors);
    }
    if (numPostFilters == 1) {
      return new OnePostFilterFacetValidator(collectors);
    }
    return new DefaultFacetValidator(collectors, numPostFilters);
  }

  /**
   * Scores the segment with {@code weight}, optionally restricted by {@code filter}, and
   * collects every document accepted by the facet validator.
   *
   * @param weight query weight that supplies the scorer
   * @param filter optional restriction; {@code null} means every scored doc is a candidate
   * @param collector receives the accepted documents
   * @param start passed to the validator's {@code setNextReader} as the doc base
   * @param mapReduceWrapper optional map-reduce hook; may be {@code null}
   */
  public void search(Weight weight, Filter filter, Collector collector, int start,
      BoboMapFunctionWrapper mapReduceWrapper) throws IOException {
    final FacetValidator validator = createFacetValidator();
    if (filter == null) {
      searchUnfiltered(weight, collector, start, mapReduceWrapper, validator);
    } else {
      searchFiltered(weight, filter, collector, start, mapReduceWrapper, validator);
    }
  }

  /** Drives the scorer directly, skipping ahead whenever the validator rejects a doc. */
  private void searchUnfiltered(Weight weight, Collector collector, int start,
      BoboMapFunctionWrapper mapReduceWrapper, FacetValidator validator) throws IOException {
    collector.setNextReader(_atomicReaderContext);
    validator.setNextReader(_boboSegmentReader, start);
    Scorer scorer =
        weight.scorer(_atomicReaderContext, true, true, _boboSegmentReader.getLiveDocs());
    if (scorer != null) {
      collector.setScorer(scorer);
      int target = scorer.nextDoc();
      while (target != DocIdSetIterator.NO_MORE_DOCS) {
        if (validator.validate(target)) {
          collector.collect(target);
          target = scorer.nextDoc();
        } else {
          target = scorer.advance(validator._nextTarget);
        }
      }
    }
    // Runs even when the weight produced no scorer.
    if (mapReduceWrapper != null) {
      mapReduceWrapper.mapFullIndexReader(_boboSegmentReader, validator.getCountCollectors());
    }
  }

  /**
   * Leap-frogs the filter iterator and the scorer, validating each doc both agree on.
   *
   * <p>NOTE(review): the early exits below (no filter doc id set, no scorer, no filter
   * iterator) skip {@code finalizeSegment}, unlike the unfiltered path which always calls
   * the wrapper. Kept as is; confirm whether that is intended.
   */
  private void searchFiltered(Weight weight, Filter filter, Collector collector, int start,
      BoboMapFunctionWrapper mapReduceWrapper, FacetValidator validator) throws IOException {
    DocIdSet filterDocIdSet =
        filter.getDocIdSet(_atomicReaderContext, _boboSegmentReader.getLiveDocs());
    if (filterDocIdSet == null) {
      // The filter produced no doc id set for this segment, so there is nothing to search.
      return;
    }

    collector.setNextReader(_atomicReaderContext);
    validator.setNextReader(_boboSegmentReader, start);
    Scorer scorer =
        weight.scorer(_atomicReaderContext, true, false, _boboSegmentReader.getLiveDocs());
    if (scorer == null) {
      return;
    }
    collector.setScorer(scorer);

    // CHECKME: a ConjunctionScorer might be able to replace this hand-written loop.
    DocIdSetIterator filterDocIdIterator = filterDocIdSet.iterator();
    if (filterDocIdIterator == null) {
      return;
    }

    int doc = -1;
    int target = filterDocIdIterator.nextDoc();
    while (target < DocIdSetIterator.NO_MORE_DOCS) {
      if (doc < target) {
        doc = scorer.advance(target);
      }

      if (doc == target) { // permitted by filter
        if (validator.validate(doc)) {
          if (mapReduceWrapper != null) {
            mapReduceWrapper.mapSingleDocument(doc, _boboSegmentReader);
          }
          collector.collect(doc);
          target = filterDocIdIterator.nextDoc();
        } else {
          // skip to the next possible docid
          target = filterDocIdIterator.advance(validator._nextTarget);
        }
      } else { // doc > target
        if (doc == DocIdSetIterator.NO_MORE_DOCS) {
          break;
        }
        target = filterDocIdIterator.advance(doc);
      }
    }

    if (mapReduceWrapper != null) {
      mapReduceWrapper.finalizeSegment(_boboSegmentReader, validator.getCountCollectors());
    }
  }
}