/**
* This software is licensed to you under the Apache License, Version 2.0 (the
* "Apache License").
*
* LinkedIn's contributions are made under the Apache License. If you contribute
* to the Software, the contributions will be deemed to have been made under the
* Apache License, unless you expressly indicate otherwise. Please do not make any
* contributions that would be inconsistent with the Apache License.
*
* You may obtain a copy of the Apache License at http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, this software
* distributed under the Apache License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the Apache
* License for the specific language governing permissions and limitations for the
* software governed under the Apache License.
*
* © 2012 LinkedIn Corp. All Rights Reserved.
*/
package com.senseidb.svc.impl;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.atomic.AtomicInteger;
import com.browseengine.bobo.api.FacetAccessible;
import com.browseengine.bobo.facets.FacetHandler;
import com.sensei.search.req.protobuf.SenseiReqProtoSerializer;
import com.senseidb.search.relevance.SimpleTFSimilarity;
import com.senseidb.search.req.*;
import org.apache.log4j.Logger;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Weight;
import proj.zoie.api.ZoieIndexReader;
import proj.zoie.api.ZoieIndexReader.SubReaderAccessor;
import proj.zoie.api.ZoieIndexReader.SubReaderInfo;
import com.browseengine.bobo.api.BoboBrowser;
import com.browseengine.bobo.api.BoboIndexReader;
import com.browseengine.bobo.api.BrowseException;
import com.browseengine.bobo.api.BrowseHit;
import com.browseengine.bobo.api.BrowseRequest;
import com.browseengine.bobo.api.BrowseResult;
import com.browseengine.bobo.api.MultiBoboBrowser;
import com.browseengine.bobo.sort.SortCollector;
import com.linkedin.norbert.network.JavaSerializer;
import com.linkedin.norbert.network.Serializer;
import com.senseidb.indexing.SenseiIndexPruner;
import com.senseidb.indexing.SenseiIndexPruner.IndexReaderSelector;
import com.senseidb.search.node.ResultMerger;
import com.senseidb.search.node.SenseiCore;
import com.senseidb.search.node.SenseiQueryBuilderFactory;
import com.senseidb.search.req.SenseiHit;
import com.senseidb.search.req.SenseiRequest;
import com.senseidb.search.req.SenseiResult;
import com.senseidb.search.req.mapred.impl.SenseiMapFunctionWrapper;
import com.senseidb.util.RequestConverter;
import com.yammer.metrics.core.Timer;
import static com.senseidb.servlet.SenseiSearchServletParams.PARAM_RESULT_HIT_UID;
/**
 * Core search service that executes a {@link SenseiRequest} against a list of
 * {@link BoboIndexReader}s and converts the outcome into a {@link SenseiResult}.
 *
 * <p>Segment readers are first filtered through the {@link SenseiIndexPruner} obtained from the
 * core; the time spent doing so is recorded on the {@code "prune"} timer.
 */
public class CoreSenseiServiceImpl extends AbstractSenseiCoreService<SenseiRequest, SenseiResult> {
  public static final Serializer<SenseiRequest, SenseiResult> JAVA_SERIALIZER =
      JavaSerializer.apply("SenseiRequest", SenseiRequest.class, SenseiResult.class);
  public static final Serializer<SenseiRequest, SenseiResult> PROTO_SERIALIZER =
      new SenseiReqProtoSerializer();
  public static final Serializer<SenseiRequest, SenseiResult> PROTO_V2_SERIALIZER =
      new SenseiSnappyProtoSerializer();

  private static final Logger logger = Logger.getLogger(CoreSenseiServiceImpl.class);

  // Names of the no-arg methods a custom collector is expected to expose; they are looked up
  // reflectively on the collector's runtime class.
  private static final String TOP_DOCS_METHOD = "topDocs";
  private static final String GET_HITS_FEATURES_METHOD = "getHitsFeatures";
  private static final String GET_TOTAL_HITS_METHOD = "getTotalHits";

  /** Timer registered under the name {@code "prune"}; wraps the segment-reader selection. */
  private final Timer _timerMetric;

  /**
   * Creates the service on top of the given core and registers the {@code "prune"} timer.
   *
   * @param core the core handed to the superclass constructor
   */
  public CoreSenseiServiceImpl(SenseiCore core) {
    super(core);
    _timerMetric = registerTimer("prune");
  }

  /**
   * @return the metric scope of this service, always {@code "node"}
   */
  @Override
  protected String getMetricScope() {
    return "node";
  }

  /**
   * Runs the browse request and assembles the {@link SenseiResult}.
   *
   * <p>When the request does not build a custom collector, the browser's own
   * {@code browse(req)} is used. Otherwise the collector is driven through the browser and its
   * hits, features and total hit count are fetched reflectively via the {@code topDocs},
   * {@code getHitsFeatures} and {@code getTotalHits} methods.
   *
   * @param senseiRequest the original request (supplies the collector, relevance flag and select set)
   * @param browser the browser to search with
   * @param req the converted browse request
   * @param subReaderAccessor used to resolve a global doc id to its sub-reader when a hit has no
   *        raw UID field
   * @return the populated result, including timing, transaction id and parsed query string
   * @throws IllegalArgumentException if the request's offset or count is negative
   * @throws Exception if browsing fails, or if the custom collector's results cannot be
   *         extracted before a browse result has been assembled
   */
  private SenseiResult browse(SenseiRequest senseiRequest, MultiBoboBrowser browser, BrowseRequest req,
      SubReaderAccessor<BoboIndexReader> subReaderAccessor) throws BrowseException, IOException, Exception {
    final SenseiResult result = new SenseiResult();
    long start = System.currentTimeMillis();

    int offset = req.getOffset();
    int count = req.getCount();
    if (offset < 0 || count < 0) {
      throw new IllegalArgumentException("both offset and count must be >= 0: " + offset + "/" + count);
    }

    final Collector collector = senseiRequest.buildCollector(req.getQuery());
    BrowseResult res = null;
    BrowseHit[] hits = null;
    float[][] features = null;

    if (collector == null) {
      if (senseiRequest.isSimpleRelevance()) {
        browser.setSimilarity(new SimpleTFSimilarity());
      }
      res = browser.browse(req);
      hits = res.getHits();
    } else {
      Map<String, FacetAccessible> facetCollectors = new HashMap<String, FacetAccessible>();
      Weight w = req.getQuery().createWeight(browser);
      browser.browse(req, w, collector, facetCollectors, offset);
      try {
        // A custom collector must implement topDocs(), getHitsFeatures() and getTotalHits().
        ScoreDoc[] scoreDocs = (ScoreDoc[]) invokeCollectorMethod(collector, TOP_DOCS_METHOD);
        features = (float[][]) invokeCollectorMethod(collector, GET_HITS_FEATURES_METHOD);
        hits = toBrowseHits(browser, scoreDocs);

        res = new BrowseResult();
        res.setHits(hits);
        res.addAll(facetCollectors);
        res.setTid(req.getTid());

        int totalHits = (Integer) invokeCollectorMethod(collector, GET_TOTAL_HITS_METHOD);
        res.setNumHits(totalHits);
        res.setNumGroups(totalHits);
      } catch (Exception e) {
        if (res == null) {
          // Nothing usable was assembled. Carrying on would only fail further down with a
          // NullPointerException and hide the real cause, so propagate it (the caller logs it).
          throw e;
        }
        // The hits are already in place; keep going on a best-effort basis.
        logger.error(e.getMessage(), e);
      }
    }

    if (req.getMapReduceWrapper() != null) {
      result.setMapReduceResult(req.getMapReduceWrapper().getResult());
    }

    Set<String> selectSet = senseiRequest.getSelectSet();
    SenseiHit[] senseiHits = new SenseiHit[hits.length];
    for (int i = 0; i < hits.length; i++) {
      SenseiHit senseiHit = toSenseiHit(hits[i], subReaderAccessor, selectSet);
      if (features != null) {
        senseiHit.setFeatures(features[i]);
      }
      senseiHits[i] = senseiHit;
    }

    result.setHits(senseiHits);
    result.setNumHits(res.getNumHits());
    result.setNumGroups(res.getNumGroups());
    result.setGroupAccessibles(res.getGroupAccessibles());
    result.setSortCollector(res.getSortCollector());
    result.setTotalDocs(browser.numDocs());
    // The facet accessibles are intentionally not closed here: closing is deferred
    // till result merging time.
    result.addAll(res.getFacetMap());

    long end = System.currentTimeMillis();
    result.setTime(end - start);
    // set the transaction ID to trace transactions
    result.setTid(req.getTid());

    Query parsedQ = req.getQuery();
    result.setParsedQuery(parsedQ != null ? parsedQ.toString() : "*:*");
    return result;
  }

  /** Reflectively invokes the public no-arg method {@code methodName} on the collector. */
  private static Object invokeCollectorMethod(Collector collector, String methodName) throws Exception {
    return collector.getClass().getMethod(methodName).invoke(collector);
  }

  /**
   * Builds one {@link BrowseHit} per score doc, filling in the formatted and raw field values of
   * every facet handler known to the browser.
   */
  private static BrowseHit[] toBrowseHits(MultiBoboBrowser browser, ScoreDoc[] scoreDocs) throws IOException {
    Map<String, FacetHandler<?>> facetHandlerMap = browser.getFacetHandlerMap();
    BrowseHit[] hits = new BrowseHit[scoreDocs.length];
    for (int i = 0; i < scoreDocs.length; i++) {
      ScoreDoc doc = scoreDocs[i];
      BrowseHit hit = new BrowseHit();
      hit.setScore(doc.score);
      hit.setDocid(doc.doc);
      Map<String, String[]> fieldValues = new HashMap<String, String[]>();
      Map<String, Object[]> rawFieldValues = new HashMap<String, Object[]>();
      for (String fieldName : facetHandlerMap.keySet()) {
        fieldValues.put(fieldName, browser.getFieldVal(doc.doc, fieldName));
        rawFieldValues.put(fieldName, browser.getRawFieldVal(doc.doc, fieldName));
      }
      hit.setFieldValues(fieldValues);
      hit.setRawFieldValues(rawFieldValues);
      hits[i] = hit;
    }
    return hits;
  }

  /**
   * Copies a {@link BrowseHit} into a new {@link SenseiHit}. The UID is taken from the hit's raw
   * UID field when present, otherwise it is resolved through the hit's sub-reader. When a
   * non-empty select set is given, the hit's field value maps are pruned in place to that set
   * before being copied.
   */
  private static SenseiHit toSenseiHit(BrowseHit hit, SubReaderAccessor<BoboIndexReader> subReaderAccessor,
      Set<String> selectSet) {
    SenseiHit senseiHit = new SenseiHit();
    int docid = hit.getDocid();
    SubReaderInfo<BoboIndexReader> readerInfo = subReaderAccessor.getSubReaderInfo(docid);
    Long uid = (Long) hit.getRawField(PARAM_RESULT_HIT_UID);
    if (uid == null) {
      uid = ((ZoieIndexReader<BoboIndexReader>) readerInfo.subreader.getInnerReader()).getUID(readerInfo.subdocid);
    }
    senseiHit.setUID(uid);
    senseiHit.setDocid(docid);
    senseiHit.setScore(hit.getScore());
    senseiHit.setComparable(hit.getComparable());

    if (selectSet != null && !selectSet.isEmpty()) {
      // Clear the data that is not used.
      retainSelected(hit.getFieldValues(), selectSet);
      retainSelected(hit.getRawFieldValues(), selectSet);
    }

    senseiHit.setFieldValues(hit.getFieldValues());
    senseiHit.setRawFieldValues(hit.getRawFieldValues());
    senseiHit.setStoredFields(hit.getStoredFields());
    senseiHit.setExplanation(hit.getExplanation());
    senseiHit.setGroupField(hit.getGroupField());
    senseiHit.setGroupValue(hit.getGroupValue());
    senseiHit.setRawGroupValue(hit.getRawGroupValue());
    senseiHit.setGroupHitsCount(hit.getGroupHitsCount());
    senseiHit.setTermFreqMap(hit.getTermFreqMap());
    return senseiHit;
  }

  /** Removes from {@code values} every entry whose key is not in {@code selectSet}; null-safe. */
  private static void retainSelected(Map<String, ?> values, Set<String> selectSet) {
    if (values != null) {
      values.keySet().retainAll(selectSet);
    }
  }

  /**
   * Executes the request against the given readers.
   *
   * <p>The readers are expanded into segment readers, filtered by the index pruner's reader
   * selector (timed on the {@code "prune"} timer) and sorted by the pruner. The documents of
   * skipped segments are added back to the result's total document count. The browser created
   * for the search is closed before returning.
   *
   * @param request the request to execute; its searchable and query builder factory are set here
   * @param readerList the readers to search
   * @param queryBuilderFactory factory used to convert the request into a browse request
   * @return the search result, or an empty {@link SenseiResult} when there are no segment readers
   * @throws Exception any failure during pruning or browsing, after it has been logged
   */
  @Override
  public SenseiResult handlePartitionedRequest(final SenseiRequest request,
      List<BoboIndexReader> readerList, SenseiQueryBuilderFactory queryBuilderFactory) throws Exception {
    MultiBoboBrowser browser = null;
    try {
      final List<BoboIndexReader> segmentReaders = BoboBrowser.gatherSubReaders(readerList);
      if (segmentReaders == null || segmentReaders.isEmpty()) {
        return new SenseiResult();
      }

      final AtomicInteger skipDocs = new AtomicInteger(0);
      final SenseiIndexPruner pruner = _core.getIndexPruner();

      List<BoboIndexReader> validatedSegmentReaders = _timerMetric.time(new Callable<List<BoboIndexReader>>() {
        @Override
        public List<BoboIndexReader> call() throws Exception {
          IndexReaderSelector readerSelector = pruner.getReaderSelector(request);
          List<BoboIndexReader> validatedReaders = new ArrayList<BoboIndexReader>(segmentReaders.size());
          for (BoboIndexReader segmentReader : segmentReaders) {
            if (readerSelector.isSelected(segmentReader)) {
              validatedReaders.add(segmentReader);
            } else {
              // Remember how many documents were pruned away so totalDocs stays complete.
              skipDocs.addAndGet(segmentReader.numDocs());
            }
          }
          return validatedReaders;
        }
      });

      pruner.sort(validatedSegmentReaders);

      browser = new MultiBoboBrowser(BoboBrowser.createBrowsables(validatedSegmentReaders));
      request.setSearchable(browser);
      request.setQueryBuilderFactory(queryBuilderFactory);

      BrowseRequest breq = RequestConverter.convert(request, queryBuilderFactory);
      if (request.getMapReduceFunction() != null) {
        SenseiMapFunctionWrapper mapWrapper =
            new SenseiMapFunctionWrapper(request.getMapReduceFunction(), _core.getSystemInfo().getFacetInfos());
        breq.setMapReduceWrapper(mapWrapper);
      }

      SubReaderAccessor<BoboIndexReader> subReaderAccessor =
          ZoieIndexReader.getSubReaderAccessor(validatedSegmentReaders);
      SenseiResult res = browse(request, browser, breq, subReaderAccessor);
      res.setTotalDocs(res.getTotalDocs() + skipDocs.get());

      // For debugging serialization issues:
      // byte[] responseBytes = getSerializer().responseToBytes(res);
      // SenseiResult response2 = getSerializer().responseFromBytes(responseBytes);
      // if(!res.equals(response2)) {
      //   throw new IllegalArgumentException("Cant serialize response");
      // }
      return res;
    } catch (Exception e) {
      logger.error(e.getMessage(), e);
      throw e;
    } finally {
      if (browser != null) {
        try {
          browser.close();
        } catch (IOException ioe) {
          logger.error(ioe.getMessage(), ioe);
        }
      }
    }
  }

  /**
   * Merges the per-partition results through {@link ResultMerger} and afterwards closes the sort
   * collector of every result in the list, whether or not the merge succeeded.
   *
   * @param r the request the results belong to
   * @param resultList the results to merge; may be {@code null} as far as the clean-up is concerned
   * @return the merged result
   */
  @Override
  public SenseiResult mergePartitionedResults(SenseiRequest r, List<SenseiResult> resultList) {
    try {
      return ResultMerger.merge(r, resultList, true);
    } finally {
      if (resultList != null) {
        for (SenseiResult res : resultList) {
          // Guard against null entries so that clean-up never masks the merge outcome.
          SortCollector sortCollector = (res != null) ? res.getSortCollector() : null;
          if (sortCollector != null) {
            sortCollector.close();
          }
        }
      }
    }
  }

  /**
   * @param error the failure that triggered the request for an empty result; not inspected
   * @return a new, empty {@link SenseiResult}
   */
  @Override
  public SenseiResult getEmptyResultInstance(Throwable error) {
    return new SenseiResult();
  }

  /**
   * @return the serializer used by this service, {@link #PROTO_V2_SERIALIZER}
   */
  @Override
  public Serializer<SenseiRequest, SenseiResult> getSerializer() {
    return PROTO_V2_SERIALIZER;
  }
}