package lux.solr;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import lux.Evaluator;
import lux.SearchIteratorBase;
import lux.exception.LuxException;
import lux.functions.SearchBase.QueryParser;
import lux.index.FieldRole;
import lux.index.IndexConfiguration;
import net.sf.saxon.om.DocumentInfo;
import net.sf.saxon.om.NodeInfo;
import net.sf.saxon.om.SequenceIterator;
import net.sf.saxon.s9api.XdmNode;
import net.sf.saxon.trans.XPathException;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortField.Type;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.search.DocList;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SortSpec;
import org.apache.solr.util.SolrPluginUtils;
/**
*
* Perform distributed XQuery searches. We mimic lazy evaluation by maintaining an iterator
* that re-issues requests when its local cache is exhausted. Note: deep paging may be quite expensive
* since *all* results starting with the first must be retrieved for each page!
*
* TODO: We could optimize better if we kept track of per-shard positions. To do that we'd have to know which shard each result came from.
* This info is held in ResponseBuilder.resultIds, which is a map from id to ShardDoc; each ShardDoc has
* a shard id. We can count the # of docs from each shard and calculate a position from that.
*/
public class CloudSearchIterator extends SearchIteratorBase {

    private int limit; // maximum rows fetched per request (= solr 'rows')
    private SolrQueryResponse response; // cached response for the current page; null until the first search
    private final String query;
    private final QueryParser queryParser;
    private final String xmlFieldName;
    private final String uriFieldName;
    private final String idFieldName;
    private final HashSet<String> fieldNames; // fields to materialize when converting a DocList
    private String[] effectiveCriteria; // sortCriteria with the internal docid criterion replaced; built lazily

    /**
     * Initialize the iterator
     * @param eval the Evaluator holds context for the query
     * @param query the Lucene query to execute and iterate
     * @param queryParser either blank (for the default qp), or 'xml' for the xml query parser TODO: enum
     * @param sortCriteria the sort order for the results
     * @param start1 the 1-based start position at which to begin the iteration
     */
    public CloudSearchIterator (Evaluator eval, String query, QueryParser queryParser, String[] sortCriteria, int start1) {
        super (eval, sortCriteria, start1);
        this.limit = 20;
        this.queryParser = queryParser;
        this.query = query;
        IndexConfiguration indexConfig = eval.getCompiler().getIndexConfiguration();
        xmlFieldName = indexConfig.getFieldName(FieldRole.XML_STORE);
        uriFieldName = indexConfig.getFieldName(FieldRole.URI);
        idFieldName = indexConfig.getFieldName(FieldRole.ID);
        fieldNames = new HashSet<String>();
        fieldNames.add(xmlFieldName);
        fieldNames.add(uriFieldName);
        fieldNames.add(idFieldName);
    }

    /**
     * @return a fresh iterator over the same query, restarted at the current position
     * (start is 0-based internally; the constructor takes a 1-based position).
     */
    @Override
    public SequenceIterator<NodeInfo> getAnother() throws XPathException {
        return new CloudSearchIterator(eval, query, queryParser, sortCriteria, start + 1);
    }

    /**
     * Counts the results of the query. If no search has been run yet, a zero-row
     * search is issued just to obtain the count; the configured limit is restored
     * afterwards so that subsequent iteration still pages with a sensible row count.
     * @return the total number of matching documents
     */
    public long count() {
        if (response == null) {
            // retrieve no documents, just the count; restore the limit afterwards,
            // since otherwise a later call to next() would re-issue searches with
            // rows=0 and never retrieve any documents
            int savedLimit = limit;
            this.limit = 0;
            try {
                doCloudSearch();
            } finally {
                this.limit = savedLimit;
            }
        }
        return getResultNumFound(response);
    }

    /**
     * Returns the next result document, issuing a new (distributed) query whenever
     * the locally-cached page of results is exhausted.
     * @return the next document, or null when the iteration is complete
     * @throws XPathException if retrieval fails or the response has an unexpected shape
     */
    @Override
    public NodeInfo next() throws XPathException {
        for (;;) {
            if (response != null) {
                if (position >= getResultNumFound(response)) {
                    // all matching documents have been consumed
                    return null;
                }
                Object results = response.getValues().get("response");
                if (results == null) {
                    return null;
                }
                SolrDocumentList docs;
                if (results instanceof DocList) {
                    // local (non-distributed) result: materialize the stored fields we need
                    try {
                        docs = SolrPluginUtils.docListToSolrDocumentList ((DocList) results,
                            (SolrIndexSearcher) eval.getSearcher().getWrappedSearcher(), fieldNames, null);
                    } catch (IOException e) {
                        throw new XPathException (e);
                    }
                }
                else if (results instanceof SolrDocumentList) {
                    docs = (SolrDocumentList) results;
                }
                else {
                    throw new XPathException ("Solr query response unexpectedly of type " + results.getClass().getName());
                }
                if (position < docs.getStart() + docs.size()) {
                    return getNextDocument (docs);
                }
                // otherwise fall through and get the next page of results
            }
            doCloudSearch();
        }
    }

    /**
     * Builds a document node from the current result and advances the position.
     * @param docs the current page of results; must contain the document at this.position
     * @return the document node, with the originating SolrDocument attached as user data
     */
    private NodeInfo getNextDocument (SolrDocumentList docs) {
        // FIXME: test pagination I think there is a bug here if w/start > 0?
        SolrDocument doc = docs.get(position++ - (int) docs.getStart());
        String uri = (String) doc.getFirstValue(uriFieldName);
        Object oxml = doc.getFirstValue(xmlFieldName);
        Long id = (Long) doc.getFirstValue(idFieldName);
        if (id == null) {
            // try to support migrating an old index?
            throw new LuxException("This index has no lux docids: it cannot support Lux on Solr Cloud");
        }
        // the stored xml may be either a String or a binary blob; pass whichever is present
        String xml = (String) ((oxml instanceof String) ? oxml : null);
        byte [] bytes = (byte[]) ((oxml instanceof byte[]) ? oxml : null);
        XdmNode node = eval.getDocReader().createXdmNode(id, uri, xml, bytes);
        DocumentInfo docNode = node.getUnderlyingNode().getDocumentRoot();
        // remember the source document so downstream code can read other stored fields
        docNode.setUserData(SolrDocument.class.getName(), doc);
        return docNode;
    }

    /* Make a new query request, using this.query, start calculated based on the passed-in responseBuilder
     * sorting based on sortCriteria, and fields=lux_xml. Also: if rb asks for debug, pass that along
     */
    private void doCloudSearch () {
        ResponseBuilder origRB = ((SolrQueryContext)eval.getQueryContext()).getResponseBuilder();
        ModifiableSolrParams params = new ModifiableSolrParams();
        params.add(CommonParams.Q, query);
        if (QueryParser.XML == queryParser) {
            params.add("defType", "xml");
        }
        params.add(CommonParams.START, Integer.toString(position));
        params.add(CommonParams.ROWS, Integer.toString(limit));
        //params.add(CommonParams.FL, uriFieldName, xmlFieldName, idFieldName);
        params.add(CommonParams.FL, "*");
        SolrParams origParams = origRB.req.getParams();
        String debug = origParams.get(CommonParams.DEBUG);
        if (debug != null) {
            // propagate debug settings from the original request
            params.add(CommonParams.DEBUG, debug);
        }
        params.add("distrib", "true");
        // NOTE(review): assumes the original request carries a "shards" param; if it is
        // absent this adds a null value -- confirm that is acceptable to the search handler
        params.add("shards", origParams.get("shards"));
        SortSpec sortSpec = makeSortSpec();
        addSortParam (params, sortSpec);
        XQueryComponent xqueryComponent = ((SolrQueryContext)eval.getQueryContext()).getQueryComponent();
        SolrQueryRequest req = new CloudQueryRequest(xqueryComponent.getCore(), params, sortSpec);
        response = new SolrQueryResponse();
        xqueryComponent.getSearchHandler().handleRequest(req, response);
        // NOTE(review): this adds the *total* match count on every page fetch, so deep
        // iteration inflates docCount by numFound per page -- confirm that is intended
        eval.getQueryStats().docCount += getResultNumFound(response);
    }

    /**
     * @param rsp the response to examine
     * @return the total number of matches reported by the response's "response"
     * entry (a DocList locally, or a SolrDocumentList when distributed); 0 if absent
     */
    private long getResultNumFound (SolrQueryResponse rsp) {
        // read from the rsp argument, not the response field, so this works for any response
        Object docs = rsp.getValues().get("response");
        if (docs != null) {
            if (docs instanceof DocList) {
                return ((DocList)docs).matches();
            } else if (docs instanceof SolrDocumentList) {
                return ((SolrDocumentList)docs).getNumFound();
            }
        }
        return 0;
    }

    /**
     * Translates the computed Lucene sort into Solr "sort" request parameters.
     * Fields with a null name (e.g. score) are skipped.
     */
    private void addSortParam(ModifiableSolrParams params, SortSpec sortSpec) {
        for (SortField sortField : sortSpec.getSort().getSort()) {
            String dir = sortField.getReverse() ? "desc" : "asc";
            String field = sortField.getField();
            if (field != null) {
                params.add("sort", field + ' ' + dir);
            }
            // FIXME Solr controls sorting missing first/last with a *schema* setting,
            // but we insist on runtime control. We should raise an error here
            // if the schema is not in line with the runtime setting, since otherwise
            // an incorrect ordering will be the result. And provide some kind of
            // "recover from the error" setting? Where?
        }
    }

    /**
     * @return sortCriteria with any internal lux docid criterion removed and the id
     * field appended as the final tie-breaker; computed once and cached
     */
    private String [] getEffectiveSortCriteria () {
        if (effectiveCriteria == null) {
            assert sortCriteria != null;
            ArrayList<String> tmp = new ArrayList<String>();
            for (String s : sortCriteria) {
                if (! s.equals(FieldRole.LUX_DOCID)) {
                    tmp.add(s);
                }
            }
            tmp.add(idFieldName);
            effectiveCriteria = tmp.toArray(new String[tmp.size()]);
        }
        return effectiveCriteria;
    }

    /**
     * @return a SortSpec for the current position and limit, based on sortCriteria
     * when present, otherwise ordering by the id field for a consistent document order
     */
    private SortSpec makeSortSpec () {
        Sort sort;
        // add the uri field as a fallback sorting criterion to enforce a consistent
        // document order
        if (sortCriteria != null) {
            sort = makeSortFromCriteria(getEffectiveSortCriteria());
        } else {
            //sort = new Sort (SortField.FIELD_SCORE, new SortField(uriFieldName, Type.STRING));
            sort = new Sort (new SortField(idFieldName, Type.LONG));
        }
        return new SortSpec (sort, position, limit);
    }

    /**
     * @param limit the maximum number of results to retrieve per batch
     */
    public void setLimit (int limit) {
        this.limit = limit;
    }

    /**
     * @return the maximum number of results to retrieve per batch
     */
    public int getLimit () {
        return limit;
    }
}