package org.apache.solr.search.federated;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.handler.component.MergeStrategy;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.ShardDoc;
import org.apache.solr.handler.component.ShardRequest;
import org.apache.solr.handler.component.ShardResponse;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.CursorMark;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SortSpec;

/**
 * During merge, when encountering docs with the same id as seen before, do not
 * ignore them; rather, group them together in the results.
 */
public class DJoinMergeStrategy implements MergeStrategy {

  /**
   * Reports that this strategy performs the id merge itself (see
   * {@link #merge(ResponseBuilder, ShardRequest)}).
   *
   * @return always {@code true}
   */
  @Override
  public boolean mergesIds() {
    return true;
  }

  /**
   * Merges the per-shard responses of {@code sreq} into {@code rb}.
   *
   * <p>Every document of every successful shard response is pushed into a
   * {@code ShardFieldSortedHitQueue} bounded to {@code offset + count}; the
   * surviving entries are then popped into a {@code DuplicateDocumentList} and
   * recorded in {@code rb.resultIds}. Along the way the method accumulates the
   * global {@code numFound} and {@code maxScore}, optionally fills the
   * {@code shards.info} section, flags {@code partialResults} in the response
   * header when any shard failed or itself reported partial results, and
   * computes the next cursor mark for cursor-based requests.
   *
   * @param rb   response builder of the distributed request; receives the merged result
   * @param sreq shard request whose {@code responses} are merged
   */
  @Override
  @SuppressWarnings({ "rawtypes", "unchecked" })
  public void merge(ResponseBuilder rb, ShardRequest sreq) {
    SortSpec ss = rb.getSortSpec();
    Sort sort = ss.getSort();

    // No explicit sort means ordering by score.
    SortField[] sortFields = (sort != null)
        ? sort.getSort()
        : new SortField[] { SortField.FIELD_SCORE };

    IndexSchema schema = rb.req.getSchema();
    SchemaField uniqueKeyField = schema.getUniqueKeyField();

    // Merge the docs via a priority queue so we don't have to sort *all* of the
    // documents... we only need to order the top (rows+start)
    Map<String, NamedList> sortFieldValuesMap = new HashMap<>();
    Map<String, NamedList> unmarshalledSortFieldValuesMap = new HashMap<>();
    ShardFieldSortedHitQueue queue = new ShardFieldSortedHitQueue(
        unmarshalledSortFieldValuesMap, sortFields, ss.getOffset() + ss.getCount(), rb.req.getSearcher());

    NamedList<Object> shardInfo = null;
    if (rb.req.getParams().getBool(ShardParams.SHARDS_INFO, false)) {
      shardInfo = new SimpleOrderedMap<>();
      rb.rsp.getValues().add(ShardParams.SHARDS_INFO, shardInfo);
    }

    long numFound = 0;
    Float maxScore = null;
    boolean partialResults = false;

    for (ShardResponse srsp : sreq.responses) {
      String shard = srsp.getShard();
      // this hack is needed for test code since ShardResponse is so unfriendly
      if (shard == null) {
        shard = (String) srsp.getSolrResponse().getResponse().get("shard");
      }

      // Only a successful response carries a document list.
      final boolean failed = srsp.getException() != null;
      SolrDocumentList docs = failed
          ? null
          : (SolrDocumentList) srsp.getSolrResponse().getResponse().get("response");

      if (shardInfo != null) {
        shardInfo.add(shard, buildShardInfo(srsp, docs));
      }

      // now that we've added the shard info, let's only proceed if we have no error.
      if (failed) {
        partialResults = true;
        continue;
      }

      NamedList<?> responseHeader = (NamedList<?>) srsp.getSolrResponse().getResponse().get("responseHeader");
      if (responseHeader != null && Boolean.TRUE.equals(responseHeader.get("partialResults"))) {
        partialResults = true;
      }

      // calculate global maxScore and numDocsFound
      if (docs.getMaxScore() != null) {
        maxScore = maxScore == null ? docs.getMaxScore() : Math.max(maxScore, docs.getMaxScore());
      }
      numFound += docs.getNumFound();

      // Keep both the raw (marshalled) sort values, which are copied into the
      // response, and the unmarshalled ones, which the queue compares on.
      NamedList sortFieldValues = (NamedList) (srsp.getSolrResponse().getResponse().get("sort_values"));
      sortFieldValuesMap.put(shard, sortFieldValues);
      NamedList unmarshalledSortFieldValues = unmarshalSortValues(ss, sortFieldValues, schema);
      unmarshalledSortFieldValuesMap.put(shard, unmarshalledSortFieldValues);

      // go through every doc in this response, construct a ShardDoc, and
      // put it in the priority queue so it can be ordered.
      for (int i = 0; i < docs.size(); i++) {
        SolrDocument doc = docs.get(i);
        Float score = parseScore(doc.getFieldValue("score"));

        ShardDoc shardDoc = new ShardDoc();
        shardDoc.id = doc.getFieldValue(uniqueKeyField.getName());
        shardDoc.shard = shard;
        shardDoc.orderInShard = i;
        if (score != null) {
          shardDoc.score = score;
        }

        queue.insertWithReplacement(shardDoc);
      } // end for-each-doc-in-response
    } // end for-each-response

    // The queue now has 0 -> queuesize docs, where queuesize <= start + rows
    // So we want to pop the last documents off the queue to get
    // the docs offset -> queuesize
    int resultSize = queue.size() - ss.getOffset();
    resultSize = Math.max(0, resultSize); // there may not be any docs in range

    // build resultIds, which is used to request fields from each shard, and initialise responseDocs
    DuplicateDocumentList responseDocs = new DuplicateDocumentList(resultSize, maxScore, numFound, ss.getOffset());
    Map<Object, ShardDoc> resultIds = new AllShardsResultIds(sreq.actualShards);
    for (int i = resultSize - 1; i >= 0; i--) {
      ShardDoc shardDoc = queue.pop();
      shardDoc.positionInResponse = i;

      // Need the toString() for correlation with other lists that must
      // be strings (like keys in highlighting, explain, etc)
      resultIds.put(shardDoc.id.toString(), shardDoc);

      // pre-populate responseDocs with this document's own sort values
      NamedList docSortValues = sortFieldValuesMap.get(shardDoc.shard);
      NamedList sortValue = new NamedList();
      for (int j = 0; j < docSortValues.size(); ++j) {
        String fieldName = docSortValues.getName(j);
        List values = (List) docSortValues.getVal(j);
        sortValue.add(fieldName, values.get(shardDoc.orderInShard));
      }
      responseDocs.setParentDoc(shardDoc.positionInResponse,
          docSortValues.size() > 0 ? sortValue : null, shardDoc.score);
    }

    // Add hits for distributed requests
    // https://issues.apache.org/jira/browse/SOLR-3518
    rb.rsp.addToLog("hits", numFound);

    // save these results in a private area so we can access them
    // again when retrieving stored fields.
    // TODO: use ResponseBuilder (w/ comments) or the request context?
    rb.resultIds = resultIds;
    rb.setResponseDocs(responseDocs);

    populateNextCursorMarkFromMergedShards(rb, unmarshalledSortFieldValuesMap);

    if (partialResults) {
      if (rb.rsp.getResponseHeader().get("partialResults") == null) {
        rb.rsp.getResponseHeader().add("partialResults", Boolean.TRUE);
      }
    }
  }

  /**
   * Builds the {@code shards.info} entry for a single shard response.
   *
   * <p>For a failed response the entry holds {@code error}, {@code trace} and, if
   * known, {@code shardAddress}; otherwise it holds {@code numFound},
   * {@code maxScore} and {@code shardAddress}. {@code time} is appended whenever
   * a Solr response object is present.
   *
   * @param srsp the shard response to describe
   * @param docs the shard's document list; only read when {@code srsp} has no exception
   * @return the populated entry
   */
  private static SimpleOrderedMap<Object> buildShardInfo(ShardResponse srsp, SolrDocumentList docs) {
    SimpleOrderedMap<Object> info = new SimpleOrderedMap<>();

    Throwable error = srsp.getException();
    if (error != null) {
      // Report the underlying cause of a SolrServerException, but fall back to the
      // exception itself when it has no cause so that reporting never NPEs.
      if (error instanceof SolrServerException && error.getCause() != null) {
        error = error.getCause();
      }
      info.add("error", error.toString());
      StringWriter trace = new StringWriter();
      error.printStackTrace(new PrintWriter(trace));
      info.add("trace", trace.toString());
      if (srsp.getShardAddress() != null) {
        info.add("shardAddress", srsp.getShardAddress());
      }
    } else {
      info.add("numFound", docs.getNumFound());
      info.add("maxScore", docs.getMaxScore());
      info.add("shardAddress", srsp.getShardAddress());
    }

    if (srsp.getSolrResponse() != null) {
      info.add("time", srsp.getSolrResponse().getElapsedTime());
    }
    return info;
  }

  /**
   * Converts the raw {@code score} field value of a shard document to a {@link Float}.
   *
   * @param scoreObj the raw value; may be {@code null}, a {@link String} or a {@link Number}
   * @return the score, or {@code null} when {@code scoreObj} is {@code null}
   * @throws NumberFormatException if a string value is not a parsable float
   * @throws ClassCastException    if the value is neither a string nor a number
   */
  private static Float parseScore(Object scoreObj) {
    if (scoreObj == null) {
      return null;
    }
    if (scoreObj instanceof String) {
      return Float.parseFloat((String) scoreObj);
    }
    if (scoreObj instanceof Float) {
      return (Float) scoreObj;
    }
    // Tolerate other numeric representations (e.g. Double) instead of failing the cast.
    return ((Number) scoreObj).floatValue();
  }

  /**
   * Sets the next cursor mark on {@code rb} from the last document of the merged page.
   *
   * <p>Does nothing when the request has no cursor mark. When the page is empty the
   * incoming cursor mark is re-used.
   *
   * @param rb                 response builder whose {@code resultIds} have already been set
   * @param sortFieldValuesMap per-shard sort values, keyed by shard, each mapping a sort
   *                           field name to a list indexed by {@code orderInShard}
   */
  @SuppressWarnings({ "rawtypes", "unchecked" })
  private void populateNextCursorMarkFromMergedShards(ResponseBuilder rb, Map<String, NamedList> sortFieldValuesMap) {
    final CursorMark lastCursorMark = rb.getCursorMark();
    if (null == lastCursorMark) {
      // Not a cursor based request
      return; // NOOP
    }

    assert null != rb.resultIds : "resultIds was not set in ResponseBuilder";

    Collection<ShardDoc> docsOnThisPage = rb.resultIds.values();

    if (0 == docsOnThisPage.size()) {
      // nothing more matching query, re-use existing totem so user can "resume"
      // search later if it makes sense for this sort.
      rb.setNextCursorMark(lastCursorMark);
      return;
    }

    // ShardDoc and rb.resultIds are weird structures to work with...
    // The last doc of the page is the one with the greatest positionInResponse.
    ShardDoc lastDoc = null;
    for (ShardDoc eachDoc : docsOnThisPage) {
      if (null == lastDoc || lastDoc.positionInResponse < eachDoc.positionInResponse) {
        lastDoc = eachDoc;
      }
    }

    SortField[] sortFields = lastCursorMark.getSortSpec().getSort().getSort();
    List<Object> nextCursorMarkValues = new ArrayList<>(sortFields.length);
    for (SortField sf : sortFields) {
      if (sf.getType().equals(SortField.Type.SCORE)) {
        nextCursorMarkValues.add(lastDoc.score);
      } else {
        assert null != sf.getField() : "SortField has null field";
        NamedList sortFieldValues = sortFieldValuesMap.get(lastDoc.shard);
        List<Object> fieldVals = (List<Object>) sortFieldValues.get(sf.getField());
        nextCursorMarkValues.add(fieldVals.get(lastDoc.orderInShard));
      }
    }

    CursorMark nextCursorMark = lastCursorMark.createNext(nextCursorMarkValues);
    assert null != nextCursorMark : "null nextCursorMark";
    rb.setNextCursorMark(nextCursorMark);
  }

  /**
   * Unmarshals the sort values a shard returned, field by field.
   *
   * <p>SCORE and DOC sort fields are skipped because they are not present in
   * {@code sortFieldValues}. Values of a sort field without a schema field are
   * passed through unchanged; all others are converted with
   * {@link FieldType#unmarshalSortValue(Object)}.
   *
   * @param sortSpec        the sort specification of the request
   * @param sortFieldValues marshalled values from one shard: sort field name to list of values
   * @param schema          the index schema (currently unused by this method)
   * @return a new list with the same field names mapped to unmarshalled values;
   *         empty when {@code sortFieldValues} is empty
   */
  @SuppressWarnings({ "rawtypes", "unchecked" })
  private NamedList unmarshalSortValues(SortSpec sortSpec, NamedList sortFieldValues, IndexSchema schema) {
    NamedList unmarshalledSortValsPerField = new NamedList();

    if (0 == sortFieldValues.size()) {
      return unmarshalledSortValsPerField;
    }

    List<SchemaField> schemaFields = sortSpec.getSchemaFields();
    SortField[] sortFields = sortSpec.getSort().getSort();

    // Index into sortFieldValues; advances only for fields that were actually marshalled.
    int marshalledFieldNum = 0;
    for (int sortFieldNum = 0; sortFieldNum < sortFields.length; sortFieldNum++) {
      final SortField sortField = sortFields[sortFieldNum];
      final SortField.Type type = sortField.getType();

      // :TODO: would be simpler to always serialize every position of SortField[]
      if (type == SortField.Type.SCORE || type == SortField.Type.DOC) {
        continue;
      }

      final String sortFieldName = sortField.getField();
      final String valueFieldName = sortFieldValues.getName(marshalledFieldNum);
      assert sortFieldName.equals(valueFieldName)
          : "sortFieldValues name key does not match expected SortField.getField";

      List sortVals = (List) sortFieldValues.getVal(marshalledFieldNum);

      final SchemaField schemaField = schemaFields.get(sortFieldNum);
      if (null == schemaField) {
        unmarshalledSortValsPerField.add(sortField.getField(), sortVals);
      } else {
        FieldType fieldType = schemaField.getType();
        List unmarshalledSortVals = new ArrayList();
        for (Object sortVal : sortVals) {
          unmarshalledSortVals.add(fieldType.unmarshalSortValue(sortVal));
        }
        unmarshalledSortValsPerField.add(sortField.getField(), unmarshalledSortVals);
      }
      marshalledFieldNum++;
    }
    return unmarshalledSortValsPerField;
  }

  /**
   * Reports that this strategy does not take part in merging fields.
   *
   * @return always {@code false}
   */
  @Override
  public boolean handlesMergeFields() {
    return false;
  }

  /**
   * Intentionally empty, since {@link #handlesMergeFields()} returns {@code false}.
   *
   * @param rb       ignored
   * @param searcher ignored
   */
  @Override
  public void handleMergeFields(ResponseBuilder rb, SolrIndexSearcher searcher) throws IOException {
    // do nothing (since handlesMergeFields is false)
  }

  /**
   * Returns the cost of this strategy.
   *
   * @return always {@code 0}
   */
  @Override
  public int getCost() {
    return 0;
  }

}