CursorPagingTest.java example

Explorer
lucene-solr-master
- lucene
- solr
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr;

import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.SentinelIntSet;
import org.apache.lucene.util.mutable.MutableValueInt;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.CursorMarkParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.GroupParams;

import static org.apache.solr.common.params.CursorMarkParams.CURSOR_MARK_PARAM;
import static org.apache.solr.common.params.CursorMarkParams.CURSOR_MARK_NEXT;
import static org.apache.solr.common.params.CursorMarkParams.CURSOR_MARK_START;

import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.metrics.MetricsMap;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.search.CursorMark; //jdoc
import org.noggit.ObjectBuilder;

import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Collection;
import java.util.Collections;
import java.util.Map;
import java.util.UUID;

import org.junit.BeforeClass;
import org.junit.After;

/**
 * Tests of deep paging using {@link CursorMark} and {@link CursorMarkParams#CURSOR_MARK_PARAM}.
 */
public class CursorPagingTest extends SolrTestCaseJ4 {

  /** solrconfig.xml file name, shared with other cursor related tests */
  public final static String TEST_SOLRCONFIG_NAME = "solrconfig-deeppaging.xml";
  /** schema.xml file name, shared with other cursor related tests */
  public final static String TEST_SCHEMAXML_NAME = "schema-sorts.xml";
  /** values from enumConfig.xml */
  public static final String[] SEVERITY_ENUM_VALUES =
      { "Not Available", "Low", "Medium", "High", "Critical" };

  @BeforeClass
  public static void beforeTests() throws Exception {
    System.setProperty("solr.test.useFilterForSortedQuery", Boolean.toString(random().nextBoolean()));
    initCore(TEST_SOLRCONFIG_NAME, TEST_SCHEMAXML_NAME);
  }
  @After
  public void cleanup() throws Exception {
    assertU(delQ("*:*"));
    assertU(commit());
  }

  /** verify expected error msgs from bad client behavior */
  public void testBadInputs() throws Exception {
    // sometimes seed some data, othertimes use an empty index
    if (random().nextBoolean()) {
      assertU(adoc("id", "42", "str", "z", "float", "99.99", "int", "42"));
      assertU(adoc("id", "66", "str", "x", "float", "22.00", "int", "-66"));
    } else {
      assertU(commit());
    }
      assertU(commit());

    // empty, blank, or bogus cursor
    for (String c : new String[] { "", "   ", "all the docs please!"}) {
      assertFail(params("q", "*:*", 
                        "sort", "id desc", 
                        CURSOR_MARK_PARAM, c),
                 ErrorCode.BAD_REQUEST, "Unable to parse");
    }

    // no id in sort
    assertFail(params("q", "*:*", 
                      "sort", "score desc", 
                      CURSOR_MARK_PARAM, CURSOR_MARK_START),
               ErrorCode.BAD_REQUEST, "uniqueKey field");
    // _docid_
    assertFail(params("q", "*:*", 
                      "sort", "_docid_ asc, id desc", 
                      CURSOR_MARK_PARAM, CURSOR_MARK_START),
               ErrorCode.BAD_REQUEST, "_docid_");

    // using cursor w/ timeAllowed
    assertFail(params("q", "*:*", 
                      "sort", "id desc", 
                      CommonParams.TIME_ALLOWED, "1000",
                      CURSOR_MARK_PARAM, CURSOR_MARK_START),
               ErrorCode.BAD_REQUEST, CommonParams.TIME_ALLOWED);

    // using cursor w/ grouping
    assertFail(params("q", "*:*", 
                      "sort", "id desc", 
                      GroupParams.GROUP, "true",
                      GroupParams.GROUP_FIELD, "str",
                      CURSOR_MARK_PARAM, CURSOR_MARK_START),
               ErrorCode.BAD_REQUEST, "Grouping");
  }


  /** simple static test of some carefully crafted docs */
  public void testSimple() throws Exception {
    String cursorMark;
    SolrParams params = null;
    
    final String intsort = "int" + (random().nextBoolean() ? "" : "_dv");
    final String intmissingsort = intsort;

    // trivial base case: ensure cursorMark against an empty index doesn't blow up
    cursorMark = CURSOR_MARK_START;
    params = params("q", "*:*", 
                    "rows","4",
                    "fl", "id",
                    "sort", "id desc");
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==0"
                              ,"/response/start==0"
                              ,"/response/docs==[]"
                              );
    assertEquals(CURSOR_MARK_START, cursorMark);


    // don't add in order of any field to ensure we aren't inadvertantly 
    // counting on internal docid ordering
    assertU(adoc("id", "9", "str", "c", "float", "-3.2", "int", "42"));
    assertU(adoc("id", "7", "str", "c", "float", "-3.2", "int", "-1976"));
    assertU(adoc("id", "2", "str", "c", "float", "-3.2", "int", "666"));
    assertU(adoc("id", "0", "str", "b", "float", "64.5", "int", "-42"));
    assertU(adoc("id", "5", "str", "b", "float", "64.5", "int", "2001"));
    assertU(adoc("id", "8", "str", "b", "float", "64.5", "int", "4055"));
    assertU(adoc("id", "6", "str", "a", "float", "64.5", "int", "7"));
    assertU(adoc("id", "1", "str", "a", "float", "64.5", "int", "7"));
    assertU(adoc("id", "4", "str", "a", "float", "11.1", "int", "6"));
    assertU(adoc("id", "3", "str", "a", "float", "11.1")); // int is missing
    assertU(commit());

    // base case: ensure cursorMark that matches no docs doesn't blow up
    cursorMark = CURSOR_MARK_START;
    params = params("q", "id:9999999", 
                    "rows","4",
                    "fl", "id",
                    "sort", "id desc");
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==0"
                              ,"/response/start==0"
                              ,"/response/docs==[]"
                              );
    assertEquals(CURSOR_MARK_START, cursorMark);

    // edge case: ensure rows=0 doesn't blow up and gives back same cursor for next
    cursorMark = CURSOR_MARK_START;
    params = params("q", "*:*", 
                    "rows","0",
                    "fl", "id",
                    "sort", "id desc");
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==10"
                              ,"/response/start==0"
                              ,"/response/docs==[]"
                              );
    assertEquals(CURSOR_MARK_START, cursorMark);

    // simple id sort w/some faceting
    cursorMark = CURSOR_MARK_START;
    params = params("q", "-int:6", 
                    "rows","4",
                    "fl", "id",
                    "sort", "id desc");
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==9"
                              ,"/response/start==0"
                              ,"/response/docs==[{'id':9},{'id':8},{'id':7},{'id':6}]"
                              );
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==9" 
                              ,"/response/start==0"
                              ,"/response/docs==[{'id':5},{'id':3},{'id':2},{'id':1}]"
                              );
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==9" 
                              ,"/response/start==0"
                              ,"/response/docs==[{'id':0}]"
                              );
    // no more, so no change to cursorMark, and no new docs
    assertEquals(cursorMark, 
                 assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==9"
                              ,"/response/start==0"
                              ,"/response/docs==[]"
                              ));

    // simple score sort w/some faceting
    cursorMark = CURSOR_MARK_START;
    params = params("q", "float:[0 TO *] int:7 id:6", 
                    "rows","4",
                    "fl", "id",
                    "facet", "true",
                    "facet.field", "str",
                    "json.nl", "map",
                    "sort", "score desc, id desc");
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==7"
                              ,"/response/start==0"
                              ,"/response/docs==[{'id':6},{'id':1},{'id':8},{'id':5}]"
                              ,"/facet_counts/facet_fields/str=={'a':4,'b':3,'c':0}"
                              );
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==7"
                              ,"/response/start==0"
                              ,"/response/docs==[{'id':4},{'id':3},{'id':0}]"
                              ,"/facet_counts/facet_fields/str=={'a':4,'b':3,'c':0}"
                              );
    // no more, so no change to cursorMark, and no new docs
    assertEquals(cursorMark,
                 assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==7"
                              ,"/response/start==0"
                              ,"/response/docs==[]"
                              ,"/facet_counts/facet_fields/str=={'a':4,'b':3,'c':0}"
                              ));

    // int sort with dups, id tie breaker ... and some faceting
    cursorMark = CURSOR_MARK_START;
    params = params("q", "-int:2001 -int:4055", 
                    "rows","3",
                    "fl", "id",
                    "facet", "true",
                    "facet.field", "str",
                    "json.nl", "map",
                    "sort", intsort + " asc, id asc");
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==8"
                              ,"/response/start==0"
                              ,"/response/docs==[{'id':7},{'id':0},{'id':3}]"
                              ,"/facet_counts/facet_fields/str=={'a':4,'b':1,'c':3}"
                              );
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==8"
                              ,"/response/start==0"
                              ,"/response/docs==[{'id':4},{'id':1},{'id':6}]"
                              ,"/facet_counts/facet_fields/str=={'a':4,'b':1,'c':3}"
                              );
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==8"
                              ,"/response/start==0"
                              ,"/response/docs==[{'id':9},{'id':2}]"
                              ,"/facet_counts/facet_fields/str=={'a':4,'b':1,'c':3}"
                              );
    // no more, so no change to cursorMark, and no new docs
    assertEquals(cursorMark,
                 assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==8" 
                              ,"/response/start==0"
                              ,"/response/docs==[]"
                              ,"/facet_counts/facet_fields/str=={'a':4,'b':1,'c':3}"
                              ));

    // int missing first sort with dups, id tie breaker
    cursorMark = CURSOR_MARK_START;
    params = params("q", "-int:2001 -int:4055", 
                    "rows","3",
                    "fl", "id",
                    "json.nl", "map",
                    "sort", intmissingsort + "_first asc, id asc");
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==8"
                              ,"/response/start==0"
                              ,"/response/docs==[{'id':3},{'id':7},{'id':0}]"
                              );
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==8"
                              ,"/response/start==0"
                              ,"/response/docs==[{'id':4},{'id':1},{'id':6}]"
                              );
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==8"
                              ,"/response/start==0"
                              ,"/response/docs==[{'id':9},{'id':2}]"
                              );
    // no more, so no change to cursorMark, and no new docs
    assertEquals(cursorMark,
                 assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==8" 
                              ,"/response/start==0"
                              ,"/response/docs==[]"
                              ));

    // int missing last sort with dups, id tie breaker
    cursorMark = CURSOR_MARK_START;
    params = params("q", "-int:2001 -int:4055", 
                    "rows","3",
                    "fl", "id",
                    "json.nl", "map",
                    "sort", intmissingsort + "_last asc, id asc");
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==8"
                              ,"/response/start==0"
                              ,"/response/docs==[{'id':7},{'id':0},{'id':4}]"
                              );
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==8"
                              ,"/response/start==0"
                              ,"/response/docs==[{'id':1},{'id':6},{'id':9}]"
                              );
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==8"
                              ,"/response/start==0"
                              ,"/response/docs==[{'id':2},{'id':3}]"
                              );
    // no more, so no change to cursorMark, and no new docs
    assertEquals(cursorMark,
                 assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==8" 
                              ,"/response/start==0"
                              ,"/response/docs==[]"
                              ));

    // string sort with dups, id tie breaker
    cursorMark = CURSOR_MARK_START;
    params = params("q", "*:*", 
                    "rows","6",
                    "fl", "id",
                    "sort", "str asc, id desc");
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==10"
                              ,"/response/start==0"
                              ,"/response/docs==[{'id':6},{'id':4},{'id':3},{'id':1},{'id':8},{'id':5}]"
                              );
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==10"
                              ,"/response/start==0"
                              ,"/response/docs==[{'id':0},{'id':9},{'id':7},{'id':2}]"
                              );
    // no more, so no change to cursorMark, and no new docs
    assertEquals(cursorMark,
                 assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==10"
                              ,"/response/start==0"
                              ,"/response/docs==[]"
                              ));

    // tri-level sort with more dups of primary then fit on a page
    cursorMark = CURSOR_MARK_START;
    params = params("q", "*:*", 
                    "rows","2",
                    "fl", "id",
                    "sort", "float asc, "+intsort+" desc, id desc");
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==10"
                              ,"/response/start==0"
                              ,"/response/docs==[{'id':2},{'id':9}]"
                              );
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==10"
                              ,"/response/start==0"
                              ,"/response/docs==[{'id':7},{'id':4}]"
                              );
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==10" 
                              ,"/response/start==0"
                              ,"/response/docs==[{'id':3},{'id':8}]"
                              );
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==10"
                              ,"/response/start==0"
                              ,"/response/docs==[{'id':5},{'id':6}]"
                              );
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==10"
                              ,"/response/start==0"
                              ,"/response/docs==[{'id':1},{'id':0}]"
                              );
    // we've exactly exhausted all the results, but solr had no way of know that
    // no more, so no change to cursorMark, and no new docs
    assertEquals(cursorMark,
                 assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==10"
                              ,"/response/start==0"
                              ,"/response/docs==[]"
                              ));

    // trivial base case: rows bigger then number of matches
    cursorMark = CURSOR_MARK_START;
    params = params("q", "id:3 id:7", 
                    "rows","111",
                    "fl", "id",
                    "sort", intsort + " asc, id asc");
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==2"
                              ,"/response/start==0"
                              ,"/response/docs==[{'id':7},{'id':3}]"
                              );
    // no more, so no change to cursorMark, and no new docs
    assertEquals(cursorMark,
                 assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==2"
                              ,"/response/start==0"
                              ,"/response/docs==[]"
                              ));

    // sanity check our full walk method
    SentinelIntSet ids;
    ids = assertFullWalkNoDups(10, params("q", "*:*", 
                                          "rows", "4",
                                          "sort", "id desc"));
    assertEquals(10, ids.size());
    ids = assertFullWalkNoDups(9, params("q", "*:*", 
                                         "rows", "1",
                                         "fq", "-id:4",
                                         "sort", "id asc"));
    assertEquals(9, ids.size());
    assertFalse("matched on id:4 unexpectedly", ids.exists(4));
    ids = assertFullWalkNoDups(9, params("q", "*:*", 
                                         "rows", "3",
                                         "fq", "-id:6",
                                         "sort", "float desc, id asc, "+intsort+" asc"));
    assertEquals(9, ids.size());
    assertFalse("matched on id:6 unexpectedly", ids.exists(6));
    ids = assertFullWalkNoDups(9, params("q", "float:[0 TO *] int:7 id:6", 
                                         "rows", "3",
                                         "sort", "score desc, id desc"));
    assertEquals(7, ids.size());
    assertFalse("matched on id:9 unexpectedly", ids.exists(9));
    assertFalse("matched on id:7 unexpectedly", ids.exists(7));
    assertFalse("matched on id:2 unexpectedly", ids.exists(2));

    // strategically delete/add some docs in the middle of walking the cursor
    cursorMark = CURSOR_MARK_START;
    params = params("q", "*:*", 
                    "rows","2",
                    "fl", "id",
                    "sort", "str asc, id asc");
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==10"
                              ,"/response/start==0"
                              ,"/response/docs==[{'id':1},{'id':3}]"
                              );
    // delete the last guy we got
    assertU(delI("3")); 
    assertU(commit());
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==9"
                              ,"/response/start==0"
                              ,"/response/docs==[{'id':4},{'id':6}]"
                              );
    // delete the next guy we expect
    assertU(delI("0")); 
    assertU(commit());
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==8"
                              ,"/response/start==0"
                              ,"/response/docs==[{'id':5},{'id':8}]"
                              );
    // update a doc we've already seen so it repeats
    assertU(adoc("id", "5", "str", "c"));
    assertU(commit());
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==8"
                              ,"/response/start==0"
                              ,"/response/docs==[{'id':2},{'id':5}]"
                              );
    // update the next doc we expect so it's now in the past
    assertU(adoc("id", "7", "str", "a"));
    assertU(commit());
    cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==8"
                              ,"/response/start==0"
                              ,"/response/docs==[{'id':9}]"
                              );
    // no more, so no change to cursorMark, and no new docs
    assertEquals(cursorMark,
                 assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
                              ,"/response/numFound==8"
                              ,"/response/start==0"
                              ,"/response/docs==[]"
                              ));
  }

  /**
   * test that our assumptions about how caches are affected hold true
   */
  public void testCacheImpacts() throws Exception {
    // cursor queryies can't live in the queryResultCache, but independent filters
    // should still be cached & reused

    // don't add in order of any field to ensure we aren't inadvertantly 
    // counting on internal docid ordering
    assertU(adoc("id", "9", "str", "c", "float", "-3.2", "int", "42"));
    assertU(adoc("id", "7", "str", "c", "float", "-3.2", "int", "-1976"));
    assertU(adoc("id", "2", "str", "c", "float", "-3.2", "int", "666"));
    assertU(adoc("id", "0", "str", "b", "float", "64.5", "int", "-42"));
    assertU(adoc("id", "5", "str", "b", "float", "64.5", "int", "2001"));
    assertU(adoc("id", "8", "str", "b", "float", "64.5", "int", "4055"));
    assertU(adoc("id", "6", "str", "a", "float", "64.5", "int", "7"));
    assertU(adoc("id", "1", "str", "a", "float", "64.5", "int", "7"));
    assertU(adoc("id", "4", "str", "a", "float", "11.1", "int", "6"));
    assertU(adoc("id", "3", "str", "a", "float", "11.1", "int", "3"));
    assertU(commit());

    final Collection<String> allFieldNames = getAllSortFieldNames();

    final MetricsMap filterCacheStats =
        (MetricsMap)h.getCore().getCoreMetricManager().getRegistry().getMetrics().get("CACHE.searcher.filterCache");
    assertNotNull(filterCacheStats);
    final MetricsMap queryCacheStats =
        (MetricsMap)h.getCore().getCoreMetricManager().getRegistry().getMetrics().get("CACHE.searcher.queryResultCache");
    assertNotNull(queryCacheStats);

    final long preQcIn = (Long) queryCacheStats.getValue().get("inserts");
    final long preFcIn = (Long) filterCacheStats.getValue().get("inserts");
    final long preFcHits = (Long) filterCacheStats.getValue().get("hits");

    SentinelIntSet ids = assertFullWalkNoDups
      (10, params("q", "*:*",
                  "rows",""+ TestUtil.nextInt(random(), 1, 11),
                  "fq", "-id:[1 TO 2]",
                  "fq", "-id:[6 TO 7]",
                  "fl", "id",
                  "sort", buildRandomSort(allFieldNames)));
    
    assertEquals(6, ids.size());

    final long postQcIn = (Long) queryCacheStats.getValue().get("inserts");
    final long postFcIn = (Long) filterCacheStats.getValue().get("inserts");
    final long postFcHits = (Long) filterCacheStats.getValue().get("hits");
    
    assertEquals("query cache inserts changed", preQcIn, postQcIn);
    // NOTE: use of pure negative filters causees "*:* to be tracked in filterCache
    assertEquals("filter cache did not grow correctly", 3, postFcIn-preFcIn);
    assertTrue("filter cache did not have any new cache hits", 0 < postFcHits-preFcHits);

  }

  /** randomized testing of a non-trivial number of docs using assertFullWalkNoDups 
   */
  public void testRandomSortsOnLargeIndex() throws Exception {
    final Collection<String> allFieldNames = getAllSortFieldNames();

    final int initialDocs = TestUtil.nextInt(random(), 100, 200);
    final int totalDocs = atLeast(500);

    // start with a smallish number of documents, and test that we can do a full walk using a 
    // sort on *every* field in the schema...

    for (int i = 1; i <= initialDocs; i++) {
      SolrInputDocument doc = buildRandomDocument(i);
      assertU(adoc(doc));
    }
    assertU(commit());

    for (String f : allFieldNames) {
      for (String order : new String[] {" asc", " desc"}) {
        String sort = f + order + ("id".equals(f) ? "" : ", id" + order);
        String rows = "" + TestUtil.nextInt(random(), 13, 50);
        SentinelIntSet ids = assertFullWalkNoDups(totalDocs, 
                                                  params("q", "*:*",
                                                         "fl","id",
                                                         "rows",rows,
                                                         "sort",sort));
        assertEquals(initialDocs, ids.size());
      }
    }

    // now add a lot more docs, and test a handful of randomized sorts
    for (int i = initialDocs+1; i <= totalDocs; i++) {
      SolrInputDocument doc = buildRandomDocument(i);
      assertU(adoc(doc));
    }
    assertU(commit());

    final int numRandomSorts = atLeast(3);
    for (int i = 0; i < numRandomSorts; i++) {
      final String sort = buildRandomSort(allFieldNames);
      final String rows = "" + TestUtil.nextInt(random(), 63, 113);
      final String fl = random().nextBoolean() ? "id" : "id,score";
      final boolean matchAll = random().nextBoolean();
      final String q = matchAll ? "*:*" : buildRandomQuery();

      SentinelIntSet ids = assertFullWalkNoDups(totalDocs, 
                                                params("q", q,
                                                       "fl",fl,
                                                       "rows",rows,
                                                       "sort",sort));
      if (matchAll) {
        assertEquals(totalDocs, ids.size());
      }

    }
  }

  /** Similar to usually() but we want it to happen just as often regardless
   * of test multiplier and nightly status 
   */
  private static boolean useField() {
    return 0 != TestUtil.nextInt(random(), 0, 30);
  }
  
  /**
   * An immutable list of the fields in the schema that can be used for sorting,
   * deterministically random order.
   */
  private List<String> getAllSortFieldNames() {
    return pruneAndDeterministicallySort
      (h.getCore().getLatestSchema().getFields().keySet());
  }

  
  /**
   * <p>
   * Given a list of field names in the schema, returns an immutable list in 
   * deterministically random order with the following things removed:
   * </p>
   * <ul>
   *  <li><code>_version_</code> is removed</li>
   * </ul>
   */
  public static List<String> pruneAndDeterministicallySort(Collection<String> raw) {

    ArrayList<String> names = new ArrayList<>(37);
    for (String f : raw) {
      if (f.equals("_version_")) {
        continue;
      }
      names.add(f);
    }

    Collections.sort(names);
    Collections.shuffle(names,random());
    return Collections.<String>unmodifiableList(names);
  }

  /**
   * Given a set of params, executes a cursor query using {@link CursorMarkParams#CURSOR_MARK_START}
   * and then continuously walks the results using {@link CursorMarkParams#CURSOR_MARK_START} as long
   * as a non-0 number of docs ar returned.  This method records the the set of all id's
   * (must be positive ints) encountered and throws an assertion failure if any id is
   * encountered more than once, or if the set grows above maxSize
   */
  public SentinelIntSet assertFullWalkNoDups(int maxSize, SolrParams params) 
    throws Exception {

    SentinelIntSet ids = new SentinelIntSet(maxSize, -1);
    String cursorMark = CURSOR_MARK_START;
    int docsOnThisPage = Integer.MAX_VALUE;
    while (0 < docsOnThisPage) {
      String json = assertJQ(req(params,
                                 CURSOR_MARK_PARAM, cursorMark));
      Map rsp = (Map) ObjectBuilder.fromJSON(json);
      assertTrue("response doesn't contain " + CURSOR_MARK_NEXT + ": " + json,
                 rsp.containsKey(CURSOR_MARK_NEXT));
      String nextCursorMark = (String)rsp.get(CURSOR_MARK_NEXT);
      assertNotNull(CURSOR_MARK_NEXT + " is null", nextCursorMark);
      List<Map<Object,Object>> docs = (List) (((Map)rsp.get("response")).get("docs"));
      docsOnThisPage = docs.size();
      if (null != params.getInt(CommonParams.ROWS)) {
        int rows = params.getInt(CommonParams.ROWS);
        assertTrue("Too many docs on this page: " + rows + " < " + docsOnThisPage,
                   docsOnThisPage <= rows);
      }
      if (0 == docsOnThisPage) {
        assertEquals("no more docs, but "+CURSOR_MARK_NEXT+" isn't same",
                     cursorMark, nextCursorMark);
      }
      for (Map<Object,Object> doc : docs) {
        int id = ((Long)doc.get("id")).intValue();
        assertFalse("walk already seen: " + id, ids.exists(id));
        ids.put(id);
        assertFalse("id set bigger then max allowed ("+maxSize+"): " + ids.size(),
                    maxSize < ids.size());
      }
      cursorMark = nextCursorMark;
    }
    return ids;
  }

  /**
   * test faceting with deep paging
   */
  public void testFacetingWithRandomSorts() throws Exception {
    final int numDocs = TestUtil.nextInt(random(), 1000, 3000);
    String[] fieldsToFacetOn = { "int", "long", "str" };
    String[] facetMethods = { "enum", "fc", "fcs" };

    for (int i = 1; i <= numDocs; i++) {
      SolrInputDocument doc = buildRandomDocument(i);
      assertU(adoc(doc));
    }
    assertU(commit());

    Collection<String> allFieldNames = getAllSortFieldNames();
    String[] fieldNames = new String[allFieldNames.size()];
    allFieldNames.toArray(fieldNames);
    String f = fieldNames[TestUtil.nextInt(random(), 0, fieldNames.length - 1)];
    String order = 0 == TestUtil.nextInt(random(), 0, 1) ? " asc" : " desc";
    String sort = f + order + (f.equals("id") ? "" : ", id" + order);
    String rows = "" + TestUtil.nextInt(random(), 13, 50);
    String facetField = fieldsToFacetOn
        [TestUtil.nextInt(random(), 0, fieldsToFacetOn.length - 1)];
    String facetMethod = facetMethods
        [TestUtil.nextInt(random(), 0, facetMethods.length - 1)];
    SentinelIntSet ids = assertFullWalkNoDupsWithFacets
        (numDocs, params("q", "*:*",
            "fl", "id," + facetField,
            "facet", "true",
            "facet.field", facetField,
            "facet.method", facetMethod,
            "facet.missing", "true",
            "facet.limit", "-1", // unlimited
            "rows", rows,
            "sort", sort));
    assertEquals(numDocs, ids.size());
  }

  /**
   * Given a set of params, executes a cursor query using {@link CursorMarkParams#CURSOR_MARK_START}
   * and then continuously walks the results using {@link CursorMarkParams#CURSOR_MARK_START} as long
   * as a non-0 number of docs ar returned.  This method records the the set of all id's
   * (must be positive ints) encountered and throws an assertion failure if any id is
   * encountered more than once, or if the set grows above maxSize.
   *
   * Also checks that facets are the same with each page, and that they are correct.
   */
  public SentinelIntSet assertFullWalkNoDupsWithFacets(int maxSize, SolrParams params)
      throws Exception {

    final String facetField = params.get("facet.field");
    assertNotNull("facet.field param not specified", facetField);
    assertFalse("facet.field param contains multiple values", facetField.contains(","));
    assertEquals("facet.limit param not set to -1", "-1", params.get("facet.limit"));
    final Map<String,MutableValueInt> facetCounts = new HashMap<>();
    SentinelIntSet ids = new SentinelIntSet(maxSize, -1);
    String cursorMark = CURSOR_MARK_START;
    int docsOnThisPage = Integer.MAX_VALUE;
    List previousFacets = null;
    while (0 < docsOnThisPage) {
      String json = assertJQ(req(params, CURSOR_MARK_PARAM, cursorMark));
      Map rsp = (Map) ObjectBuilder.fromJSON(json);
      assertTrue("response doesn't contain " + CURSOR_MARK_NEXT + ": " + json,
                 rsp.containsKey(CURSOR_MARK_NEXT));
      String nextCursorMark = (String)rsp.get(CURSOR_MARK_NEXT);
      assertNotNull(CURSOR_MARK_NEXT + " is null", nextCursorMark);
      List<Map<Object,Object>> docs = (List)(((Map)rsp.get("response")).get("docs"));
      docsOnThisPage = docs.size();
      if (null != params.getInt(CommonParams.ROWS)) {
        int rows = params.getInt(CommonParams.ROWS);
        assertTrue("Too many docs on this page: " + rows + " < " + docsOnThisPage,
                   docsOnThisPage <= rows);
      }
      if (0 == docsOnThisPage) {
        assertEquals("no more docs, but "+CURSOR_MARK_NEXT+" isn't same",
                     cursorMark, nextCursorMark);
      }
      for (Map<Object,Object> doc : docs) {
        int id = ((Long)doc.get("id")).intValue();
        assertFalse("walk already seen: " + id, ids.exists(id));
        ids.put(id);
        assertFalse("id set bigger then max allowed ("+maxSize+"): " + ids.size(),
                    maxSize < ids.size());
        Object facet = doc.get(facetField);
        String facetString = null == facet ? null : facet.toString(); // null: missing facet value
        MutableValueInt count = facetCounts.get(facetString);
        if (null == count) {
          count = new MutableValueInt();
          facetCounts.put(facetString, count);
        }
        ++count.value;
      }
      cursorMark = nextCursorMark;

      Map facetFields = (Map)((Map)rsp.get("facet_counts")).get("facet_fields");
      List facets = (List)facetFields.get(facetField);
      if (null != previousFacets) {
        assertEquals("Facets not the same as on previous page:\nprevious page facets: "
            + Arrays.toString(facets.toArray(new Object[facets.size()]))
            + "\ncurrent page facets: "
            + Arrays.toString(previousFacets.toArray(new Object[previousFacets.size()])),
            previousFacets, facets);
      }
      previousFacets = facets;
    }

    assertNotNull("previousFacets is null", previousFacets);
    assertEquals("Mismatch in number of facets: ", facetCounts.size(), previousFacets.size() / 2);
    int pos;
    for (pos = 0 ; pos < previousFacets.size() ; pos += 2) {
      String label = (String)previousFacets.get(pos);
      int expectedCount = ((Number)previousFacets.get(pos + 1)).intValue();
      MutableValueInt count = facetCounts.get(label);
      assertNotNull("Expected facet label #" + (pos / 2) + " not found: '" + label + "'", count);
      assertEquals("Facet count mismatch for label #" + (pos / 2) + " '" + label + "'", expectedCount,
                   facetCounts.get(label).value);
      pos += 2;
    }
    return ids;
  }

  /**
   * Asserts that the query matches the specified JSON patterns and then returns the
   * {@link CursorMarkParams#CURSOR_MARK_NEXT} value from the response
   *
   * @see #assertJQ
   */
  public String assertCursor(SolrQueryRequest req, String... tests) throws Exception {
    String json = assertJQ(req, tests);
    Map rsp = (Map) ObjectBuilder.fromJSON(json);
    assertTrue("response doesn't contain "+CURSOR_MARK_NEXT + ": " + json,
               rsp.containsKey(CURSOR_MARK_NEXT));
    String next = (String)rsp.get(CURSOR_MARK_NEXT);
    assertNotNull(CURSOR_MARK_NEXT + " is null", next);
    return next;
  }

  /**
   * execute a local request, verify that we get an expected error
   */
  public void assertFail(SolrParams p, ErrorCode expCode, String expSubstr) 
    throws Exception {

    try {
      ignoreException(expSubstr);
      assertJQ(req(p));
      fail("no exception matching expected: " + expCode.code + ": " + expSubstr);
    } catch (SolrException e) {
      assertEquals(expCode.code, e.code());
      assertTrue("Expected substr not found: " + expSubstr + " <!< " + e.getMessage(),
                 e.getMessage().contains(expSubstr));
    } finally {
      unIgnoreException(expSubstr);
    }
  }

  /**
   * Creates a document with randomized field values, some of which be missing values, 
   * and some of which will be skewed so that small subsets of the ranges will be 
   * more common (resulting in an increased likelihood of duplicate values)
   * 
   * @see #buildRandomQuery
   */
  public static SolrInputDocument buildRandomDocument(int id) {
    SolrInputDocument doc = sdoc("id", id);
    // most fields are in most docs
    // if field is in a doc, then "skewed" chance val is from a dense range
    // (hopefully with lots of duplication)
    if (useField()) {
      doc.addField("int", skewed(random().nextInt(), 
                                 TestUtil.nextInt(random(), 20, 50)));
    }
    if (useField()) {
      doc.addField("long", skewed(random().nextLong(), 
                                  TestUtil.nextInt(random(), 5000, 5100)));
    }
    if (useField()) {
      doc.addField("float", skewed(random().nextFloat() * random().nextInt(), 
                                   1.0F / random().nextInt(23)));
    }
    if (useField()) {
      doc.addField("double", skewed(random().nextDouble() * random().nextInt(), 
                                    1.0D / random().nextInt(37)));
    }
    if (useField()) {
      doc.addField("str", skewed(randomXmlUsableUnicodeString(),
                                 TestUtil.randomSimpleString(random(), 1, 1)));
    }
    if (useField()) {
      int numBytes = (int) skewed(TestUtil.nextInt(random(), 20, 50), 2);
      byte[] randBytes = new byte[numBytes];
      random().nextBytes(randBytes);
      doc.addField("bin", ByteBuffer.wrap(randBytes));
    }
    if (useField()) {
      doc.addField("date", skewed(randomDate(), randomSkewedDate()));
    }
    if (useField()) {
      doc.addField("uuid", UUID.randomUUID().toString());
    }
    if (useField()) {
      doc.addField("currency", skewed("" + (random().nextInt() / 100.) + "," + randomCurrency(),
                                      "" + TestUtil.nextInt(random(), 250, 320) + ",USD"));
    }
    if (useField()) {
      doc.addField("bool", random().nextBoolean() ? "t" : "f");
    }
    if (useField()) {
      doc.addField("enum", randomEnumValue());
    }
    return doc;
  }

  /**
   * Generates a random query using the fields populated by 
   * {@link #buildRandomDocument}.  Queries will typically be fairly simple, but 
   * won't be so trivial that the scores are completely constant.
   */
  public static String buildRandomQuery() {
    List<String> numericFields = Arrays.asList("int","long","float","double");
    Collections.shuffle(numericFields, random());
    if (random().nextBoolean()) {
      // simple function query across one field.
      return "{!func}" + numericFields.get(0);
    } else {
      // several SHOULD clauses on range queries
      int low = TestUtil.nextInt(random(), -2379, 2);
      int high = TestUtil.nextInt(random(), 4, 5713);
      return 
        numericFields.get(0) + ":[* TO 0] " +
        numericFields.get(1) + ":[0 TO *] " +
        numericFields.get(2) + ":[" + low + " TO " + high + "]";
    }
  }

  private static final String[] currencies = { "USD", "EUR", "NOK" };

  public static String randomCurrency() {
    return currencies[random().nextInt(currencies.length)];
  }

  private static String randomEnumValue() {
    return SEVERITY_ENUM_VALUES[random().nextInt(SEVERITY_ENUM_VALUES.length)];
  }

  /**
   * Given a list of fieldNames, builds up a random sort string which is guaranteed to
   * have at least 3 clauses, ending with the "id" field for tie breaking
   */
  public static String buildRandomSort(final Collection<String> fieldNames) {

    ArrayList<String> shuffledNames = new ArrayList<>(fieldNames);
    Collections.replaceAll(shuffledNames, "id", "score");
    Collections.shuffle(shuffledNames, random());

    final StringBuilder result = new StringBuilder();
    final int numClauses = TestUtil.nextInt(random(), 2, 5);

    for (int i = 0; i < numClauses; i++) {
      String field = shuffledNames.get(i);

      // wrap in a function sometimes
      if ( ! "score".equals(field) && 0 == TestUtil.nextInt(random(), 0, 7)) {
        // specific function doesn't matter, just proving that we can handle the concept.
        // but we do have to be careful with non numeric fields
        if (field.contains("float") || field.contains("double")
            || field.contains("int") || field.contains("long")) {
          field = "abs(" + field + ")";
        } else {
          field = "if(exists(" + field + "),47,83)";
        }
      }
      result.append(field).append(random().nextBoolean() ? " asc, " : " desc, ");
    }
    result.append("id").append(random().nextBoolean() ? " asc" : " desc");
    return result.toString();
  }

}