/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.search.join;

import org.apache.solr.JSONTestUtil;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.junit.BeforeClass;
import org.junit.Test;
import org.noggit.ObjectBuilder;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Random;

/**
 * Tests that block join queries ({!parent}/{!child}) produce the same results as
 * equivalent field-based {!join} queries over explicitly stored ancestor-id fields.
 */
public class TestBlock extends SolrTestCaseJ4 {

  @BeforeClass
  public static void beforeTests() throws Exception {
    initCore("solrconfig.xml", "schema15.xml");
  }

  long totalFound;
  long queriesRun;
  long queriesWithResults;

  @SuppressWarnings("unchecked")
  private void trackResults(Object jsonResponse) {
    queriesRun++;
    Object o = ((Map<String,Map<String,Object>>) jsonResponse).get("response").get("numFound");
    int found = ((Number) o).intValue();
    totalFound += found;
    if (found > 0) {
      queriesWithResults++;
    }
  }

  private void printResults() {
    log.info("QUERY TRACKING RESULTS: queriesRun=" + queriesRun
        + " queriesWithResults=" + queriesWithResults
        + " average num results=" + ((double) totalFound) / queriesRun);
  }

  private void dumpIndex() throws Exception {
    // TODO: stream/page for big indexes
    SolrQueryRequest req = req("wt","json", "indent","true",
        "_trace","DUMPING INDEX",
        "echoParams","all",
        "q","*:*",
        "rows","1000000");
    String rsp = h.query(req);
    log.error("INDEX DUMP: " + rsp);
  }

  // Move the main query into a filter query (and match all docs with q) so the
  // filter-execution code paths get exercised too.
  private void runQueryAsFilter(ModifiableSolrParams params) {
    params.add("fq", params.get("q"));
    params.set("q", "*:*");
  }

  ModifiableSolrParams genericParams = params("wt","json", "indent","true",
      "echoParams","all",
      "rows","1000000",
      "fl","id");

  // Runs q1 and q2 with identical extra params and asserts that both requests
  // return the same response section.
  public void doCompare(String q1, String q2, ModifiableSolrParams p) throws Exception {
    ModifiableSolrParams params = params("q", q1);
    params.add(genericParams);
    params.add(p);

    ModifiableSolrParams params2 = params("q", q2);
    params2.add(genericParams);
    params2.add(p);

    boolean runAsFilter = random().nextInt(100) < 20;
    if (runAsFilter) runQueryAsFilter(params);
    if (runAsFilter) runQueryAsFilter(params2);

    SolrQueryRequest req = req(params);
    SolrQueryRequest req2 = req(params2);
    String rsp = h.query(req);
    String rsp2 = h.query(req2);

    Object expected = ObjectBuilder.fromJSON(rsp);
    Object got = ObjectBuilder.fromJSON(rsp2);

    String err = JSONTestUtil.matchObj("/response", got, ((Map) expected).get("response"));
    if (err != null) {
      log.error("JOIN MISMATCH: " + err
          + "\n\texpected=" + rsp
          + "\n\tgot=" + rsp2);
      dumpIndex();
      // re-execute the request... good for putting a breakpoint here for debugging
      String retry = h.query(req2);
      fail(err);
    }

    trackResults(got);
  }
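  /**
   * Asserts that a field-based join and the equivalent block "to parent" join return
   * the same documents. For example (a hypothetical call, not one made by the tests
   * below), toParent("id:4", "a") compares
   *   {!join from=a_s to=id}id:4   with   {!parent which=type_s:a}id:4
   * A "cache=false" local param is randomly appended to both queries so the uncached
   * code path gets exercised as well.
   */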
  public void toParent(String childQuery, String parentType, String... moreParams) throws Exception {
    boolean cache = random().nextInt(100) < 20;
    String joinQ = "{!join from=" + parentType + "_s to=id" + (cache ? "" : " cache=false") + "}" + childQuery;
    String blockQ = "{!parent which=type_s:" + parentType + (cache ? "" : " cache=false") + "}" + childQuery;
    doCompare(joinQ, blockQ, params(moreParams));
  }

  // Asserts that a field-based join from parents down to children matches the
  // equivalent block {!child} join.
  public void toChildren(String parentQuery, String parentType, String... moreParams) throws Exception {
    boolean cache = random().nextInt(100) < 20;
    String joinQ = "{!join from=id to=" + parentType + "_s" + (cache ? "" : " cache=false") + "}" + parentQuery;

    // For a block child join, any doc not matched by the "of" filter is treated as a child,
    // so we need to mark all other ancestor types (grandparents, etc.) as parents too...
    StringBuilder sb = new StringBuilder("type_s:(");
    int lev = parentType.charAt(0) - 'a';
    for (int i = 0; i <= lev; i++) {
      sb.append(type(i)).append(' ');
    }
    sb.append(")");
    String allParentTypes = sb.toString();

    String blockQ = "{!child of='" + allParentTypes + "'" + (cache ? "" : " cache=false") + "}" + parentQuery;
    doCompare(joinQ, blockQ, params(moreParams));
  }

  @Test
  public void testJoin() throws Exception {
    // a_s points to parents of type "a", b_s points to parents of type "b", etc.
    SolrInputDocument doc = sdoc("id","1", "type_s","a");

    doc.addChildDocument(sdoc("id","2", "type_s","b", "parent_s","1", "a_s","1"));
    doc.addChildDocument(sdoc("id","4", "type_s","c", "parent_s","3", "a_s","1", "b_s","3"));
    doc.addChildDocument(sdoc("id","5", "type_s","c", "parent_s","3", "a_s","1", "b_s","3"));
    doc.addChildDocument(sdoc("id","3", "type_s","b", "parent_s","1", "a_s","1"));
    doc.addChildDocument(sdoc("id","6", "type_s","b", "parent_s","1", "a_s","1"));
    doc.addChildDocument(sdoc("id","8", "type_s","c", "parent_s","7", "a_s","1", "b_s","7"));
    doc.addChildDocument(sdoc("id","9", "type_s","c", "parent_s","7", "a_s","1", "b_s","7"));
    doc.addChildDocument(sdoc("id","7", "type_s","b", "parent_s","1", "a_s","1"));

    assertU(adoc(doc));
    assertU(adoc(doc));  // overwrite the original
    assertU(commit());

    // For toChildren, the parent filter needs to match every type of parent,
    // else the block join will treat the unmatched docs as children...
    toChildren("id:1", "a");
    toChildren("id:1", "a");
    toChildren("id:2", "b");
    toChildren("id:3", "b");
    toChildren("id:(2 6 7)", "b");
    toChildren("id:(6)", "b");
    toChildren("id:(7)", "b");
    toChildren("id:(4 8)", "c");  // test leaf (should match no children)
    toChildren("id:(4)", "c");    // test leaf (should match no children)
    toChildren("id:(5)", "c");    // test leaf (should match no children)

    // toChildren("id:(2 6 7)", "a");  // error case - 2, 6 and 7 are not of type "a"...
    //                                 // will lead to repeated docs being collected

    /***
    SolrQueryRequest req = req("wt","json", "indent","true",
        "echoParams","all",
        "q","{!join from=id to=b_s}id:3");
    String rsp = h.query(req);
    System.out.println(rsp);

    req = req("wt","json", "indent","true",
        "echoParams","all",
        "q","{!child of=type_s:b}id:3");
    rsp = h.query(req);
    System.out.println(rsp);
    ***/
  }
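  // The random index is a forest of document blocks. Levels are named by letters:
  // "a" docs are roots, "b" docs are their children, and so on down to MAX_LEVEL.
  // Each doc gets its type letter appended to its id for easier debugging, and stores
  // the id of every ancestor in the a_s, b_s, ... fields so that a plain {!join} over
  // those fields can reproduce what the block join computes from index structure.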
  int id;
  int MAX_LEVEL = 5;
  int MAX_CHILDREN = 4;
  int CHANCE_CHILDREN = 30;  // percent chance a document has a child, reduced by 4*current_level

  private String type(int level) {
    return Character.toString((char) ('a' + level));
  }

  private String typeField(int level) {
    return type(level) + "_s";
  }

  private boolean mandateChildren = false;

  private SolrInputDocument randDoc(List<String> parents) {
    if (parents == null) parents = Collections.emptyList();
    int level = parents.size();

    SolrInputDocument sdoc = new SolrInputDocument();
    String typeStr = type(level);
    String idStr = Integer.toString(++id) + typeStr;  // append the type to the id, for easier debugging
    sdoc.addField("id", idStr);
    sdoc.addField("type_s", typeStr);
    sdoc.addField("level_i", level);

    // add a_s:<parent_id>, b_s:<parent_id>, etc.
    for (int i = 0; i < parents.size(); i++) {
      sdoc.addField(typeField(i), parents.get(i));
    }

    // lower the chance of children based on the level
    if ((level < MAX_LEVEL && random().nextInt(100) < (CHANCE_CHILDREN - (level * 4)))
        || (level == 0 && mandateChildren)) {
      int nChildren = random().nextInt(MAX_CHILDREN) + 1;
      List<String> newParents = new ArrayList<>(parents);
      newParents.add(idStr);
      for (int i = 0; i < nChildren; i++) {
        SolrInputDocument child = randDoc(newParents);
        sdoc.addChildDocument(child);
      }
    }

    return sdoc;
  }

  int maxDepth;
  List<SolrInputDocument> allDocs;      // all documents indexed
  List<SolrInputDocument>[] docs;       // documents separated by level
  List<SolrInputDocument>[] childDocs;  // docs at the given level and below, used as a working set and shuffled

  private void populate(SolrInputDocument sdoc, int level) {
    maxDepth = Math.max(maxDepth, level);
    docs[level].add(sdoc);
    allDocs.add(sdoc);

    // a doc at this level is a potential "child" for this level and every level above it
    for (int i = 0; i <= level; i++) {
      childDocs[i].add(sdoc);
    }

    if (sdoc.hasChildDocuments()) {
      for (SolrInputDocument child : sdoc.getChildDocuments()) {
        populate(child, level + 1);
      }
    }
  }

  @SuppressWarnings("unchecked")
  public void indexBlocks(int nDocs) {
    id = 0;
    maxDepth = 0;
    allDocs = new ArrayList<>();
    docs = (List<SolrInputDocument>[]) new List[MAX_LEVEL + 1];
    childDocs = (List<SolrInputDocument>[]) new List[MAX_LEVEL + 1];
    for (int i = 0; i < docs.length; i++) {
      docs[i] = new ArrayList<>();
      childDocs[i] = new ArrayList<>();
    }

    clearIndex();

    while (--nDocs >= 0) {
      SolrInputDocument sdoc = randDoc(null);
      assertU(adoc(sdoc));
      if (random().nextInt(100) < 5) {  // sometimes commit mid-stream to create multiple segments
        assertU(commit());
      }
      if (random().nextInt(100) < 10) {  // sometimes overwrite the previous doc
        assertU(adoc(sdoc));
      }
      populate(sdoc, 0);
    }

    assertU(commit());
  }

  // Builds an "id:(...)" query from nDocs random ids out of the given list
  // (or from a random, small-skewed number of ids when nDocs <= 0).
  String randomIds(List<SolrInputDocument> lst, int nDocs) {
    Random r = random();
    Collections.shuffle(lst, r);
    if (nDocs <= 0) {
      nDocs = r.nextInt(lst.size());
      nDocs = r.nextInt(nDocs + 1);  // do random twice to cut down the average size
      nDocs++;
    }

    StringBuilder sb = new StringBuilder("id:(");
    for (int i = 0; i < nDocs; i++) {
      sb.append(lst.get(i).getFieldValue("id"));
      sb.append(' ');
    }
    sb.append(")");
    return sb.toString();
  }
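  /**
   * Randomized equivalence test: repeatedly builds a random forest of document blocks,
   * then checks that {!join} queries over the stored ancestor-id fields return the same
   * documents as the corresponding {!parent} and {!child} block joins, mixing in random
   * (sometimes uncached) filters to also exercise skipping in the scorers.
   */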
  @Test
  public void testRandomJoin() throws Exception {
    int indexIter = 25;
    int queryIter = 100;

    for (int iiter = 0; iiter < indexIter; iiter++) {
      int topLevelDocs = random().nextInt(30) + 1;
      indexBlocks(topLevelDocs);
      if (maxDepth <= 1) continue;  // need more than one level to test

      for (int qiter = 0; qiter < queryIter; qiter++) {
        // use filters to exercise skipping on the scorers too
        String filter = null;
        if (random().nextInt(100) < 70) {  // filter 70 percent of the queries
          int filterSize;
          // We normally want filters that match a majority of the documents for better
          // coverage (else results will often match simply because everything is filtered out).
          if (random().nextInt(100) < 70) {
            filterSize = (int) (allDocs.size() * .9);  // match 90 percent of the documents
          } else {
            filterSize = random().nextInt(allDocs.size());
          }
          filterSize = Math.max(1, filterSize);
          filter = randomIds(allDocs, filterSize);
          if (random().nextInt(100) < 50) {
            filter = "{!cache=false}" + filter;
          }
        }

        // Query some children and match them to their parents.
        // The min level is 1 since we're not supposed to match a parent with the query.
        int childLevel = random().nextInt(maxDepth - 1) + 1;
        // we can map to any parent level above the child level
        int parentLevel = random().nextInt(childLevel);

        // collect some ids at childLevel or below
        String mainQ = randomIds(childDocs[childLevel], 0);
        if (filter == null) {
          toParent(mainQ, type(parentLevel));
        } else {
          toParent(mainQ, type(parentLevel), "fq", filter);
        }

        parentLevel = random().nextInt(maxDepth);
        // For toChildren, the join query must only hit parent types (as defined by the parent filter).
        mainQ = randomIds(docs[parentLevel], 0);
        if (filter == null) {
          toChildren(mainQ, type(parentLevel));
        } else {
          toChildren(mainQ, type(parentLevel), "fq", filter);
        }
      }
    }

    printResults();
  }
}