/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.join;
import org.apache.solr.JSONTestUtil;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.junit.BeforeClass;
import org.junit.Test;
import org.noggit.ObjectBuilder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Random;
public class TestBlock extends SolrTestCaseJ4 {
@BeforeClass
public static void beforeTests() throws Exception {
initCore("solrconfig.xml","schema15.xml");
}
long totalFound;
long queriesRun;
long queriesWithResults;
private void trackResults(Object jsonReponse) {
queriesRun++;
Object o = ((Map<String,Map<String,Object>>)jsonReponse).get("response").get("numFound");
int found = ((Number)o).intValue();
totalFound += found;
if (found>0) {
queriesWithResults++;
}
}
private void printResults() {
log.info("QUERY TRACKING RESULTS: queriesRun="+queriesRun + " queriesWithResults="+queriesWithResults + " average num results=" + ((double)totalFound)/queriesRun );
}
private void dumpIndex() throws Exception {
// TODO: stream/page for big indexes
SolrQueryRequest req = req("wt","json","indent","true", "_trace","DUMPING INDEX", "echoParams","all",
"q","*:*", "rows","1000000"
);
String rsp = h.query(req);
log.error("INDEX DUMP :" + rsp);
}
private void runQueryAsFilter(ModifiableSolrParams params) {
params.add("fq",params.get("q"));
params.set("q","*:*");
}
ModifiableSolrParams genericParams = params("wt","json","indent","true", "echoParams","all", "rows","1000000", "fl","id");
public void doCompare(String q1, String q2, ModifiableSolrParams p) throws Exception {
ModifiableSolrParams params = params("q", q1);
params.add(genericParams);
params.add(p);
ModifiableSolrParams params2 = params("q", q2);
params2.add(genericParams);
params2.add(p);
boolean runAsFilter = random().nextInt(100) < 20;
if (runAsFilter) runQueryAsFilter(params);
if (runAsFilter) runQueryAsFilter(params2);
SolrQueryRequest req = req(params);
SolrQueryRequest req2 = req(params2);
String rsp = h.query(req);
String rsp2 = h.query(req2);
Object expected = ObjectBuilder.fromJSON(rsp);
Object got = ObjectBuilder.fromJSON(rsp2);
String err = JSONTestUtil.matchObj("/response", got, ((Map) expected).get("response"));
if (err != null) {
log.error("JOIN MISMATCH: " + err
+ "\n\texpected="+ rsp
+ "\n\tgot ="+ rsp2
);
dumpIndex();
// re-execute the request... good for putting a breakpoint here for debugging
String retry = h.query(req2);
fail(err);
}
trackResults(got);
}
public void toParent(String childQuery, String parentType, String... moreParams) throws Exception {
boolean cache = random().nextInt(100) < 20;
String joinQ = "{!join from="+parentType+"_s to=id "
+ (cache ? "":" cache=false")
+ "}" + childQuery;
String blockQ = "{!parent which=type_s:"+parentType
+ (cache ? "":" cache=false")
+"}" + childQuery;
doCompare(joinQ, blockQ, params(moreParams));
}
public void toChildren(String parentQuery, String parentType, String... moreParams) throws Exception {
boolean cache = random().nextInt(100) < 20;
String joinQ = "{!join from=id to="+parentType+"_s"
+ (cache ? "":" cache=false")
+"}" + parentQuery;
// for block child join, any doc not marked as a parent is treated as a child.
// we need to mark all other docs (grandparents, etc) as parents...
StringBuilder sb = new StringBuilder("type_s:(");
int lev = parentType.charAt(0) - 'a';
for (int i=0; i<=lev; i++) {
sb.append(type(i)).append(' ');
}
sb.append(")");
String allParentTypes=sb.toString();
String blockQ = "{!child of='"+allParentTypes+"'"
+ (cache ? "":" cache=false")
+"}" + parentQuery;
doCompare(joinQ, blockQ, params(moreParams));
}
@Test
public void testJoin() throws Exception {
// a_s points to parents of type "a", b_s points to parents of type "b", etc.
SolrInputDocument doc = sdoc("id","1", "type_s","a");
/***
SolrInputDocument doc2 = sdoc("id","2", "type_s","b", "parent_s","1", "a_s","1"));
SolrInputDocument doc3 = sdoc("id","3", "type_s","b", "parent_s","1", "a_s","1"));
SolrInputDocument doc4 = sdoc("id","4", "type_s","c", "parent_s","3", "a_s","1", "b_s","3"));
SolrInputDocument doc5 = sdoc("id","5", "type_s","c", "parent_s","3", "a_s","1", "b_s","3"));
SolrInputDocument doc6 = sdoc("id","6", "type_s","b", "parent_s","1", "a_s","1"));
SolrInputDocument doc7 = sdoc("id","7", "type_s","b", "parent_s","1", "a_s","1"));
SolrInputDocument doc8 = sdoc("id","8", "type_s","c", "parent_s","7", "a_s","1", "b_s","7"));
SolrInputDocument doc9 = sdoc("id","9", "type_s","c", "parent_s","7", "a_s","1", "b_s","7"));
***/
doc.addChildDocument(sdoc("id","2", "type_s","b", "parent_s","1", "a_s","1"));
doc.addChildDocument(sdoc("id","4", "type_s","c", "parent_s","3", "a_s","1", "b_s","3"));
doc.addChildDocument(sdoc("id","5", "type_s","c", "parent_s","3", "a_s","1", "b_s","3"));
doc.addChildDocument(sdoc("id","3", "type_s","b", "parent_s","1", "a_s","1"));
doc.addChildDocument(sdoc("id","6", "type_s","b", "parent_s","1", "a_s","1"));
doc.addChildDocument(sdoc("id","8", "type_s","c", "parent_s","7", "a_s","1", "b_s","7"));
doc.addChildDocument(sdoc("id","9", "type_s","c", "parent_s","7", "a_s","1", "b_s","7"));
doc.addChildDocument(sdoc("id","7", "type_s","b", "parent_s","1", "a_s","1"));
assertU(adoc(doc));
assertU(adoc(doc)); // overwrite original
assertU(commit());
// for children, the parent filter needs to be any type of parent, else we will think they are a child...
toChildren("id:1", "a");
toChildren("id:1", "a");
toChildren("id:2", "b");
toChildren("id:3", "b");
toChildren("id:(2 6 7)", "b");
toChildren("id:(6)", "b");
toChildren("id:(7)", "b");
toChildren("id:(4 8)", "c"); // test leaf (should match no children)
toChildren("id:(4)", "c"); // test leaf (should match no children)
toChildren("id:(5)", "c"); // test leaf (should match no children)
// child("id:(2 6 7)", "a"); // error case - 2 6 and 7 are not of type "a"... will lead to repeated docs collected
/***
SolrQueryRequest req = req("wt","json","indent","true", "echoParams","all",
"q","{!join from=id to=b_s}id:3"
);
String rsp = h.query(req);
System.out.println(rsp);
req = req("wt","json","indent","true", "echoParams","all",
"q","{!child of=type_s:b}id:3"
);
rsp = h.query(req);
System.out.println(rsp);
***/
}
int id;
int MAX_LEVEL=5;
int MAX_CHILDREN=4;
int CHANCE_CHILDREN=30; // percent chance a document has a child, reduced by 4*current_level
private String type(int level) {
return Character.toString((char)('a'+level));
}
private String typeField(int level) {
return type(level)+"_s";
}
private boolean mandateChildren=false;
private SolrInputDocument randDoc(List<String> parents) {
if (parents == null) parents=Collections.EMPTY_LIST;
int level=parents.size();
SolrInputDocument sdoc = new SolrInputDocument();
String typeStr = type(level);
String idStr = Integer.toString(++id) + typeStr; // append the type to the id, for easier debugging
sdoc.addField("id", idStr);
sdoc.addField("type_s", typeStr);
sdoc.addField("level_i", level);
// add a_s:<parent_id>, b_s:<parent_id>, etc.
for (int i=0; i<parents.size(); i++) {
sdoc.addField(typeField(i), parents.get(i));
}
if ( (level < MAX_LEVEL && random().nextInt(100) < (CHANCE_CHILDREN-(level*4)))
|| (level==0 && mandateChildren)
) { // lower chance for children based on level
int nChildren = random().nextInt(MAX_CHILDREN)+1;
List<String> newParents = new ArrayList<String>(parents);
newParents.add(idStr);
for (int i=0; i<nChildren; i++) {
SolrInputDocument child = randDoc(newParents);
sdoc.addChildDocument(child);
}
}
return sdoc;
}
int maxDepth;
List<SolrInputDocument> allDocs; // all documents indexed
List<SolrInputDocument>[] docs; // documents separated by level
List<SolrInputDocument>[] childDocs; // docs at the current level and below, used as a working set and shuffled
private void populate(SolrInputDocument sdoc, int level) {
maxDepth = Math.max(maxDepth,level);
docs[level].add(sdoc);
allDocs.add(sdoc);
for (int i=level; i<=MAX_LEVEL; i++) {
childDocs[level].add(sdoc);
}
if (sdoc.hasChildDocuments()) {
for (SolrInputDocument child : sdoc.getChildDocuments()) {
populate(child, level+1);
}
}
}
public void indexBlocks(int nDocs) {
id = 0;
maxDepth = 0;
allDocs = new ArrayList<SolrInputDocument>();
docs = (List<SolrInputDocument>[]) new List[MAX_LEVEL+1];
childDocs = (List<SolrInputDocument>[]) new List[MAX_LEVEL+1];
for (int i=0; i<docs.length; i++) {
docs[i]=new ArrayList<SolrInputDocument>();
childDocs[i]=new ArrayList<SolrInputDocument>();
}
clearIndex();
while (--nDocs >= 0) {
SolrInputDocument sdoc = randDoc(null);
assertU(adoc(sdoc));
if (random().nextInt(100) < 5) {
assertU(commit());
}
if (random().nextInt(100) < 10) {
// overwrite the previous doc
assertU(adoc(sdoc));
}
populate(sdoc, 0);
}
assertU(commit());
}
String randomIds(List<SolrInputDocument> lst, int nDocs) {
Random r = random();
Collections.shuffle(lst, r);
if (nDocs <= 0) {
nDocs = r.nextInt(lst.size());
nDocs = r.nextInt(nDocs + 1); // do random twice to cut down the average size
nDocs++;
}
StringBuilder sb = new StringBuilder("id:(");
for (int i=0; i<nDocs; i++) {
sb.append(lst.get(i).getFieldValue("id"));
sb.append(' ');
}
sb.append(")");
String mainQ=sb.toString();
return mainQ;
}
@Test
public void testRandomJoin() throws Exception {
int indexIter=25;
int queryIter=100;
for (int iiter=0; iiter<indexIter; iiter++) {
int topLevelDocs=random().nextInt(30)+1;
indexBlocks(topLevelDocs);
if (maxDepth<=1) continue; // need more than one level to test
for(int qiter=0; qiter<queryIter; qiter++) {
// use filters to exercise skipping on the scorers too
String filter = null;
if (random().nextInt(100) < 70) { // filter 70 percent of the queries
int filterSize;
// we normally want filters that match a majority of the documents for
// better coverage (else results will often match because everything is filtered out.
if (random().nextInt(100)<70) {
filterSize = (int)(allDocs.size()*.9); // match 90 percent of the documents
} else {
filterSize = random().nextInt(allDocs.size());
}
filterSize = Math.max(1, filterSize);
filter = randomIds(allDocs, filterSize);
if (random().nextInt(100) < 50) {
filter = "{!cache=false}"+filter;
}
}
// query some children and match to their parents.
// min level is 1 since we're not supposed to match a parent with the query
int childLevel = random().nextInt(maxDepth-1)+1;
// we can map to any parent level above the child level
int parentLevel = random().nextInt(childLevel);
// collect some ids of type childLevel or below
String mainQ = randomIds(childDocs[childLevel], 0);
if (filter == null) {
toParent(mainQ, type(parentLevel));
} else {
toParent(mainQ, type(parentLevel), "fq", filter);
}
parentLevel = random().nextInt(maxDepth);
// For toChildren, the join query must only hit parent types (defined by the parent filter)
mainQ = randomIds(docs[parentLevel], 0);
if (filter == null) {
toChildren(mainQ, type(parentLevel));
} else {
toChildren(mainQ, type(parentLevel), "fq", filter);
}
}
}
printResults();
}
}