/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.search.facet;

import java.io.IOException;
import java.util.List;

import org.apache.solr.JSONTestUtil;
import org.apache.solr.SolrTestCaseHS;
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.request.SolrQueryRequest;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.noggit.JSONParser;
import org.noggit.ObjectBuilder;

public class TestJsonFacetRefinement extends SolrTestCaseHS {

  private static SolrInstances servers;  // for distributed testing

  @BeforeClass
  public static void beforeTests() throws Exception {
    JSONTestUtil.failRepeatedKeys = true;
    initCore("solrconfig-tlog.xml", "schema_latest.xml");
  }

  public static void initServers() throws Exception {
    if (servers == null) {
      servers = new SolrInstances(3, "solrconfig-tlog.xml", "schema_latest.xml");
    }
  }

  @AfterClass
  public static void afterTests() throws Exception {
    JSONTestUtil.failRepeatedKeys = false;
    if (servers != null) {
      servers.stop();
      servers = null;
    }
  }

  // todo - pull up to test base class?
  public void matchJSON(String json, double delta, String... tests) throws Exception {
    for (String test : tests) {
      if (test == null) {
        assertNull(json);
        continue;
      }
      if (test.length() == 0) continue;

      String err = JSONTestUtil.match(json, test, delta);

      if (err != null) {
        throw new RuntimeException("JSON failed validation. error=" + err +
            "\n expected =" + test +
            "\n got = " + json
        );
      }
    }
  }

  public void match(Object input, double delta, String... tests) throws Exception {
    for (String test : tests) {
      String err = null;
      if (test == null) {
        if (input != null) {
          err = "expected null";
        }
      } else if (input == null) {
        err = "got null";
      } else {
        err = JSONTestUtil.matchObj(input, test, delta);
      }

      if (err != null) {
        throw new RuntimeException("JSON failed validation. error=" + err +
            "\n expected =" + test +
            "\n got = " + input
        );
      }
    }
  }

  /** Use SimpleOrderedMap rather than Map to match responses from shards */
  public static Object fromJSON(String json) throws IOException {
    JSONParser parser = new JSONParser(json);
    ObjectBuilder ob = new ObjectBuilder(parser) {
      @Override
      public Object newObject() throws IOException {
        return new SimpleOrderedMap();
      }

      @Override
      public void addKeyVal(Object map, Object key, Object val) throws IOException {
        ((SimpleOrderedMap) map).add(key.toString(), val);
      }
    };

    return ob.getObject();
  }
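  /**
   * Simulates the distributed refinement handshake for a single json.facet request:
   * parses {@code facet}, merges the given per-shard responses in order (as shards
   * "s0", "s1", ...), then asks the merger what refinement request it would send back
   * to each shard and matches it against the corresponding expected-JSON string
   * (null means no refinement is expected for that shard).  The varargs are the shard
   * responses followed by the per-shard expected refinement strings.
   */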
  void doTestRefine(String facet, String... responsesAndTests) throws Exception {
    SolrQueryRequest req = req();
    try {
      int nShards = responsesAndTests.length / 2;
      Object jsonFacet = ObjectBuilder.fromJSON(facet);

      FacetParser parser = new FacetTopParser(req);
      FacetRequest facetRequest = parser.parse(jsonFacet);

      FacetMerger merger = null;
      FacetMerger.Context ctx = new FacetMerger.Context(nShards);
      for (int i = 0; i < nShards; i++) {
        Object response = fromJSON(responsesAndTests[i]);
        if (i == 0) {
          merger = facetRequest.createFacetMerger(response);
        }
        ctx.newShard("s" + i);
        merger.merge(response, ctx);
      }

      for (int i = 0; i < nShards; i++) {
        ctx.setShard("s" + i);
        Object refinement = merger.getRefinement(ctx);
        String tests = responsesAndTests[nShards + i];
        match(refinement, 1e-5, tests);
      }

    } finally {
      req.close();
    }
  }

  @Test
  public void testMerge() throws Exception {
    doTestRefine("{x : {type:terms, field:X, limit:2, refine:true} }",  // the facet request
        "{x: {buckets:[{val:x1, count:5}, {val:x2, count:3}] } }",      // shard0 response
        "{x: {buckets:[{val:x2, count:4}, {val:x3, count:2}] } }",      // shard1 response
        null,                                                           // shard0 expected refinement info
        "=={x:{_l:[x1]}}"                                               // shard1 expected refinement info
    );

    // same test w/o refinement turned on
    doTestRefine("{x : {type:terms, field:X, limit:2} }",           // the facet request
        "{x: {buckets:[{val:x1, count:5}, {val:x2, count:3}] } }",  // shard0 response
        "{x: {buckets:[{val:x2, count:4}, {val:x3, count:2}] } }",  // shard1 response
        null,                                                       // shard0 expected refinement info
        null                                                        // shard1 expected refinement info
    );

    // same test, but nested in query facet
    doTestRefine("{top:{type:query, q:'foo_s:myquery', facet:{x : {type:terms, field:X, limit:2, refine:true} } } }",  // the facet request
        "{top: {x: {buckets:[{val:x1, count:5}, {val:x2, count:3}] } } }",  // shard0 response
        "{top: {x: {buckets:[{val:x2, count:4}, {val:x3, count:2}] } } }",  // shard1 response
        null,                    // shard0 expected refinement info
        "=={top:{x:{_l:[x1]}}}"  // shard1 expected refinement info
    );

    // same test w/o refinement turned on
    doTestRefine("{top:{type:query, q:'foo_s:myquery', facet:{x : {type:terms, field:X, limit:2, refine:false} } } }",
        "{top: {x: {buckets:[{val:x1, count:5}, {val:x2, count:3}] } } }",  // shard0 response
        "{top: {x: {buckets:[{val:x2, count:4}, {val:x3, count:2}] } } }",  // shard1 response
        null,
        null
    );

    // same test, but nested in a terms facet
    doTestRefine("{top:{type:terms, field:Afield, facet:{x : {type:terms, field:X, limit:2, refine:true} } } }",
        "{top: {buckets:[{val:'A', count:2, x:{buckets:[{val:x1, count:5},{val:x2, count:3}]} } ] } }",
        "{top: {buckets:[{val:'A', count:1, x:{buckets:[{val:x2, count:4},{val:x3, count:2}]} } ] } }",
        null,
        "=={top: {" +
            "_s:[ ['A' , {x:{_l:[x1]}} ] ]" +
            " } " +
            "}"
    );

    // same test, but nested in range facet
    doTestRefine("{top:{type:range, field:R, start:0, end:1, gap:1, facet:{x : {type:terms, field:X, limit:2, refine:true} } } }",
        "{top: {buckets:[{val:0, count:2, x:{buckets:[{val:x1, count:5},{val:x2, count:3}]} } ] } }",
        "{top: {buckets:[{val:0, count:1, x:{buckets:[{val:x2, count:4},{val:x3, count:2}]} } ] } }",
        null,
        "=={top: {" +
            "_s:[ [0 , {x:{_l:[x1]}} ] ]" +
            " } " +
            "}"
    );
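    // rough summary of the refinement tags used in the expected strings above and below
    // (inferred from these tests, not an authoritative spec): _l lists bucket values a
    // shard must return in full, _s lists buckets the shard already returned where only
    // the nested refinement is needed, and _p lists buckets the shard is missing that
    // also carry nested refinement requests.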
    // for testing partial _p, we need a partial facet within a partial facet
    doTestRefine("{top:{type:terms, field:Afield, refine:true, limit:1, facet:{x : {type:terms, field:X, limit:1, refine:true} } } }",
        "{top: {buckets:[{val:'A', count:2, x:{buckets:[{val:x1, count:5},{val:x2, count:3}]} } ] } }",
        "{top: {buckets:[{val:'B', count:1, x:{buckets:[{val:x2, count:4},{val:x3, count:2}]} } ] } }",
        null,
        "=={top: {" +
            "_p:[ ['A' , {x:{_l:[x1]}} ] ]" +
            " } " +
            "}"
    );

    // test partial _p under a missing bucket
    doTestRefine("{top:{type:terms, field:Afield, refine:true, limit:1, missing:true, facet:{x : {type:terms, field:X, limit:1, refine:true} } } }",
        "{top: {buckets:[], missing:{count:12, x:{buckets:[{val:x2, count:4},{val:x3, count:2}]} } } }",
        "{top: {buckets:[], missing:{count:10, x:{buckets:[{val:x1, count:5},{val:x4, count:3}]} } } }",
        "=={top: {" +
            "missing:{x:{_l:[x1]}}" +
            " } " +
            "}",
        null
    );
  }

  @Test
  public void testBasicRefinement() throws Exception {
    initServers();
    Client client = servers.getClient(random().nextInt());
    client.queryDefaults().set("shards", servers.getShards(), "debugQuery", Boolean.toString(random().nextBoolean()));

    List<SolrClient> clients = client.getClientProvider().all();
    assertTrue(clients.size() >= 3);

    client.deleteByQuery("*:*", null);

    ModifiableSolrParams p = params("cat_s", "cat_s", "xy_s", "xy_s", "num_d", "num_d", "qw_s", "qw_s", "er_s", "er_s");
    String cat_s = p.get("cat_s");
    String xy_s = p.get("xy_s");
    String qw_s = p.get("qw_s");
    String er_s = p.get("er_s");  // this field is designed to test numBuckets refinement... the first phase will only have a single bucket returned for the top count bucket of cat_s
    String num_d = p.get("num_d");

    clients.get(0).add(sdoc("id", "01", "all_s", "all", cat_s, "A", xy_s, "X", num_d, -1, qw_s, "Q", er_s, "E"));  // A wins count tie
    clients.get(0).add(sdoc("id", "02", "all_s", "all", cat_s, "B", xy_s, "Y", num_d, 3));

    clients.get(1).add(sdoc("id", "11", "all_s", "all", cat_s, "B", xy_s, "X", num_d, -5, er_s, "E"));  // B highest count
    clients.get(1).add(sdoc("id", "12", "all_s", "all", cat_s, "B", xy_s, "Y", num_d, -11, qw_s, "W"));
    clients.get(1).add(sdoc("id", "13", "all_s", "all", cat_s, "A", xy_s, "X", num_d, 7, er_s, "R"));  // "R" will only be picked up via refinement when parent facet is cat_s

    clients.get(2).add(sdoc("id", "21", "all_s", "all", cat_s, "A", xy_s, "X", num_d, 17, qw_s, "W", er_s, "E"));  // A highest count
    clients.get(2).add(sdoc("id", "22", "all_s", "all", cat_s, "A", xy_s, "Y", num_d, -19));
    clients.get(2).add(sdoc("id", "23", "all_s", "all", cat_s, "B", xy_s, "X", num_d, 11));

    client.commit();

    // Shard responses should be A=1, B=2, A=2, merged should be "A=3, B=2"
    // One shard will have _facet_={"refine":{"cat0":{"_l":["A"]}}} on the second phase

    /****
    // fake a refinement request... good for development/debugging
    assertJQ(clients.get(1),
        params(p, "q", "*:*", "_facet_", "{refine:{cat0:{_l:[A]}}}", "isShard", "true", "distrib", "false", "shards.purpose", "2097216", "ids", "11,12,13",
            "json.facet", "{" +
                "cat0:{type:terms, field:cat_s, sort:'count desc', limit:1, overrequest:0, refine:true}" +
                "}"
        )
        , "facets=={foo:555}"
    );
    ****/

    client.testJQ(params(p, "q", "*:*",
        "json.facet", "{" +
            "cat0:{type:terms, field:${cat_s}, sort:'count desc', limit:1, overrequest:0, refine:false}" +
            "}"
        )
        , "facets=={ count:8" +
            ", cat0:{ buckets:[ {val:A,count:3} ] }" +  // w/o overrequest and refinement, count is lower than it should be (we don't see the A from the middle shard)
            "}"
    );

    client.testJQ(params(p, "q", "*:*",
        "json.facet", "{" +
            "cat0:{type:terms, field:${cat_s}, sort:'count desc', limit:1, overrequest:0, refine:true}" +
            "}"
        )
        , "facets=={ count:8" +
            ", cat0:{ buckets:[ {val:A,count:4} ] }" +  // w/o overrequest, we need refining to get the correct count.
            "}"
    );

    // test that basic stats work for refinement
    client.testJQ(params(p, "q", "*:*",
        "json.facet", "{" +
            "cat0:{type:terms, field:${cat_s}, sort:'count desc', limit:1, overrequest:0, refine:true, facet:{ stat1:'sum(${num_d})'} }" +
            "}"
        )
        , "facets=={ count:8" +
            ", cat0:{ buckets:[ {val:A,count:4, stat1:4.0} ] }" +
            "}"
    );

    // test sorting buckets by a different stat
    client.testJQ(params(p, "q", "*:*",
        "json.facet", "{" +
            " cat0:{type:terms, field:${cat_s}, sort:'min1 asc', limit:1, overrequest:0, refine:false, facet:{ min1:'min(${num_d})'} }" +
            ",cat1:{type:terms, field:${cat_s}, sort:'min1 asc', limit:1, overrequest:0, refine:true, facet:{ min1:'min(${num_d})'} }" +
            ",qfacet:{type:query, q:'*:*', facet:{ cat2:{type:terms, field:${cat_s}, sort:'min1 asc', limit:1, overrequest:0, refine:true, facet:{ min1:'min(${num_d})'} } }}" +  // refinement needed through a query facet
            ",allf:{type:terms, field:all_s, facet:{ cat3:{type:terms, field:${cat_s}, sort:'min1 asc', limit:1, overrequest:0, refine:true, facet:{ min1:'min(${num_d})'} } }}" +  // refinement needed through field facet
            ",sum1:'sum(num_d)'" +  // make sure that root bucket stats aren't affected by refinement
            "}"
        )
        , "facets=={ count:8" +
            ", cat0:{ buckets:[ {val:A,count:3, min1:-19.0} ] }" +  // B wins in shard2, so we're missing the "A" count for that shard w/o refinement.
            ", cat1:{ buckets:[ {val:A,count:4, min1:-19.0} ] }" +  // with refinement, we get the right count
            ", qfacet:{ count:8, cat2:{ buckets:[ {val:A,count:4, min1:-19.0} ] } }" +  // just like the previous response, just nested under a query facet
            ", allf:{ buckets:[ {cat3:{ buckets:[ {val:A,count:4, min1:-19.0} ] } ,count:8,val:all }] }" +  // just like the previous response, just nested under a field facet
            ", sum1:2.0" +
            "}"
    );

    // test partial buckets (field facet within field facet)
    client.testJQ(params(p, "q", "*:*",
        "json.facet", "{" +
            "ab:{type:terms, field:${cat_s}, limit:1, overrequest:0, refine:true, facet:{ xy:{type:terms, field:${xy_s}, limit:1, overrequest:0, refine:true } }}" +
            "}"
        )
        , "facets=={ count:8" +
            ", ab:{ buckets:[ {val:A, count:4, xy:{buckets:[ {val:X,count:3}]} }] }" +  // just like the previous response, just nested under a field facet
            "}"
    );

    // test that sibling facets and stats are included for _p buckets, but skipped for _s buckets
    client.testJQ(params(p, "q", "*:*",
        "json.facet", "{" +
            " ab :{type:terms, field:${cat_s}, limit:1, overrequest:0, refine:true, facet:{ xy:{type:terms, field:${xy_s}, limit:1, overrequest:0, refine:true}, qq:{query:'*:*'},ww:'sum(${num_d})' }}" +
            ",ab2:{type:terms, field:${cat_s}, limit:1, overrequest:0, refine:false, facet:{ xy:{type:terms, field:${xy_s}, limit:1, overrequest:0, refine:true}, qq:{query:'*:*'},ww:'sum(${num_d})' }}" +  // top level refine=false shouldn't matter
            ",allf :{type:terms, field:all_s, limit:1, overrequest:0, refine:true, facet:{cat:{type:terms, field:${cat_s}, limit:1, overrequest:0, refine:true}, qq:{query:'*:*'},ww:'sum(${num_d})' }}" +
            ",allf2:{type:terms, field:all_s, limit:1, overrequest:0, refine:false, facet:{cat:{type:terms, field:${cat_s}, limit:1, overrequest:0, refine:true}, qq:{query:'*:*'},ww:'sum(${num_d})' }}" +  // top level refine=false shouldn't matter
            "}"
        )
        , "facets=={ count:8" +
            ", ab:{ buckets:[ {val:A, count:4, xy:{buckets:[ {val:X,count:3}]} ,qq:{count:4}, ww:4.0 }] }" +  // make sure qq and ww are included for _p buckets
            ", allf:{ buckets:[ {count:8, val:all, cat:{buckets:[{val:A,count:4}]} ,qq:{count:8}, ww:2.0 }] }" +  // make sure qq and ww are excluded (not calculated again in another phase) for _s buckets
            ", ab2:{ buckets:[ {val:A, count:4, xy:{buckets:[ {val:X,count:3}]} ,qq:{count:4}, ww:4.0 }] }" +  // make sure qq and ww are included for _p buckets
            ", allf2:{ buckets:[ {count:8, val:all, cat:{buckets:[{val:A,count:4}]} ,qq:{count:8}, ww:2.0 }] }" +  // make sure qq and ww are excluded (not calculated again in another phase) for _s buckets
            "}"
    );

    // test refining under the special "missing" bucket of a field facet
    client.testJQ(params(p, "q", "*:*",
        "json.facet", "{" +
            "f:{type:terms, field:missing_s, limit:1, overrequest:0, missing:true, refine:true, facet:{ cat:{type:terms, field:${cat_s}, limit:1, overrequest:0, refine:true } }}" +
            "}"
        )
        , "facets=={ count:8" +
            ", f:{ buckets:[], missing:{count:8, cat:{buckets:[{val:A,count:4}]} } }" +  // just like the previous response, just nested under a field facet
            "}"
    );

    // test filling in "missing" bucket for partially refined facets
    client.testJQ(params(p, "q", "*:*",
        "json.facet", "{" +
            // test all values missing in sub-facet
            " ab :{type:terms, field:${cat_s}, limit:1, overrequest:0, refine:false, facet:{ zz:{type:terms, field:missing_s, limit:1, overrequest:0, refine:false, missing:true} }}" +
            ",ab2:{type:terms, field:${cat_s}, limit:1, overrequest:0, refine:true , facet:{ zz:{type:terms, field:missing_s, limit:1, overrequest:0, refine:true , missing:true} }}" +
            // test some values missing in sub-facet (and test that this works with normal partial bucket refinement)
            ", cd :{type:terms, field:${cat_s}, limit:1, overrequest:0, refine:false, facet:{ qw:{type:terms, field:${qw_s}, limit:1, overrequest:0, refine:false, missing:true, facet:{qq:{query:'*:*'}} } }}" +
            ", cd2:{type:terms, field:${cat_s}, limit:1, overrequest:0, refine:true , facet:{ qw:{type:terms, field:${qw_s}, limit:1, overrequest:0, refine:true , missing:true, facet:{qq:{query:'*:*'}} } }}" +
            "}"
        )
        , "facets=={ count:8" +
            ", ab:{ buckets:[ {val:A, count:3, zz:{buckets:[], missing:{count:3}}}] }" +
            ",ab2:{ buckets:[ {val:A, count:4, zz:{buckets:[], missing:{count:4}}}] }" +
            ", cd:{ buckets:[ {val:A, count:3, qw:{buckets:[{val:Q, count:1, qq:{count:1}}], missing:{count:1,qq:{count:1}}}}] }" +
            ",cd2:{ buckets:[ {val:A, count:4, qw:{buckets:[{val:Q, count:1, qq:{count:1}}], missing:{count:2,qq:{count:2}}}}] }" +
            "}"
    );

    // test filling in missing "allBuckets"
    client.testJQ(params(p, "q", "*:*",
        "json.facet", "{" +
            " cat :{type:terms, field:${cat_s}, limit:1, overrequest:0, refine:false, allBuckets:true, facet:{ xy:{type:terms, field:${xy_s}, limit:1, overrequest:0, allBuckets:true, refine:false} } }" +
            ", cat2:{type:terms, field:${cat_s}, limit:1, overrequest:0, refine:true , allBuckets:true, facet:{ xy:{type:terms, field:${xy_s}, limit:1, overrequest:0, allBuckets:true, refine:true } } }" +
            ", cat3:{type:terms, field:${cat_s}, limit:1, overrequest:0, refine:true , allBuckets:true, facet:{ xy:{type:terms, field:${xy_s}, limit:1, overrequest:0, allBuckets:true, refine:true , facet:{f:'sum(${num_d})'} } } }" +
            "}"
        )
        , "facets=={ count:8" +
            ", cat:{ allBuckets:{count:8}, buckets:[ {val:A, count:3, xy:{buckets:[{count:2, val:X}], allBuckets:{count:3}}}] }" +
            ",cat2:{ allBuckets:{count:8}, buckets:[ {val:A, count:4, xy:{buckets:[{count:3, val:X}], allBuckets:{count:4}}}] }" +
            ",cat3:{ allBuckets:{count:8}, buckets:[ {val:A, count:4, xy:{buckets:[{count:3, val:X, f:23.0}], allBuckets:{count:4, f:4.0}}}] }" +
            "}"
    );

    // test filling in missing numBuckets
    client.testJQ(params(p, "q", "*:*",
        "json.facet", "{" +
            " cat :{type:terms, field:${cat_s}, limit:1, overrequest:0, refine:false, numBuckets:true, facet:{ er:{type:terms, field:${er_s}, limit:1, overrequest:0, numBuckets:true, refine:false} } }" +
            ", cat2:{type:terms, field:${cat_s}, limit:1, overrequest:0, refine:true , numBuckets:true, facet:{ er:{type:terms, field:${er_s}, limit:1, overrequest:0, numBuckets:true, refine:true } } }" +
            "}"
        )
        , "facets=={ count:8" +
            ", cat:{ numBuckets:2, buckets:[ {val:A, count:3, er:{numBuckets:1,buckets:[{count:2, val:E}] }}] }" +  // the "R" bucket will not be seen w/o refinement
            ",cat2:{ numBuckets:2, buckets:[ {val:A, count:4, er:{numBuckets:2,buckets:[{count:2, val:E}] }}] }" +
            "}"
    );

    final String sort_limit_over = "sort:'count desc', limit:1, overrequest:0, ";

    // simplistic join domain testing: no refinement == low count
    client.testJQ(params(p, "q", "${xy_s}:Y",  // query only matches one doc per shard
        "json.facet", "{" +
            " cat0:{type:terms, field:${cat_s}, " + sort_limit_over + " refine:false," +
            // self join on all_s ensures every doc on every shard included in facets
            " domain: { join: { from:all_s, to:all_s } } }" +
            "}"
        )
        , "/response/numFound==3",
        "facets=={ count:3, " +
            // w/o overrequest and refinement, count for 'A' is lower than it should be
            // (we don't see the A from the middle shard)
            " cat0:{ buckets:[ {val:A,count:3} ] } }");

    // simplistic join domain testing: refinement == correct count
    client.testJQ(params(p, "q", "${xy_s}:Y",  // query only matches one doc per shard
        "json.facet", "{" +
            " cat0:{type:terms, field:${cat_s}, " + sort_limit_over + " refine:true," +
            // self join on all_s ensures every doc on every shard included in facets
            " domain: { join: { from:all_s, to:all_s } } }" +
            "}"
        )
        , "/response/numFound==3",
        "facets=={ count:3," +
            // w/o overrequest, we need refining to get the correct count for 'A'.
            " cat0:{ buckets:[ {val:A,count:4} ] } }");

    // contrived join domain + refinement (at second level) + testing
    client.testJQ(params(p, "q", "${xy_s}:Y",  // query only matches one doc per shard
        "json.facet", "{" +
            // top level facet has a single term
            " all:{type:terms, field:all_s, " + sort_limit_over + " refine:true, " +
            " facet:{ " +
            // subfacet will facet on cat after joining on all (so all docs should be included in subfacet)
            " cat0:{type:terms, field:${cat_s}, " + sort_limit_over + " refine:true," +
            " domain: { join: { from:all_s, to:all_s } } } } }" +
            "}"
        )
        , "/response/numFound==3",
        "facets=={ count:3," +
            // all 3 docs matching base query have same 'all' value in top facet
            " all:{ buckets:[ { val:all, count:3, " +
            // sub facet has refinement, so count for 'A' should be correct
            " cat0:{ buckets: [{val:A,count:4}] } } ] } }");
  }
}