/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.request;
import java.util.Locale;
import java.util.Random;
import org.apache.lucene.index.DocTermOrds;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.SolrTestCaseJ4;
import org.junit.After;
import org.junit.BeforeClass;
import org.junit.Test;
/**
*
*/
/**
 * Tests faceting via {@code facet.method=fc} on multi-valued fields backed by
 * {@link UnInvertedField}, plus the ordinal-based {@link TermsEnum} it exposes
 * (seek by term bytes, seek by ordinal, and boundary sizes around the
 * {@link DocTermOrds} index interval).
 */
public class TestFaceting extends SolrTestCaseJ4 {

  @BeforeClass
  public static void beforeClass() throws Exception {
    initCore("solrconfig.xml", "schema11.xml");
  }

  @Override
  public void setUp() throws Exception {
    super.setUp();
    clearIndex();
  }

  @After
  @Override
  public void tearDown() throws Exception {
    close(); // release the request/searcher before the core is torn down
    super.tearDown();
  }

  /**
   * Formats a term number as a fixed-width, zero-padded 8-digit string so that
   * terms sort in the same order as their numbers.
   */
  String t(int tnum) {
    return String.format(Locale.ROOT, "%08d", tnum);
  }

  /**
   * Replaces the index contents with {@code nTerms} docs, each holding a single
   * distinct term {@code t(i)} in the field under test, then optimizes down to
   * one segment with no deletions.
   */
  void createIndex(int nTerms) {
    assertU(delQ("*:*"));
    for (int i = 0; i < nTerms; i++) {
      // NOTE: ids come out as "0.0", "1.0", ... (Float.toString); only
      // uniqueness matters here.
      assertU(adoc("id", Float.toString(i), proto.field(), t(i)));
    }
    assertU(optimize()); // squeeze out any possible deleted docs
  }

  // Prototype term naming the single-valued field used by doTermEnum.
  Term proto = new Term("field_s", "");
  SolrQueryRequest req; // used to get a searcher

  /** Closes the current request (releasing its searcher reference), if any. */
  void close() {
    if (req != null) req.close();
    req = null;
  }

  /**
   * Builds an index of {@code size} distinct terms, un-inverts the field, and
   * verifies the resulting TermsEnum: term count, seekCeil by bytes (including
   * terms past the end of the dictionary), seek before the first term, and
   * seekExact by ordinal.
   */
  void doTermEnum(int size) throws Exception {
    //System.out.println("doTermEnum size=" + size);
    close();
    createIndex(size);
    req = lrf.makeRequest("q", "*:*");

    UnInvertedField uif = new UnInvertedField(proto.field(), req.getSearcher());
    assertEquals(size, uif.getNumTerms());

    // An empty field yields no enum at all.
    TermsEnum te = uif.getOrdTermsEnum(req.getSearcher().getAtomicReader());
    assertEquals(size == 0, te == null);

    Random r = new Random(size);

    // test seeking by term string
    for (int i = 0; i < size * 2 + 10; i++) {
      int rnum = r.nextInt(size + 2); // occasionally seek past the last term
      String s = t(rnum);
      //System.out.println("s=" + s);
      final BytesRef br;
      if (te == null) {
        br = null;
      } else {
        TermsEnum.SeekStatus status = te.seekCeil(new BytesRef(s));
        if (status == TermsEnum.SeekStatus.END) {
          br = null;
        } else {
          br = te.term();
        }
      }
      // seekCeil finds the term iff it is inside the dictionary
      assertEquals(br != null, rnum < size);
      if (rnum < size) {
        assertEquals(rnum, (int) te.ord());
        assertEquals(s, te.term().utf8ToString());
      }
    }

    // test seeking before term
    if (size > 0) {
      // "000" sorts before t(0), so seekCeil must land on the first term
      assertEquals(size > 0, te.seekCeil(new BytesRef("000"), true) != TermsEnum.SeekStatus.END);
      assertEquals(0, te.ord());
      assertEquals(t(0), te.term().utf8ToString());
    }

    if (size > 0) {
      // test seeking by term number
      for (int i = 0; i < size * 2 + 10; i++) {
        int rnum = r.nextInt(size);
        String s = t(rnum);
        te.seekExact((long) rnum);
        BytesRef br = te.term();
        assertNotNull(br);
        assertEquals(rnum, (int) te.ord());
        assertEquals(s, te.term().utf8ToString());
      }
    }
  }

  @Test
  public void testTermEnum() throws Exception {
    doTermEnum(0);
    doTermEnum(1);
    final int DEFAULT_INDEX_INTERVAL = 1 << DocTermOrds.DEFAULT_INDEX_INTERVAL_BITS;
    doTermEnum(DEFAULT_INDEX_INTERVAL - 1); // test boundaries around the block size
    doTermEnum(DEFAULT_INDEX_INTERVAL);
    doTermEnum(DEFAULT_INDEX_INTERVAL + 1);
    doTermEnum(DEFAULT_INDEX_INTERVAL * 2 + 2);
    // doTermEnum(DEFAULT_INDEX_INTERVAL * 3 + 3);
  }

  @Test
  public void testFacets() throws Exception {
    StringBuilder sb = new StringBuilder();
    // go over 4096 to test some of the buffer resizing
    for (int i = 0; i < 5000; i++) {
      sb.append(t(i));
      sb.append(' ');
    }
    assertU(adoc("id", "1", "many_ws", sb.toString()));
    assertU(commit());

    assertQ("check many tokens",
        req("q", "id:1", "indent", "true"
            , "facet", "true", "facet.method", "fc"
            , "facet.field", "many_ws"
            , "facet.limit", "-1"
        )
        , "*[count(//lst[@name='many_ws']/int)=5000]"
        , "//lst[@name='many_ws']/int[@name='" + t(0) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(1) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(2) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(3) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(4) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(5) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(4092) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(4093) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(4094) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(4095) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(4096) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(4097) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(4098) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(4090) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(4999) + "'][.='1']"
    );

    // test gaps that take more than one byte
    sb = new StringBuilder();
    sb.append(t(0)).append(' ');
    sb.append(t(150)).append(' ');
    sb.append(t(301)).append(' ');
    sb.append(t(453)).append(' ');
    sb.append(t(606)).append(' ');
    sb.append(t(1000)).append(' ');
    sb.append(t(2010)).append(' ');
    sb.append(t(3050)).append(' ');
    sb.append(t(4999)).append(' ');
    assertU(adoc("id", "2", "many_ws", sb.toString()));
    // FIX: doc 2 must be committed, and the query must target id:2 — the
    // original queried id:1 (uncommitted doc 2), so the multi-byte ordinal
    // gaps were never actually exercised. With the default facet.mincount=0
    // all 5000 field terms are still returned, so the count stays 5000, and
    // the 9 terms present in doc 2 each have count 1.
    assertU(commit());

    assertQ("check many tokens",
        req("q", "id:2", "indent", "true"
            , "facet", "true", "facet.method", "fc"
            , "facet.field", "many_ws"
            , "facet.limit", "-1"
        )
        , "*[count(//lst[@name='many_ws']/int)=5000]"
        , "//lst[@name='many_ws']/int[@name='" + t(0) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(150) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(301) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(453) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(606) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(1000) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(2010) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(3050) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(4999) + "'][.='1']"
    );
  }

  @Test
  public void testRegularBig() throws Exception {
    StringBuilder sb = new StringBuilder();
    // go over 4096 to test some of the buffer resizing
    int nTerms = 7;
    for (int i = 0; i < nTerms; i++) {
      sb.append(t(i));
      sb.append(' ');
    }

    int i1 = 1000000;

    // int iter=65536+10;
    int iter = 1000;
    int commitInterval = iter / 9;

    for (int i = 0; i < iter; i++) {
      // assertU(adoc("id", t(i), "many_ws", many_ws + t(i1+i) + " " + t(i1*2+i)));
      assertU(adoc("id", t(i), "many_ws", t(i1 + i) + " " + t(i1 * 2 + i)));
      // FIX: was "iter % commitInterval" — a loop-invariant condition
      // (1000 % 111 != 0) that never committed, so the index was never
      // spread across multiple segments as intended.
      if (i % commitInterval == 0) {
        assertU(commit());
      }
    }
    assertU(commit());

    // spot-check facet counts for every tenth doc
    for (int i = 0; i < iter; i += iter / 10) {
      assertQ("check many tokens",
          req("q", "id:" + t(i), "indent", "true"
              , "facet", "true", "facet.method", "fc"
              , "facet.field", "many_ws"
              , "facet.limit", "-1"
              , "facet.mincount", "1"
          )
          , "*[count(//lst[@name='many_ws']/int)=" + 2 + "]"
          , "//lst[@name='many_ws']/int[@name='" + t(i1 + i) + "'][.='1']"
          , "//lst[@name='many_ws']/int[@name='" + t(i1 * 2 + i) + "'][.='1']"
      );
    }

    // and explicitly check the very last doc
    int i = iter - 1;
    assertQ("check many tokens",
        req("q", "id:" + t(i), "indent", "true"
            , "facet", "true", "facet.method", "fc"
            , "facet.field", "many_ws"
            , "facet.limit", "-1"
            , "facet.mincount", "1"
        )
        , "*[count(//lst[@name='many_ws']/int)=" + 2 + "]"
        , "//lst[@name='many_ws']/int[@name='" + t(i1 + i) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(i1 * 2 + i) + "'][.='1']"
    );
  }
}