/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.request;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.SolrTestCaseJ4;
import org.junit.After;
import org.junit.BeforeClass;
import org.junit.Test;
import static org.junit.Assert.*;
import java.util.Locale;
import java.util.Random;
/**
* @version $Id: TestFaceting.java 955257 2010-06-16 15:17:32Z mikemccand $
*/
/**
 * Tests of the UnInverted-field faceting code ({@code facet.method=fc}) and of
 * the {@code TermIndex} / {@code NumberedTermsEnum} term-numbering machinery it
 * relies on.
 */
public class TestFaceting extends SolrTestCaseJ4 {

  @BeforeClass
  public static void beforeClass() throws Exception {
    initCore("solrconfig.xml", "schema11.xml");
  }

  @After
  @Override
  public void tearDown() throws Exception {
    close();
    super.tearDown();
  }

  /**
   * Renders a term number as a zero-padded 8-digit string so that the
   * lexicographic order of generated terms matches their numeric order.
   */
  String t(int tnum) {
    return String.format(Locale.US, "%08d", tnum);
  }

  /**
   * Wipes the index, then adds {@code nTerms} documents each carrying one
   * unique term in the field of {@link #proto}, and optimizes so no deleted
   * docs remain.
   */
  void createIndex(int nTerms) {
    assertU(delQ("*:*"));
    for (int i = 0; i < nTerms; i++) {
      assertU(adoc("id", Float.toString(i), proto.field(), t(i)));
    }
    assertU(optimize());  // squeeze out any possible deleted docs
  }

  Term proto = new Term("field_s", "");
  SolrQueryRequest req;  // used to get a searcher

  /** Releases the current request (and its searcher reference), if any. */
  void close() {
    if (req != null) req.close();
    req = null;
  }

  /**
   * Builds an index of {@code size} unique terms and exercises
   * {@code NumberedTermsEnum}: full iteration, random seeks by term text
   * (including seeks past the last term), a seek before the first term, and
   * random seeks by term number.
   */
  void doTermEnum(int size) throws Exception {
    close();
    createIndex(size);
    req = lrf.makeRequest("q", "*:*");

    TermIndex ti = new TermIndex(proto.field());
    NumberedTermsEnum te = ti.getEnumerator(req.getSearcher().getReader());

    // iterate through all terms first; the final term number must equal size
    while (te.term() != null) te.next();
    assertEquals(size, te.getTermNumber());
    te.close();

    te = ti.getEnumerator(req.getSearcher().getReader());
    try {
      // seeded with size so each index size gets a reproducible sequence
      Random r = new Random(size);

      // test seeking by term string; rnum may land past the last term
      for (int i = 0; i < size * 2 + 10; i++) {
        int rnum = r.nextInt(size + 2);
        String s = t(rnum);
        BytesRef br = te.skipTo(new BytesRef(s));
        assertEquals(br != null, rnum < size);
        if (rnum < size) {
          assertEquals(rnum, te.pos);
          assertEquals(s, te.term().utf8ToString());
        } else {
          // seek past the end: enum is exhausted, positioned at size
          assertEquals(null, te.term());
          assertEquals(size, te.getTermNumber());
        }
      }

      // test seeking before the first term ("000" sorts before t(0) = "00000000")
      assertEquals(size > 0, te.skipTo(new BytesRef("000")) != null);
      assertEquals(0, te.getTermNumber());
      if (size > 0) {
        assertEquals(t(0), te.term().utf8ToString());
      } else {
        assertEquals(null, te.term());
      }

      if (size > 0) {
        // test seeking by term number
        for (int i = 0; i < size * 2 + 10; i++) {
          int rnum = r.nextInt(size);
          String s = t(rnum);
          BytesRef br = te.skipTo(rnum);
          assertNotNull(br);
          assertEquals(rnum, te.pos);
          assertEquals(s, te.term().utf8ToString());
        }
      }
    } finally {
      // FIX: the second enumerator was previously never closed (resource leak)
      te.close();
    }
  }

  @Test
  public void testTermEnum() throws Exception {
    doTermEnum(0);
    doTermEnum(1);
    doTermEnum(TermIndex.interval - 1);  // test boundaries around the block size
    doTermEnum(TermIndex.interval);
    doTermEnum(TermIndex.interval + 1);
    doTermEnum(TermIndex.interval * 2 + 2);
    // doTermEnum(TermIndex.interval * 3 + 3);
  }

  @Test
  public void testFacets() throws Exception {
    StringBuilder sb = new StringBuilder();
    // go over 4096 to test some of the buffer resizing
    for (int i = 0; i < 5000; i++) {
      sb.append(t(i));
      sb.append(' ');
    }
    assertU(adoc("id", "1", "many_ws", sb.toString()));
    assertU(commit());

    assertQ("check many tokens",
        req("q", "id:1", "indent", "true"
            , "facet", "true", "facet.method", "fc"
            , "facet.field", "many_ws"
            , "facet.limit", "-1"
        )
        , "*[count(//lst[@name='many_ws']/int)=5000]"
        , "//lst[@name='many_ws']/int[@name='" + t(0) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(1) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(2) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(3) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(4) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(5) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(4092) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(4093) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(4094) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(4095) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(4096) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(4097) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(4098) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(4090) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(4999) + "'][.='1']"
    );

    // test gaps that take more than one byte
    sb = new StringBuilder();
    sb.append(t(0)).append(' ');
    sb.append(t(150)).append(' ');
    sb.append(t(301)).append(' ');
    sb.append(t(453)).append(' ');
    sb.append(t(606)).append(' ');
    sb.append(t(1000)).append(' ');
    sb.append(t(2010)).append(' ');
    sb.append(t(3050)).append(' ');
    sb.append(t(4999)).append(' ');
    assertU(adoc("id", "2", "many_ws", sb.toString()));
    // FIX: doc 2 was never committed and the query below asked for id:1,
    // so the multi-byte-gap case was never actually exercised.
    assertU(commit());

    assertQ("check gap tokens",
        req("q", "id:2", "indent", "true"
            , "facet", "true", "facet.method", "fc"
            , "facet.field", "many_ws"
            , "facet.limit", "-1"
        )
        // all 5000 terms are still reported (default facet.mincount=0)
        , "*[count(//lst[@name='many_ws']/int)=5000]"
        , "//lst[@name='many_ws']/int[@name='" + t(0) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(150) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(301) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(453) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(606) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(1000) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(2010) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(3050) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(4999) + "'][.='1']"
    );
  }

  @Test
  public void testRegularBig() throws Exception {
    StringBuilder sb = new StringBuilder();
    // go over 4096 to test some of the buffer resizing
    int nTerms = 7;
    for (int i = 0; i < nTerms; i++) {
      sb.append(t(i));
      sb.append(' ');
    }

    int i1 = 1000000;
    // int iter=65536+10;
    int iter = 1000;
    int commitInterval = iter / 9;

    for (int i = 0; i < iter; i++) {
      // assertU(adoc("id", t(i), "many_ws", many_ws + t(i1+i) + " " + t(i1*2+i)));
      assertU(adoc("id", t(i), "many_ws", t(i1 + i) + " " + t(i1 * 2 + i)));
      // FIX: was "iter % commitInterval == 0", which is constant and never
      // true (1000 % 111 == 1), so the periodic commits — and the multiple
      // segments they create — never happened.
      if (i % commitInterval == 0) {
        assertU(commit());
      }
    }
    assertU(commit());

    for (int i = 0; i < iter; i += iter / 10) {
      assertQ("check many tokens",
          req("q", "id:" + t(i), "indent", "true"
              , "facet", "true", "facet.method", "fc"
              , "facet.field", "many_ws"
              , "facet.limit", "-1"
              , "facet.mincount", "1"
          )
          , "*[count(//lst[@name='many_ws']/int)=" + 2 + "]"
          , "//lst[@name='many_ws']/int[@name='" + t(i1 + i) + "'][.='1']"
          , "//lst[@name='many_ws']/int[@name='" + t(i1 * 2 + i) + "'][.='1']"
      );
    }

    // explicitly check the last document too
    int i = iter - 1;
    assertQ("check many tokens",
        req("q", "id:" + t(i), "indent", "true"
            , "facet", "true", "facet.method", "fc"
            , "facet.field", "many_ws"
            , "facet.limit", "-1"
            , "facet.mincount", "1"
        )
        , "*[count(//lst[@name='many_ws']/int)=" + 2 + "]"
        , "//lst[@name='many_ws']/int[@name='" + t(i1 + i) + "'][.='1']"
        , "//lst[@name='many_ws']/int[@name='" + t(i1 * 2 + i) + "'][.='1']"
    );
  }
}