package lux.solr;
import java.util.ArrayList;
import java.util.Collection;
import org.apache.solr.common.SolrInputDocument;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
/** Tests for configurable analysis chain */
public class SchemaTest extends BaseSolrTest {
@BeforeClass
public static void setup() throws Exception {
// inhibit the startup of a default core by our superclass
}
@AfterClass
public static void tearDown() throws Exception {
// inhibit the class-level tearDown by our superclass; do it after each test:
}
@After
public void myTearDown () throws Exception {
BaseSolrTest.tearDown();
}
@Test
public void testConfigureXmlAnalyzer () throws Exception {
// schema alters the text analysis used for lux_text, lux_elt_text and lux_att_text as well
BaseSolrTest.setup("solr", "core2");
Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument> ();
addSolrDoc ("test1", "<doc><title id='1'>This is a test</title><test>balloons</test>comma,separated</doc>", docs,
"uri", "xml");
solr.add (docs);
solr.commit();
// lux_text has case-folding, whitespace tokenization, and stemming:
assertSolrQueryCount (1, "balloon");
assertQueryCount (1, 1, "document", "doc", "lux:search('balloon')");
assertSolrQueryCount (1, "balloons");
assertQueryCount (1, 1, "document", "doc", "lux:search('balloons')");
// check that query is analyzed as well
assertSolrQueryCount (1, "tests");
assertQueryCount (1, 1, "document", "doc", "lux:search('tests')");
assertSolrQueryCount (0, "comma");
assertQueryCount (0, 0, "document", "", "lux:search('comma')");
assertSolrQueryCount (1, "comma,separated");
assertQueryCount (1, 1, "document", "doc", "lux:search('comma,separated')");
assertSolrQueryCount (1, "this");
assertQueryCount (1, 1, "document", "doc", "lux:search('this')");
assertSolrQueryCount (1, "This");
assertQueryCount (1, 1, "document", "doc", "lux:search('This')");
// schema includes a copyField from lux_text -> lux_text_unstemmed, which has no stemming
assertSolrQueryCount (1, "lux_text_unstemmed:balloons");
assertQueryCount (1, 1, "document", "doc", "lux:search('lux_text_unstemmed:balloons')");
assertSolrQueryCount (0, "lux_text_unstemmed:balloon");
assertQueryCount (0, 0, "document", "", "lux:search('lux_text_unstemmed:balloon')");
// schema includes a copyField from lux_text -> lux_text_case, which has is case-sensitive
assertSolrQueryCount (0, "lux_text_case:this");
assertQueryCount (0, 0, "document", "", "lux:search('lux_text_case:this')");
assertSolrQueryCount (1, "lux_text_case:This");
assertQueryCount (1, 1, "document", "doc", "lux:search('lux_text_case:This')");
// test that stemming and case-folding have been applied to the element text index as well
assertSolrQueryCount (1, "lux_elt_text:test\\:balloon");
assertQueryCount (1, 1, "document", "doc", "lux:search('<test:balloon')");
// This doesn't work because stemming gets applied to the 'test:balloons'
// but this isn't an issue if we just say that the supported thing is lux:search('<test:balloons')
//assertQueryCount (1, "lux_elt_text:doc\\:balloons");
assertQueryCount (1, 1, "document", "doc", "lux:search('<test:balloons')");
//assertQueryCount (1, "lux_elt_text:doc\\:tests");
assertQueryCount (1, 1, "document", "doc", "lux:search('<title:tests')");
assertSolrQueryCount (0, "lux_elt_text:doc\\:comma");
assertQueryCount (0, 0, "document", "", "lux:search('<doc:comma')");
assertSolrQueryCount (1, "lux_elt_text:doc\\:comma,separated");
assertQueryCount (1, 1, "document", "doc", "lux:search('<doc:comma,separated')");
assertSolrQueryCount (1, "lux_elt_text:title\\:this");
assertQueryCount (1, 1, "document", "doc", "lux:search('<title:this')");
assertSolrQueryCount (1, "lux_elt_text:title\\:This");
assertQueryCount (1, 1, "document", "doc", "lux:search('<title:This')");
}
@Test
public void testDefaultXmlAnalyzer () throws Exception {
// the default analyzer is based on StandardAnalyzer
BaseSolrTest.setup("solr", "collection1");
Collection<SolrInputDocument> docs = new ArrayList<SolrInputDocument> ();
addSolrDoc ("test1", "<doc><title id='1'>This is a test</title><test>balloons</test>comma,separated</doc>", docs);
solr.add (docs);
solr.commit();
// lux_text uses the (Lux) default analyzer which has case-folding, standard tokenization, and no stemming:
assertSolrQueryCount (0, "balloon");
assertQueryCount (0, 0, "", "", "lux:search('balloon')");
assertSolrQueryCount (1, "lux_text:balloons");
assertQueryCount (1, 1, "document", "doc", "lux:search('balloons')");
assertSolrQueryCount (1, "lux_text:comma,separated");
assertQueryCount (1, 1, "document", "doc", "lux:search('comma,separated')");
assertSolrQueryCount (1, "lux_text:comma");
assertQueryCount (1, 1, "document", "doc", "lux:search('comma')");
assertSolrQueryCount (1, "lux_text:this");
assertQueryCount (1, 1, "document", "doc", "lux:search('this')");
assertSolrQueryCount (1, "lux_text:This");
}
}
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */