SearchTest.java example

Explorer
lux-master
- src
package lux;

import static lux.IndexTestSupportBase.*;
import static org.junit.Assert.*;

import java.util.Iterator;

import lux.exception.LuxException;
import lux.saxon.UnOptimizer;
import lux.xpath.AbstractExpression;
import lux.xquery.XQuery;
import net.sf.saxon.s9api.XQueryExecutable;
import net.sf.saxon.s9api.XdmNode;

import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.runner.RunWith;

/**
 * Check a variety of XPath queries, ensuring that results when executed using the default indexing
 * settings, as provided by IndexTestSupport, are correct, 
 * and test that expected optimizations are in fact being applied. 
 */
@RunWith (MultiThreadedRunner.class)
public class SearchTest extends BaseSearchTest {
    
    /** Plain-text title of the play used as the test document. */
    private static final String HAMLET_TITLE = "The Tragedy of Hamlet, Prince of Denmark";
    /** The same title wrapped in its TITLE element, as expected by several assertions below. */
    private static final String HAMLET_TITLE_MARKUP = "<TITLE>" + HAMLET_TITLE + "</TITLE>";

    /**
     * Runs once before the tests in this class and hands the Hamlet sample
     * document to the one-argument {@code setup} overload.
     */
    @BeforeClass
    public static void setup() throws Exception {
        final String corpus = "lux/hamlet.xml";
        setup (corpus);
    }
    
    /** Searching for "/" must return as many results as there are indexed documents. */
    @Test
    public void testSearchAllDocs() throws Exception {
        final String everyDocument = "/";
        XdmResultSet hits = assertSearch(everyDocument, IndexTestSupportBase.QUERY_EXACT);
        assertEquals (index.totalDocs, hits.size());
    }
    
    /**
     * Three equivalent ways of counting every document must each yield
     * {@code totalDocs} as their single result.
     */
    @Test
    public void testCountAllDocs () throws Exception {
        final String expectedCount = String.valueOf(totalDocs);
        final String[] countQueries = {
            "count(/)",
            "count(collection())",
            "count(lux:search('*:*'))"
        };
        for (String countQuery : countQueries) {
            XdmResultSet counted = assertSearch (countQuery, QUERY_NO_DOCS, totalDocs);
            assertEquals (expectedCount, counted.iterator().next().toString());
        }
    }

    /**
     * Pins down an assumption about query accuracy: the span query slop is less
     * precise than the XPath, so 6 documents must be examined even though only
     * one of them (the PLAY) contains the five child ACTs.
     */
    @Test
    public void testCountActChildren () throws Exception {
        final String actChildren = "count (/*/ACT)";
        assertSearch ("5", actChildren, 0, 6);
    }
    
    /**
     * Runs {@code exists()} over a table of path expressions; each row gives the
     * expected value and the query, with the expected document count alongside.
     */
    @Test
    public void testExists () throws Exception {
        final String[][] cases = {
            { "true", "exists(/)" },
            { "true", "exists(//SCENE)" },
            { "false", "exists(//foo)" },
            { "true", "exists(//SCENE/root())" },
            { "true", "exists(//SCENE) and exists(//ACT)" },
            { "true", "exists(//SCENE/root()//ACT)" },
            { "true", "exists((/)[.//SCENE and .//ACT])" },
            { "true", "exists(//ACT//SCENE)" }
        };
        final int[] docCounts = { 1, 1, 0, 1, 2, 1, 1, 1 };
        for (int i = 0; i < cases.length; i++) {
            assertSearch (cases[i][0], cases[i][1], QUERY_NO_DOCS, docCounts[i]);
        }
    }
    
    /**
     * Runs {@code empty()} over a range of path expressions, including one
     * combined with {@code or}.
     */
    @Test
    public void testEmpty () throws Exception {
        // the first case uses the three-argument helper, so its value is checked here
        XdmResultSet rootResult = assertSearch ("empty(/)", QUERY_NO_DOCS, 1);
        assertEquals ("false", rootResult.iterator().next().toString());

        final String[][] cases = {
            { "false", "empty(//SCENE)" },
            { "true", "empty(//foo)" },
            { "false", "empty(//SCENE/root())" },
            { "true", "empty(//SCENE) or empty(//foo)" },
            { "false", "empty(//SCENE/root()//ACT)" },
            { "false", "empty((/)[.//SCENE and .//ACT])" }
        };
        final int[] docCounts = { 1, 0, 1, 1, 1, 1 };
        for (int i = 0; i < cases.length; i++) {
            assertSearch (cases[i][0], cases[i][1], QUERY_NO_DOCS, docCounts[i]);
        }
    }

    /**
     * Runs {@code not()} over a range of path expressions, mirroring the cases
     * used for {@code empty()} plus one descendant path that matches nothing.
     */
    @Test
    public void testNot() throws Exception {
        // the first case uses the three-argument helper, so its value is checked here
        XdmResultSet rootResult = assertSearch ("not(/)", QUERY_NO_DOCS, 1);
        assertEquals ("false", rootResult.iterator().next().toString());

        final String[][] cases = {
            { "false", "not(//SCENE)" },
            { "true", "not(//foo)" },
            { "false", "not(//SCENE/root())" },
            { "true", "not(//SCENE) or not(//foo)" },
            { "false", "not(//SCENE/root()//ACT)" },
            { "false", "not((/)[.//SCENE and .//ACT])" },
            { "true", "not(//SCENE//ACT)" }
        };
        final int[] docCounts = { 1, 0, 1, 1, 1, 1, 0 };
        for (int i = 0; i < cases.length; i++) {
            assertSearch (cases[i][0], cases[i][1], QUERY_NO_DOCS, docCounts[i]);
        }
    }
    
    /**
     * Guards against over-optimization: the query derived from an expression
     * inside {@code exists()} must not bleed into the surrounding query and
     * produce a wrong result.
     */
    @Test
    public void testNotExists() throws Exception {
        // Saxon tends to rewrite expressions such as
        //   count(/FM[exists(BLAH) eq false()])
        //   count(/FM[exists(BLAH) = false()])
        // into the form below, which is safe because not() is treated as a
        // non-optimized function.
        final String notExists = "count(/FM[not(exists(BLAH))])";
        assertSearch ("1", notExists, 0, 1);

        // Comparing two exists() calls did exhibit the expected over-optimization failure.
        final String existsEq = "count(/FM[exists(BLAH) eq exists(BLARG)])";
        final String existsGeneralEq = "count(/FM[exists(BLAH) = exists(BLARG)])";
        final String existsNotEq = "count(/FM[exists(BLAH) != exists(BLARG)])";
        assertSearch ("1", existsEq, 0, 1);
        assertSearch ("1", existsGeneralEq, 0, 1);
        assertSearch ("0", existsNotEq, 0, 1);

        final String valueVsString = "count(/FM[BLAH eq string(BLARG)])";
        assertSearch ("0", valueVsString, 0, 0);
        // NOTE: () eq () === ()
        final String emptyValueComparison = "count(/FM[BLAH eq BLARG])";
        assertSearch ("0", emptyValueComparison, 0, 0);
        // NOTE: () = () === false()
        final String emptyGeneralComparison = "count(/FM[BLAH = BLARG])";
        assertSearch ("0", emptyGeneralComparison, 0, 0);

        // there is no optimization along the parent axis
        final String scenesWithoutActParent = "count(//SCENE[not(exists(parent::ACT))])";
        assertSearch ("20", scenesWithoutActParent, 0, 26);

        final String countOfExists = "count(exists(/BLAH))";
        assertSearch ("1", countOfExists, 0, 0);
    }


    /** {@code lux:count} over a path query for root ACT elements must report 5. */
    @Test
    public void testLuxCount () throws Exception {
        final String rootActCount = "lux:count('lux_path:\"\\{\\} ACT\"')";
        assertSearch ("5", rootActCount, null, 5, 0);
    }

    /**
     * The Optimizer must not wrongly assert that order is insignificant in the
     * generated query. The query should be (SCENE AND ACT) and is not countable:
     * the answer is not how many documents have scenes and acts, but how many
     * scenes there are in documents with acts.
     */
    @Test
    public void testPathOrder () throws Exception {
        // Overall there are 20 scenes in 5 acts in 1 play.
        // 40 = 20 (SCENEs in /PLAY) + 20 (SCENEs in the 5 /ACTs together)
        final String scenesInActDocs = "count(//ACT/root()//SCENE)";
        assertSearch ("40", scenesInActDocs, 0, 6);

        // 10 = 5 (ACTs in /PLAY) + 5 /ACT documents.
        final String actsInSceneDocs = "count(//SCENE/root()//ACT)";
        assertSearch ("10", actsInSceneDocs, 0, 6);

        // An earlier (unexplained) expectation was:
        // 120 = 20 (scenes) * 5 (acts) in 1 /PLAY + 20 scenes in 5 /ACT documents
    }
    
    /**
     * Root ACT documents must be found exactly, whether addressed by an absolute
     * path, through {@code collection()}, or through a variable bound to it.
     */
    @Test
    public void testSearchAct() throws Exception {
        final int actCount = index.elementCounts.get("ACT") + 0;
        final String[] actQueries = {
            // path indexes make this exact
            "/ACT",
            // collection() must be optimized too
            "collection()/ACT",
            // ... and so must references to variables
            "let $context := collection() return $context/ACT"
        };
        for (String actQuery : actQueries) {
            XdmResultSet acts = assertSearch (actQuery, QUERY_EXACT);
            assertEquals (actCount, acts.size());
        }
    }
    
    /** {@code /ACT/SCENE} must return as many nodes as the index counts SCENE elements. */
    @Test
    public void testSearchActScene() throws Exception {
        final String scenesUnderRootActs = "/ACT/SCENE";
        XdmResultSet scenes = assertSearch(scenesUnderRootActs, QUERY_MINIMAL);
        assertEquals (index.elementCounts.get("SCENE") + 0, scenes.size());
    }
    
    /**
     * {@code //SCENE} must return every scene three times (from the PLAY, from
     * its ACT, and as its own document), with the PLAY's scenes first and the
     * first ACT's scenes right after them.
     */
    @Test
    public void testSearchAllScenes() throws Exception {
        XdmResultSet results = assertSearch("/ACT", QUERY_MINIMAL);
        assertEquals (5, results.size());
        XdmNode firstAct = (XdmNode) results.iterator().next();
        final String actURI = firstAct.getDocumentURI().toString();

        results = assertSearch("//SCENE", QUERY_MINIMAL);
        final int sceneCount = index.elementCounts.get("SCENE");
        // every SCENE, in its ACT and in the PLAY
        assertEquals (sceneCount * 3, results.size());

        final String playURI = "lux://lux/hamlet.xml";
        Iterator<?> scenes = results.iterator();
        for (int remaining = sceneCount; remaining > 0; remaining--) {
            // each scene, from the /PLAY document
            XdmNode scene = (XdmNode) scenes.next();
            assertEquals (playURI, scene.getDocumentURI().toString());
            assertEquals (playURI, scene.getBaseURI().toString());
        }
        // the next scene must come from the first /ACT document
        XdmNode act1 = (XdmNode) scenes.next();
        assertEquals (actURI, act1.getBaseURI().toString());
    }
    
    /** Documents containing a SCENE: every SCENE document, every ACT document, and the PLAY. */
    @Test
    public void testSearchAllSceneDocs() throws Exception {
        final int sceneDocCount = index.elementCounts.get("SCENE") + index.elementCounts.get("ACT") + 1;
        XdmResultSet sceneDocs = assertSearch("(/)[.//SCENE]", QUERY_EXACT);
        assertEquals (sceneDocCount, sceneDocs.size());
    }
    
    /**
     * Same expectation as for {@code (/)[.//SCENE]}, reached through
     * {@code root()}: every SCENE document, every ACT document, and the PLAY.
     */
    @Test
    public void testSearchAllSceneDocsRoot() throws Exception {
        final int sceneDocCount = index.elementCounts.get("SCENE") + index.elementCounts.get("ACT") + 1;
        XdmResultSet sceneRoots = assertSearch("//SCENE/root()", QUERY_EXACT);
        assertEquals (sceneDocCount, sceneRoots.size());
    }
    
    /**
     * Counts the documents containing a SCENE through several equivalent
     * formulations, then counts the root SCENE documents themselves.
     */
    @Test
    public void testCountDocs () throws Exception {
        // every SCENE, in its ACT and in the PLAY
        final int sceneDocCount = index.elementCounts.get("SCENE") + index.elementCounts.get("ACT") + 1;

        final String[] sceneDocQueries = {
            "count (//SCENE/root())",
            "count ((/)[.//SCENE])",
            "count (//SCENE/ancestor::document-node())",
            "count (/descendant-or-self::SCENE/root())",
            "count (/descendant::SCENE/root())"
        };
        for (String sceneDocQuery : sceneDocQueries) {
            XdmResultSet counted = assertSearch(sceneDocQuery, QUERY_NO_DOCS);
            assertResultValue(counted, sceneDocCount);
        }

        XdmResultSet rootScenes = assertSearch("count (/SCENE)", QUERY_NO_DOCS);
        assertResultValue(rootScenes, index.elementCounts.get("SCENE"));
    }

    /**
     * A query that is not valid XQuery must be rejected with a
     * {@link LuxException}; the test fails if the search completes normally.
     */
    @Test
    public void testSyntaxError () throws Exception {
        try {
            assertSearch ("hey bad boy");
        } catch (LuxException expected) {
            // this is the outcome under test: the malformed query was rejected
            return;
        }
        fail ("expected LuxException to be thrown for syntax error");
    }
    
    /**
     * Evaluates a general comparison between two node sequences and checks that
     * the compiled query returns as many results as the baseline produced by
     * running its body through {@link UnOptimizer}.
     */
    @Test
    public void testTextComparison () throws Exception {
        long t = System.currentTimeMillis();
        String xpath = "//SCNDESCR >= //PERSONA";
        Evaluator eval = index.makeEvaluator();
        Compiler compiler = eval.getCompiler();
        XQueryExecutable xquery = compiler.compile(xpath);
        XdmResultSet results = eval.evaluate(xquery);
        System.out.println ("query evaluated in " + (System.currentTimeMillis() - t) + " msec,  retrieved " + results.size() + " result");
        // Build the baseline: translate the compiled query back to an expression,
        // un-optimize it, and compile its string form again.
        AbstractExpression aex = compiler.makeTranslator().queryFor(xquery).getBody();
        aex = new UnOptimizer(index.indexer.getConfiguration()).unoptimize(aex);
        XQueryExecutable baseline = compiler.compile(aex.toString());
        XdmResultSet baseResult = eval.evaluate(baseline);
        // Report the query text on failure, as testComparisonPredicate does.
        // NOTE(review): XQueryExecutable does not appear to override toString(), so the
        // previous message would only have shown an object identity - confirm.
        assertEquals ("result count mismatch for: " + xpath, baseResult.size(), results.size());
    }
    
    /**
     * Evaluates a general comparison used as a predicate and checks that the
     * compiled query returns as many results as the baseline produced by
     * running the whole query through {@link UnOptimizer}.
     */
    @Test
    public void testComparisonPredicate () throws Exception {
        final long started = System.currentTimeMillis();
        final String xpath = "//SCNDESCR[. >= //PERSONA]";
        Evaluator evaluator = index.makeEvaluator();
        Compiler compiler = evaluator.getCompiler();
        XQueryExecutable compiled = compiler.compile(xpath);
        XdmResultSet actual = evaluator.evaluate(compiled);
        System.out.println ("query evaluated in " + (System.currentTimeMillis() - started) + " msec,  retrieved " + actual.size() + " results");

        // baseline: the same query with the optimizations stripped out again
        XQuery optimized = evaluator.getCompiler().makeTranslator().queryFor(compiled);
        UnOptimizer unOptimizer = new UnOptimizer(index.indexer.getConfiguration());
        XQuery unoptimized = unOptimizer.unoptimize(optimized);
        XdmResultSet expected = evaluator.evaluate(compiler.compile(unoptimized.toString()));

        assertEquals ("result count mismatch for: " + xpath, expected.size(), actual.size());
    }
    
    /**
     * This expression resolves to a constant (Literal=true()) XPath expression
     * and generates a null Lucene query, which must not be executed.
     */
    @Test
    public void testConstantExpression() throws Exception {
        final String constantQuery = "'remorseless' or descendant::text";
        XdmResultSet constant = assertSearch(constantQuery, QUERY_CONSTANT);
        assertEquals (1, constant.size());
    }
    
    /**
     * An expression holding several independent absolute paths must run one
     * search per path. The first naive implementation tried to fetch all
     * relevant documents with a single database query.
     */
    @Test
    public void testMultipleAbsolutePaths() throws Exception {
        // both names occur under /PLAY/PERSONAE/PGROUP/PERSONA
        final String rosencrantz = "count (//PERSONA[.='ROSENCRANTZ'])";
        final String guildenstern = "count (//PERSONA[.='GUILDENSTERN'])";
        assertSearch("4", rosencrantz, 0, 4);
        assertSearch("4", guildenstern, 0, 4);

        // Two queries are generated, so search reports 8 documents; only 5 of
        // them are unique, and with caching only 5 are actually retrieved.
        final String bothCounts = "count (//PERSONA[.='ROSENCRANTZ']) + count(//PERSONA[.='GUILDENSTERN'])";
        assertSearch("8", bothCounts, 0, 8);
    }
    
    /**
     * These expressions are optimized in the sense that lux evaluates them by
     * retrieving only the minimal number of documents the available indexes allow.
     */
    @Test
    public void testLazyEvaluation () throws Exception {
        // relies on Lucene's default sort by order of insertion (ie by docid)
        final String firstSceneBySubsequence = "subsequence(//SCENE, 1, 1)/SPEECH[1]/SPEAKER/string()";
        final String firstSceneByPosition = "(//SCENE)[1]/SPEECH[1]/SPEAKER/string()";
        assertSearch ("BERNARDO", firstSceneBySubsequence, null, 1);
        assertSearch ("BERNARDO", firstSceneByPosition, null, 1);

        // /PLAY/ACT[1]/SCENE[1], /ACT[1]/SCENE[1], /SCENE[1], /SCENE[2], /SCENE[3], /SCENE[4]
        // count reduced from 6 to 4 by path queries; skip /PLAY and /ACT[1]
        final String fourthRootScene = "subsequence(/SCENE, 4, 1)/SPEECH[1]/SPEAKER/string()";
        assertSearch ("HAMLET", fourthRootScene, null, 1);
    }
    
    /**
     * Earlier implementations did not indicate that the returned sequence of
     * documents is sorted in document order, which made Saxon pull the entire
     * result sequence.
     */
    @Test
    public void testSkipDocs () throws Exception {
        final String fourthSceneDoc = "subsequence((/)[.//SCENE], 4, 1)//SPEECH[1]/SPEAKER/string()";
        final String firstSpeech = "(//SCENE/SPEECH)[1]/SPEAKER/string()";
        assertSearch ("KING CLAUDIUS", fourthSceneDoc, null, 1);
        assertSearch ("BERNARDO", firstSpeech, null, 1);
    }
    
    /** The first speech of the first scene, run on its own, retrieving one document. */
    @Test
    public void testSkipDocs2 () throws Exception {
        final String firstSpeech = "(//SCENE/SPEECH)[1]/SPEAKER/string()";
        assertSearch ("BERNARDO", firstSpeech, null, 1);
    }
    
    /**
     * Selecting the fourth document reached through {@code //SCENE/root()},
     * by position or by {@code subsequence}, retrieves a single document.
     */
    @Test
    public void testRoot () throws Exception {
        final String byPosition = "(//SCENE/root())[4]//SPEECH[1]/SPEAKER/string()";
        final String bySubsequence = "subsequence(//SCENE/root(), 4, 1)//SPEECH[1]/SPEAKER/string()";
        assertSearch ("KING CLAUDIUS", byPosition, null, 1);
        assertSearch ("KING CLAUDIUS", bySubsequence, null, 1);
    }
    
    /**
     * Ignored because this could not be optimized. It should be possible to
     * retrieve the last document and then take its last speech; the best idea
     * so far is to add pagination to lux:search.
     */
    @Test
    @Ignore
    public void testOptimizeLast () throws Exception {
        final String lastViaSearch = "(lux:search('lux_elt_name_ms:SPEECH')[last()]//SPEECH)[last()]/SPEAKER/string()";
        final String lastViaPath = "(//SPEECH)[last()]/SPEAKER/string()";
        assertSearch ("PRINCE FORTINBRAS", lastViaSearch, null, 1);
        assertSearch ("PRINCE FORTINBRAS", lastViaPath, null, 1164);
    }
    
    /** Attribute predicates on root SCENE documents, alone and combined with {@code intersect}. */
    @Test
    public void testIntersection () throws Exception {
        final String scenesInAct3 = "count(/SCENE[@act='3'])";
        final String secondScenes = "count(/SCENE[@scene='2'])";
        assertSearch ("4", scenesInAct3, null, 4);
        assertSearch ("5", secondScenes, null, 5);

        // saxon cleverly optimizes this and gets rid of the intersect
        final String act3Scene2 = "count(/SCENE[@act='3'] intersect /SCENE[@scene=2])";
        assertSearch ("1", act3Scene2, null, 1);
    }
    
    /**
     * Confirms that document identity is preserved when Saxon documents are
     * created, since the intersect operator relies on document/node identity.
     * Each search call retrieves its documents separately and the cache keeps
     * identity stable across searches.
     */
    @Test
    public void testDocumentIdentity() throws Exception {
        final String horatio =
            "count(lux:search('<SPEECH:Horatio')/SPEECH[contains(., 'Horatio')])";
        final String philosophy =
            "count(lux:search('<SPEECH:philosophy')//SPEECH[contains(., 'philosophy')])";
        assertSearch ("28", horatio, null, 40, 40);
        assertSearch ("8", philosophy, null, 7, 7);

        // in docid order
        final String intersectByDocid =
            "count(lux:search('<SPEECH:philosophy', 'lux:docid')//SPEECH[contains(., 'philosophy')] intersect lux:search('<SPEECH:Horatio', 'lux:docid')/SPEECH[contains(., 'Horatio')])";
        assertSearch ("1", intersectByDocid, null, 29, 29);

        // in relevance order - Saxon sorts the documents
        final String intersectByRelevance =
            "count(lux:search('<SPEECH:philosophy')//SPEECH[contains(., 'philosophy')] intersect lux:search('<SPEECH:Horatio')/SPEECH[contains(., 'Horatio')])";
        assertSearch ("1", intersectByRelevance, null, 47, 47);
    }
    
    /* Tests relating to element visibility
     * 
     * setup: all elements opaque by default, LINE transparent, name transparent, SCENE a container,
     * hidden is hidden
     */
    
    /** Text inside an opaque element must not be indexed as part of its ancestors. */
    @Test
    public void testOpaqueElement() throws Exception {
        // /PLAY/FM/P[2] contains "Bosak" but P is opaque
        final String bosakInFm = "lux:count('<FM:Bosak')";
        assertSearch ("0", bosakInFm, null, 0, 0);

        // <name> element is transparent so phrases continue through it
        final String phraseThroughName = "lux:count('<P:\"XML version by Jon Bosak\"')";
        assertSearch ("3", phraseThroughName, null, 3, 0);

        final String fmContainsBosak = "count(/FM[contains(., 'Bosak')])";
        assertSearch ("1", fmContainsBosak, null, 1, 1);
    }
    
    /**
     * "sword" always occurs in LINE elements, which are transparent, so it is
     * indexed as part of the enclosing SPEECH.
     */
    @Test
    public void testTransparentElement() throws Exception {
        final String phraseInSpeech = "lux:count('<SPEECH:\"swear by my sword\"')";
        final String wordInSpeech = "lux:count('<SPEECH:sword')";
        assertSearch ("5", phraseInSpeech, null, 5, 0);
        assertSearch ("24", wordInSpeech, null, 24, 0);

        // content of LINE is included in <:SPEECH but not above that, since SPEECH is opaque:
        final String wordInAct = "lux:count('<ACT:sword')";
        assertSearch ("0", wordInAct, null, 0, 0);
    }
    
    /**
     * SCENE is a container element. "sword" occurs in I;5, II;2, III;1, III;3,
     * IV;3, IV;5 and IV;7. V;2 has "swords", but the default analyzer does no stemming.
     */
    @Test
    public void testContainerElement() throws Exception {
        // a PLAY, 4 ACTs, and 7 SCENEs
        final String wordInScene = "lux:count('<SCENE:sword')";
        assertSearch ("12", wordInScene, null, 12, 0);

        final String phraseInScene = "lux:count('<SCENE:\"Swear by my sword\"')";
        assertSearch ("3", phraseInScene, null, 3, 0);

        // checks all 20 of the scenes
        final String containsPhrase = "count(/SCENE[contains(.,\"Swear by my sword\")])";
        assertSearch ("1", containsPhrase, null, 20, 20);
    }
    
    /**
     * Content of a hidden element must not be searchable, and phrases must
     * match across it.
     */
    @Test
    public void testHiddenElement() throws Exception {
        final String adamInHidden = "lux:count('<hidden:adam')";
        assertSearch ("0", adamInHidden, null, 0, 0);

        // 2x /PLAY/ACT/SCENE/SPEECH/LINE, in the same SCENE, but not /PLAY/FM/P/name/hidden
        // and we don't handle the possessive in Adam's, so only 5, not 7
        final String adamInLine = "lux:count('<LINE:adam')";
        assertSearch ("5", adamInLine, null, 5, 0);

        final String adamInNameField = "lux:count('name:adam')";
        final String adamInHiddenField = "lux:count('hidden:adam')";
        final String adamAnywhere = "lux:count('adam')";
        assertSearch ("0", adamInNameField, null, 0, 0);
        assertSearch ("0", adamInHiddenField, null, 0, 0);
        assertSearch ("5", adamAnywhere, null, 5, 0);

        // phrase wraps around hidden element
        final String phraseInName = "lux:count('<name:\"michael sokolov\"')";
        final String phraseAnywhere = "lux:count('\"michael sokolov\"')";
        assertSearch ("4", phraseInName, null, 4, 0);
        assertSearch ("4", phraseAnywhere, null, 4, 0);
    }
    
    /**
     * Confirms that the document ordering asserted by the Optimizer is correct.
     * If document order in Saxon differed from document order in Lucene, the
     * 31st document would not be the expected one. 31 is a magic number because
     * /PLAY has 20 /PLAY/ACT/SCENE, /ACT 1 has 5 /ACT/SCENE, and those 5 are
     * then repeated as /SCENE. The 31st should be /ACT[2]/SCENE[1]; since that
     * document has already been created, its Saxon document number would be low
     * under the built-in numbering scheme, and the order mismatch would make
     * Saxon terminate the intersection prematurely.
     */
    @Test
    public void testDocumentOrder() throws Exception {
        final String firstThirty = "count(/ACT/SCENE intersect subsequence(//SCENE, 1, 30))";
        final String firstThirtyOne = "count(/ACT/SCENE intersect subsequence(//SCENE, 1, 31))";
        assertSearch ("5", firstThirty, null, 9, 9);
        assertSearch ("6", firstThirtyOne, null, 10, 10);
    }
    
    /** Exercises path ordering and path distance in the generated queries. */
    @Test
    public void testPaths () throws Exception {
        // path ordering: PLAY never occurs beneath ACT
        final String playUnderAct = "/ACT/PLAY";
        final String playBelowAct = "//ACT//PLAY";
        assertSearch (null, playUnderAct, null, 0);
        assertSearch (null, playBelowAct, null, 0);

        // path distance:
        final String explicitPath = "string(/PLAY/ACT[4]/SCENE[1]/SPEECH[1]/LINE[3])";
        assertSearch ("Where is your son?", explicitPath, null, 1);
        // Q: who decides what serialization to use?
        //assertSearch ("Where is your son?", "/PLAY/ACT[4]/SCENE[1]/SPEECH[1]/LINE[3]", null, 1);
        final String wildcardPath = "string((/PLAY/ACT[4]/*/*/LINE)[3])";
        assertSearch ("Where is your son?", wildcardPath, null, 1);

        // no result, but that cannot be told from the query alone: the document
        // has to be retrieved and processed
        final String tooDeepPath = "/PLAY/ACT[4]/*/*/*/*/LINE";
        assertSearch (null, tooDeepPath, null, 1);
    }
    
    /** Expresses a deepish path in reverse order, using predicates on reverse axes. */
    @Test
    public void testReversePaths () throws Exception {
        final String viaAncestorPredicates =
            "string(//LINE[3]" +
            "[parent::SPEECH[not(preceding-sibling::SPEECH)]]" +
            "[ancestor::SCENE[count(preceding-sibling::SCENE)=0]]" +
            "[ancestor::ACT[count(preceding-sibling::ACT)=3]]" +
            "[ancestor::PLAY])";
        assertSearch ("Where is your son?", viaAncestorPredicates, null, 1);

        final String viaParentSteps =
            "string(//ACT[4]/SCENE[1]/SPEECH[1]/LINE[3]" +
            "[../../../../self::PLAY[.. is root()]])";
        assertSearch ("Where is your son?", viaParentSteps, null, 1);
    }

    /**
     * Tests phrase query generation, together with the handling of
     * capitalization and of tokenization around punctuation.
     */
    @Test
    public void testElementFullTextPhrase () throws Exception {
        final String exactLine = "count(//LINE[.='Holla! Bernardo!'])";
        final String partialLine = "count(//LINE[.='Holla!'])";
        final String unpunctuatedLine = "count(//LINE[.='Holla Bernardo'])";
        assertSearch ("5", exactLine, null, 5, 5);
        assertSearch ("0", partialLine, null, 5, 5);
        assertSearch ("0", unpunctuatedLine, null, 5, 5);

        // We cannot optimize this one due to the function call around (.):
        final String lowerCased = "count(/ACT//LINE[lower-case(.)='holla! bernardo!'])";
        assertSearch ("1", lowerCased, null, 5, 5);

        // ensure that paths ending in Dot don't accidentally reference the outer context
        final String innerDot = "count(/ACT//LINE[FOO//.='holla! bernardo!'])";
        assertSearch ("0", innerDot, null, 0, 0);

        // check stop word handling
        final String stopWords = "//LINE[.='Where is your son?']";
        assertSearch ("<LINE>Where is your son?</LINE>", stopWords, null, 5, 5);
    }
    
    /** A full-text equality on any element must find the matching line. */
    @Test
    public void testFullText () throws Exception {
        final String anyElementWithText = "//*[.='Where is your son?']/string()";
        assertSearch ("Where is your son?", anyElementWithText, null, 5, 5);
    }
    
    /**
     * {@code contains()} is no longer optimized, so every /LINE document has
     * to be checked.
     */
    @Test
    public void testContains () throws Exception {
        /*
         * Assertions from when there was a contains optimization:
        assertSearch ("5", "count(//LINE[contains(.,'Holla')])", null, 5, 5);
        assertSearch ("true", "contains(/PLAY,'Holla')", null, 1, 1);
        // searches match all 10 instances of 'given' since they will be case-insensitive
        // There is also one occurrence of 'forgiveness' that should match
        assertSearch ("1", "count (/LINE[contains(.,'Given')])", null, 11, 11);
        assertSearch ("10", "count (/LINE[contains(.,'given')])", null, 11, 11);
        */
        final int allLines = index.elementCounts.get("LINE");
        final String substringQuery = "count(/LINE[contains(.,'olla! Bern')])";
        assertSearch ("1", substringQuery, null, allLines, allLines);
    }
    
    /**
     * Exercises {@code lux:search} with phrase queries, checks that calls which
     * the test expects to be invalid are rejected with a {@link LuxException},
     * and finishes with a {@code lux:count} over a text node argument.
     */
    @Test public void testLuxSearch () throws Exception {
        assertSearch ("5", "count(lux:search('\"holla bernardo\"'))", null, 5, 0);
        assertSearch ("5", "count(lux:search('<:\"holla bernardo\"'))", null, 5, 0);
        assertSearch ("5", "count(lux:search('<LINE:\"holla bernardo\"'))", null, 5, 0);
        try {
            assertSearch (null, "lux:search(1,2,3)", null, null, null);
            fail ("expected exception not thrown");
        } catch (LuxException expected) {
            // expected: the call must be rejected
        }
        try {
            assertSearch (null, "lux:search(1,2)", null, null, null);
            fail ("expected exception not thrown");
        } catch (LuxException expected) {
            // expected: the call must be rejected
        }
        try {
            assertSearch (null, "lux:search(':::')", null, null, null);
            fail ("expected exception not thrown");
        } catch (LuxException e) {
            // the unparseable query string must be named in the error message
            assertTrue (e.getMessage(), e.getMessage().startsWith("Cannot parse ':::'"));
        }
        assertSearch ("65", "lux:count(text{'bernardo'})", null, 65, 0);
    }
    
    /** A path step applied to {@code lux:search} results: one of the five hits is a SPEECH document. */
    @Test
    public void testLuxSearchPath () throws Exception {
        final String speechHits = "count(lux:search('\"holla bernardo\"')/SPEECH)";
        assertSearch ("1", speechHits, null, 5, 5);
    }
    
    /* Bug found in the wild - the opto that preserves document-ordering by embedding the trailing path 
     * in a predicate applied an incorrect query. */
    @Test
    public void testLuxSearchRoot () throws Exception {
        // the query that exposed the bug in the wild:
        final String reportedQuery = "lux:search (\"<@scene:5\")[1]/root()";
        assertSearch ("__IGNORE__", reportedQuery, null, 1, 1);

        // Earlier attempts to reproduce it, kept for posterity.
        // The first result is LINE due to TFIDF (relevance) scoring.
        final String firstHitName = "lux:search('\"holla bernardo\"')[1]/root()/*/name()";
        assertSearch ("LINE", firstHitName, null, 1, 1);

        final String noHit = "lux:search('<@id:100')[1]/root()/*/name()";
        assertSearch (null, noHit, null, 0, 0);
    }
    
    /** A {@code for} over a predicated absolute path must retrieve only the matching SPEECH document. */
    @Test
    public void testBugFix0018() throws Exception {
        final String speakerOfLine =
            "for $doc in /SPEECH[LINE='Holla! Bernardo!'] return $doc/SPEAKER/string()";
        assertSearch ("MARCELLUS", speakerOfLine, null, 1, 1);
    }
    
    /** When the return clause names an element that never occurs, nothing is returned or retrieved. */
    @Test
    public void testEmptyReturn() throws Exception {
        final String unknownChild =
            "for $doc in /SPEECH[LINE='Holla! Bernardo!'] return $doc/UNKNOWN/string()";
        assertSearch (null, unknownChild, null, 0, 0);
    }
    
    /** The second result of a match-all {@code lux:search} is the play's TITLE document. */
    @Test
    public void testBugFix0018b() throws Exception {
        final String secondOfAll = "lux:search(\"*:*\")[2]";
        assertSearch (HAMLET_TITLE_MARKUP, secondOfAll, null, 1, 1);
    }

    /** A trailing {@code string()} step on a fully positional path must retrieve one document. */
    @Test
    public void testTrailingStringCall () throws Exception {
        final String lineAsString = "/PLAY/ACT[4]/SCENE[1]/SPEECH[1]/LINE[3]/string()";
        assertSearch ("Where is your son?", lineAsString, null, 1);
    }
    
    /**
     * Orders {@code lux:search} results by the doctype key and takes the first.
     * TODO: there is not yet a solution for pushing the order by optimization
     * (to say nothing of additional constraints) into a user-supplied query
     * written in the string query syntax.
     */
    @Test
    public void testOrderBy () throws Exception {
        final String firstByDoctype =
            "(for $doc in lux:search('bernardo')" +
            " order by lux:key('doctype', $doc) return $doc/*/name())[1]";
        assertSearch ("ACT", firstByDoctype, 0, 1);
    }
        
    /** Takes the 21st item of doctype-ordered search results, first as a name and then as a node. */
    @Test
    public void testOrderByPagination () throws Exception {
        final String nameOfTwentyFirst =
            "(for $doc in lux:search('bernardo')" +
            " order by lux:key('doctype', $doc) return $doc/*/name())[21]";
        assertSearch ("SPEAKER", nameOfTwentyFirst, 0, 1);

        final String twentyFirstDoc =
            "(for $doc in lux:search('bernardo')" +
            " order by lux:key('doctype', $doc) return $doc)[21]";
        assertSearch ("<SPEAKER>BERNARDO</SPEAKER>", twentyFirstDoc, 0, 1);
    }
    
    /** A single-term highlight wraps the matching word in {@code <B>}. */
    @Test
    public void testHighlight () throws Exception {
        final String highlighted = "<TITLE>The Tragedy of <B>Hamlet</B>, Prince of Denmark</TITLE>";
        final String query = "lux:highlight(/PLAY/TITLE, 'hamlet')";
        assertSearch (highlighted, query, null, null);
    }

    /** With several independent terms, every occurrence of each term is highlighted. */
    @Test
    public void testHighlightMultiple () throws Exception {
        final String highlighted =
            "<TITLE>The <B>Tragedy</B> <B>of</B> <B>Hamlet</B>, Prince <B>of</B> Denmark</TITLE>";
        final String query = "lux:highlight(/PLAY/TITLE,'tragedy of hamlet')";
        assertSearch (highlighted, query, null, null);
    }

    /** With a phrase query, only the words inside the matched phrase are highlighted. */
    @Test
    public void testHighlightPhrase () throws Exception {
        final String highlighted =
            "<TITLE>The <B>Tragedy</B> <B>of</B> <B>Hamlet</B>, Prince of Denmark</TITLE>";
        final String query = "lux:highlight(/PLAY/TITLE, '\"tragedy of hamlet\"')";
        assertSearch (highlighted, query, null, null);
    }
    
    /** An element-restricted term query highlights the term inside that element. */
    @Test
    public void testHighlightElementQuery () throws Exception {
        final String highlighted = "<TITLE>The Tragedy of <B>Hamlet</B>, Prince of Denmark</TITLE>";
        final String query = "lux:highlight(/PLAY/TITLE, '<TITLE:hamlet')";
        assertSearch (highlighted, query, null, null);
    }
    
    /** Two element-restricted terms are each highlighted. */
    @Test
    public void testHighlightElementMultiple () throws Exception {
        final String highlighted =
            "<TITLE>The <B>Tragedy</B> of <B>Hamlet</B>, Prince of Denmark</TITLE>";
        final String query = "lux:highlight(/PLAY/TITLE, '<TITLE:hamlet <TITLE:tragedy')";
        assertSearch (highlighted, query, null, null);
    }
    
    // Highlighting element-phrase queries is not well supported by the current highlighter,
    // because Lucene phrase highlighting is restricted to operating on a single field,
    // and we use the main text field.  So we choose to err on the side of over-highlighting,
    // using a workaround that ignores element restrictions in the presence of phrase queries.
    @Test
    public void testHighlightElementPhrase () throws Exception {
        // an element-restricted phrase: each word of the phrase is highlighted
        final String highlighted =
            "<TITLE>The <B>Tragedy</B> <B>of</B> <B>Hamlet</B>, Prince of Denmark</TITLE>";
        final String query = "lux:highlight(/PLAY/TITLE, '<TITLE:\"tragedy of hamlet\"')";
        assertSearch (highlighted, query, null, null);
    }
    
    /** An element-restricted term combined with an unrestricted phrase. */
    @Test
    public void testHighlightMixedQuery () throws Exception {
        final String highlighted =
            "<TITLE>The <B>Tragedy</B> <B>of</B> <B>Hamlet</B>, Prince of Denmark</TITLE>";
        final String query = "lux:highlight(/PLAY/TITLE, '<TITLE:tragedy \"of hamlet\"')";
        assertSearch (highlighted, query, null, null);
    }
    
    /** Attribute queries produce no highlighting: the node comes back unchanged. */
    @Test
    public void testHighlightAttributeQuery () throws Exception {
        final String unchanged = "<node id=\"10\">node 10</node>";
        final String query = "lux:highlight(<node id=\"10\">node 10</node>, '<@id:10')";
        assertSearch (unchanged, query, null, null);
    }
    
    // Make sure text offset calculations handle multiple text nodes
    @Test
    public void testHighlightComplexContent() throws Exception {
        // /FM holds several text nodes; both occurrences of the name are highlighted
        final String highlighted =
            "<FM>\n<P>Text placed in the public domain by Moby Lexical Tools, 1992.</P>\n" +
            "<P>SGML markup by <name><B>Jon</B> <B>Bosak</B></name>, 1992-1994.</P>\n" +
            "<P>XML version by <name><B>Jon</B> <B>Bosak</B></name>, 1996-1998.</P>\n" +
            "<P>XML attributes and name tagging added by <name>Michael <hidden>Adam</hidden> Sokolov</name>, 2013</P>\n" +
            "<P>This work may be freely copied and distributed worldwide.</P>\n</FM>";
        final String query = "lux:highlight(/FM, 'Jon Bosak')";
        assertSearch (highlighted, query, null, null);
    }
    
    /**
     * Tests the optimization of function bodies and of the {@code collection()}
     * call. {@code collection()} must be used inside function bodies because
     * Saxon raises static (compilation) errors for absolute paths there, due to
     * the lack of a context item.
     */
    @Test
    public void testFunctionDefinition() throws Exception {
        final String module =
            "declare namespace test='test'; " +
            "declare function test:function () { collection()/PLAY/TITLE }; " +
            "test:function()";
        assertSearch (HAMLET_TITLE_MARKUP, module, null, 1);
    }
    
    /**
     * A path applied to a let-bound variable must still return the play's
     * TITLE while retrieving a single document.
     */
    @Test
    public void testFlworPath() throws Exception {
        final String flwor =
            "let $play := collection()/PLAY[TITLE]\n" +
            "return ($play/TITLE)";
        assertSearch (HAMLET_TITLE_MARKUP, flwor, null, 1);
    }
    
    /**
     * Queries generated by a sequence must combine properly (ie using OR).
     * If $play were (), the result would be ''.
     */
    @Test
    public void testSequence() throws Exception {
        final String titleThenId =
            "let $play := collection()/PLAY[TITLE]\n" +
            "return (string($play/TITLE), $play/@id)";
        assertSearch (HAMLET_TITLE, titleThenId, null, 1);
    }
    
    /**
     * Evaluate a query with two chained let clauses: the second variable is derived from
     * the first, and both are used in the returned sequence. The expected result is the
     * Hamlet title string.
     */
    @Test
    public void testLet() throws Exception {
        final String chainedLets =
            "let $play := collection()/PLAY[TITLE]\n" +
            "let $id := $play/@id\n" +
            "return (string($play/TITLE), $id)";
        assertSearch (HAMLET_TITLE, chainedLets, null, 1);
    }

    /**
     * Test that a let clause that doesn't match any data doesn't prevent the entire
     * query from matching.
     */
    @Test
    public void testIrrelevantLet() throws Exception {
        // This is hard to do because Saxon tends to optimize away so much.  It also
        // gives us a chance to check that we optimize through the use of variables,
        // since the variables are preserved in this expression even after Saxon's
        // own optimizations.
        final String query =
            "let $play := collection()/PLAY[starts-with(TITLE,'The ')]\n" +
            "let $id := $play/@id\n" +
            "return <result id='{$id}'>{if ($id) then '' else $play/TITLE}</result>/string()";
        assertSearch (HAMLET_TITLE, query, null, 1);
    }
    
    /**
     * Ensure that deep pagination skips intervening documents without loading them
     * into memory: counts the children of the 1000th document in the collection.
     */
    @Test
    public void testDeepPagination () throws Exception {
        final String expectedCount = "1";
        final String thousandthDocQuery = "count(collection()[1000]/*)";
        assertSearch (expectedCount, thousandthDocQuery, null, 1);
    }
    
    /**
     * Return the index of the first /SCENE document, using a positional variable
     * ("at $i") together with a where clause.
     */
    @Test
    public void testWhereAtClause () throws Exception {
        // the first SCENE is the 49th element in hamlet.xml, and therefore the root
        // of document #49 in the test set
        final String firstSceneIndexQuery =
            "(for $doc at $i in collection() where $doc/SCENE return $i)[1]";
        assertSearch ("49", firstSceneIndexQuery, null, 49);
    }
    
    /**
     * Compare the value of the 'doctype' field, read with lux:key, against a string
     * literal inside a predicate on collection().
     */
    @Test
    public void testFieldValuesComparison () throws Exception {
        // there are 20 scenes in Hamlet, but we only need to pull the first one for
        // these queries; both of them yield the first speaker of the first scene
        final String[] firstSpeakerQueries = {
            "collection()[lux:key('doctype')='SCENE'][1]/descendant::SPEECH[1]/SPEAKER/string()",
            "collection()[lux:key('doctype')='SCENE'][1]/descendant::SPEAKER[1]/string()"
        };
        for (String speakerQuery : firstSpeakerQueries) {
            assertSearch ("BERNARDO", speakerQuery, null, 1, 1);
        }

        // counting every SCENE document
        final String sceneCountQuery = "count(collection()[lux:key('doctype')='SCENE'])";
        assertSearch ("20", sceneCountQuery, null, 20, 0);
    }

    /**
     * Count the SPEAKER elements reached through a path in which each of several
     * consecutive steps carries a positional predicate.
     */
    @Test
    public void testPredicateChain() throws Exception {
        final String chainedPositions = "count(//ACT[1]/SCENE[2]/SPEECH[3]/SPEAKER)";
        assertSearch ("6", chainedPositions, null, 6, 6);
    }
    
    /**
     * Check for the existence of a PLAY using a predicate nested inside another
     * predicate, the inner one comparing a scene title with a string literal.
     */
    @Test
    public void testNestedPredicateComparison() throws Exception {
        final String nestedPredicate =
            "exists(/PLAY[ACT[SCENE/TITLE='SCENE IV.  The platform.']])";
        assertSearch ("true", nestedPredicate, null, 1, 1);
    }
    
    /**
     * Apply an upper-bound comparison to the 'doctype' field, once with the general
     * comparison operator and once with the value comparison operator; both forms
     * are asserted against the same expectations.
     */
    @Test
    public void testRangeInequality() throws Exception {
        // we have five ACTs
        final String[] upperBoundQueries = {
            "count((/)[lux:key('doctype') <= 'ACT'])",
            "count((/)[lux:key('doctype') le 'ACT'])"
        };
        for (String boundQuery : upperBoundQueries) {
            assertSearch ("5", boundQuery, null, 5, 0);
        }
    }

    /**
     * Combine a lower and an upper bound on the 'doctype' field, expressed either as
     * two successive predicates or as a single predicate joined with "and".
     */
    @Test
    public void testCombinedRange() throws Exception {
        final String betweenAandB =
            "count((/)[lux:key('doctype') >= 'A'][lux:key('doctype') <= 'B'])";
        assertSearch ("5", betweenAandB, null, 5, 0);

        // we have one FM and two GRPDESCR
        final String[] betweenFandH = {
            "count((/)[lux:key('doctype') > 'F'][lux:key('doctype') < 'H'])",
            "count((/)[lux:key('doctype') gt 'F' and lux:key('doctype') lt 'H'])"
        };
        for (String rangeQuery : betweenFandH) {
            assertSearch ("3", rangeQuery, null, 3, 0);
        }
    }
    
    /**
     * Combine a range on the 'doctype' field with a negated general comparison
     * against a sequence of values.
     */
    @Test
    public void testRangeGenComp() throws Exception {
        // we have only one SCNDESCR, but we don't optimize this yet
        final String rangeMinusExclusions =
            "count((/)[lux:key('doctype') gt 'S' and lux:key('doctype') lt 'T' " +
            "and not(lux:key('doctype') = ('SCENE','SPEECH','SPEAKER','STAGEDIR'))])";
        assertSearch ("1", rangeMinusExclusions, null, 2552, 2552);
    }
    
    /**
     * Call lux:key at the top level of a query, outside of any path expression, and
     * verify that a LuxException is raised whose message contains "no context defined".
     */
    @Test
    public void testFieldValuesNoContext () throws Exception {
        final String query = "if (2 eq lux:key('xxx')) then 'yes' else 'no'";
        LuxException thrown = null;
        try {
            assertSearch ("false", query, null, 0, 0);
        } catch (LuxException e) {
            thrown = e;
        }
        // reaching this point without an exception means the search unexpectedly succeeded
        if (thrown == null) {
            fail ("expected exception not thrown");
        }
        assertTrue (thrown.getMessage().contains("no context defined"));
    }
    
    /**
     * Exercise equality comparisons involving the int-valued 'actnum' field, and verify
     * that comparing an integer with the 'actstr' field raises a LuxException.
     */
    @Test
    public void testIntFieldEquality() throws Exception {
        // check that our int-valued field was indexed correctly:
        final String secondActNumber = "(/ACT)[2]/lux:key('actnum')";
        assertSearch ("2", secondActNumber, null, 1, 1);

        // do a basic int comparison
        final String actNumEqualsTwo = "count(collection()[2 eq lux:key('actnum')])";
        assertSearch ("3", actNumEqualsTwo, null, 3, 0);

        // Try comparing an integer against a string-valued field
        final String intAgainstString = "count(collection()[2 eq lux:key('actstr')])";
        LuxException thrown = null;
        try {
            assertSearch ("0", intAgainstString, null, 0, 0);
        } catch (LuxException e) {
            thrown = e;
        }
        if (thrown == null) {
            fail ("expected exception not thrown");
        }
        assertEquals("Cannot compare xs:integer to xs:string", thrown.getMessage());
    }

    /**
     * Exercise inequality comparisons on the int-valued 'actnum' field: a simple upper
     * bound written with either comparison operator, then a two-sided range written as
     * successive predicates.
     */
    @Test
    public void testIntFieldInequality() throws Exception {
        // do a basic int comparison, in value-comparison and general-comparison form
        final String[] belowTwo = {
            "count(collection()[lux:key('actnum') lt 2])",
            "count(collection()[lux:key('actnum') < 2])"
        };
        for (String lessThanQuery : belowTwo) {
            assertSearch ("6", lessThanQuery, null, 6, 0);
        }

        final String aboveTwoUpToThree =
            "count(collection()[lux:key('actnum') > 2][lux:key('actnum') <= 3])";
        assertSearch ("5", aboveTwoUpToThree, null, 5, 0);
    }

    /**
     * Do a basic long comparison on the 'scnlong' field, and make sure comparison with
     * other numeric types is allowed (the right-hand operand is cast to xs:int).
     */
    @Test
    public void testLongFieldInequality() throws Exception {
        final String longAgainstInt = "count(collection()[lux:key('scnlong') gt xs:int(5)])";
        assertSearch ("2", longAgainstInt, null, 2, 0);
    }
    
    /**
     * Range comparisons written directly against the act attribute of SCENE elements,
     * rather than through lux:key.
     * NOTE(review): this test is ignored and the reason is not recorded here.
     */
    @Test @Ignore
    public void testXPathRangeQuery () throws Exception {
        // do a basic integer comparison; the cast is required for atomic comparison
        final String castLessThan = "count(//SCENE[xs:integer(@act) lt 2])";
        assertSearch ("8", castLessThan, null, 6, 6);

        // general comparison, no cast
        final String generalLessThan = "count(//SCENE[@act < 2])";
        assertSearch ("6", generalLessThan, null, 4, 4);

        // two-sided range as successive predicates
        final String castRange = "count(//SCENE[xs:integer(@act) > 2][xs:integer(@act) <= 3])";
        assertSearch ("9", castRange, null, 7, 7);
    }

    /**
     * Apply a predicate to an attribute step: first count the matching attributes, then
     * select them and check the size of the returned value.
     */
    @Test
    public void testAttributePredicate() throws Exception {
        // from Geet Gangwar
        // context /@id[.='I2009']
        final String actTwoAttributes = "//SCENE/@act[.='2']";
        assertSearch ("6", "count(" + actTwoAttributes + ")", null, 4, 4);

        // the (Integer) cast selects the overload that takes no expected-result string
        XdmResultSet matches = assertSearch (actTwoAttributes, (Integer) null, 4, 4);
        assertEquals (6, matches.getXdmValue().size());
    }
    
    /**
     * Count documents with lux:count, using a query string that specifies a timestamp
     * range whose upper bound is NOW.
     * Currently disabled; see LUX-62.
     */
    @Test @Ignore("See LUX-62")
    public void testTimestampRange () throws Exception {
        String query = "lux:count('timestamp:[2013-09-01T21:30:50.515Z TO NOW]')";
        assertSearch ("6636", query, null, 0, 0);
    }
    
    /**
     * Call lux:search with an empty sequence as its second argument and check the name
     * of the root element of the first result.
     */
    @Test
    public void testEmptySort() throws Exception {
        final String emptySortQuery = "name(lux:search('yorick',())[1]/*)";
        assertSearch ("PLAY", emptySortQuery, null, 1, 1);
    }
    
}

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */