package lux;
import static lux.IndexTestSupportBase.*;
import static org.junit.Assert.*;
import java.util.Iterator;
import lux.exception.LuxException;
import lux.saxon.UnOptimizer;
import lux.xpath.AbstractExpression;
import lux.xquery.XQuery;
import net.sf.saxon.s9api.XQueryExecutable;
import net.sf.saxon.s9api.XdmNode;
import org.junit.BeforeClass;
import org.junit.Ignore;
import org.junit.Test;
import org.junit.runner.RunWith;
/**
 * Check a variety of XPath queries, ensuring that results when executed using the default indexing
 * settings, as provided by IndexTestSupport, are correct,
 * and test that expected optimizations are in fact being applied.
 *
 * <p>All tests run against the documents indexed from <code>lux/hamlet.xml</code> (see {@link #setup()}).
 */
@RunWith (MultiThreadedRunner.class)
public class SearchTest extends BaseSearchTest {

    private static final String HAMLET_TITLE_MARKUP = "<TITLE>The Tragedy of Hamlet, Prince of Denmark</TITLE>";
    private static final String HAMLET_TITLE = "The Tragedy of Hamlet, Prince of Denmark";

    /** Indexes the Hamlet test document once for the whole test class. */
    @BeforeClass
    public static void setup() throws Exception {
        setup ("lux/hamlet.xml");
    }

    @Test
    public void testSearchAllDocs() throws Exception {
        XdmResultSet results = assertSearch("/", IndexTestSupportBase.QUERY_EXACT);
        assertEquals (index.totalDocs, results.size());
    }

    /** Three equivalent ways of counting every document must all report totalDocs. */
    @Test
    public void testCountAllDocs () throws Exception {
        XdmResultSet results = assertSearch ("count(/)", QUERY_NO_DOCS, totalDocs);
        assertEquals (String.valueOf(totalDocs), results.iterator().next().toString());
        results = assertSearch ("count(collection())", QUERY_NO_DOCS, totalDocs);
        assertEquals (String.valueOf(totalDocs), results.iterator().next().toString());
        results = assertSearch ("count(lux:search('*:*'))", QUERY_NO_DOCS, totalDocs);
        assertEquals (String.valueOf(totalDocs), results.iterator().next().toString());
    }

    @Test
    public void testCountActChildren () throws Exception {
        // Test an assumption about query accuracy:
        // span query slop is less precise than the XPath here, so 6 documents
        // must be examined even though only 1 (the PLAY) contains the five child ACTs
        assertSearch ("5", "count (/*/ACT)", 0, 6);
    }

    @Test
    public void testExists () throws Exception {
        assertSearch ("true", "exists(/)", QUERY_NO_DOCS, 1);
        assertSearch ("true", "exists(//SCENE)", QUERY_NO_DOCS, 1);
        assertSearch ("false", "exists(//foo)", QUERY_NO_DOCS, 0);
        assertSearch ("true", "exists(//SCENE/root())", QUERY_NO_DOCS, 1);
        assertSearch ("true", "exists(//SCENE) and exists(//ACT)", QUERY_NO_DOCS, 2);
        assertSearch ("true", "exists(//SCENE/root()//ACT)", QUERY_NO_DOCS, 1);
        assertSearch ("true", "exists((/)[.//SCENE and .//ACT])", QUERY_NO_DOCS, 1);
        assertSearch ("true", "exists(//ACT//SCENE)", QUERY_NO_DOCS, 1);
    }

    @Test
    public void testEmpty () throws Exception {
        XdmResultSet results = assertSearch ("empty(/)", QUERY_NO_DOCS, 1);
        assertEquals ("false", results.iterator().next().toString());
        assertSearch ("false", "empty(//SCENE)", QUERY_NO_DOCS, 1);
        assertSearch ("true", "empty(//foo)", QUERY_NO_DOCS, 0);
        assertSearch ("false", "empty(//SCENE/root())", QUERY_NO_DOCS, 1);
        assertSearch ("true", "empty(//SCENE) or empty(//foo)", QUERY_NO_DOCS, 1);
        assertSearch ("false", "empty(//SCENE/root()//ACT)", QUERY_NO_DOCS, 1);
        assertSearch ("false", "empty((/)[.//SCENE and .//ACT])", QUERY_NO_DOCS, 1);
    }

    @Test
    public void testNot() throws Exception {
        XdmResultSet results = assertSearch ("not(/)", QUERY_NO_DOCS, 1);
        assertEquals ("false", results.iterator().next().toString());
        assertSearch ("false", "not(//SCENE)", QUERY_NO_DOCS, 1);
        assertSearch ("true", "not(//foo)", QUERY_NO_DOCS, 0);
        assertSearch ("false", "not(//SCENE/root())", QUERY_NO_DOCS, 1);
        assertSearch ("true", "not(//SCENE) or not(//foo)", QUERY_NO_DOCS, 1);
        assertSearch ("false", "not(//SCENE/root()//ACT)", QUERY_NO_DOCS, 1);
        assertSearch ("false", "not((/)[.//SCENE and .//ACT])", QUERY_NO_DOCS, 1);
        assertSearch ("true", "not(//SCENE//ACT)", QUERY_NO_DOCS, 0);
    }

    @Test
    public void testNotExists() throws Exception {
        // Trying to come up with a case where allowing the query from the expression inside
        // exists() bleeds out into the surrounding query and causes an incorrect result,
        // but Saxon tends to convert all these expressions:
        // assertSearch ("1", "count(/FM[exists(BLAH) eq false()])", 0, 1);
        // assertSearch ("1", "count(/FM[exists(BLAH) = false()])", 0, 1);
        // into this one, which is safe because the not() is treated as a non-optimized function,
        assertSearch ("1", "count(/FM[not(exists(BLAH))])", 0, 1);
        // However this one exhibited the expected over-optimization failure:
        assertSearch ("1", "count(/FM[exists(BLAH) eq exists(BLARG)])", 0, 1);
        assertSearch ("1", "count(/FM[exists(BLAH) = exists(BLARG)])", 0, 1);
        assertSearch ("0", "count(/FM[exists(BLAH) != exists(BLARG)])", 0, 1);
        assertSearch ("0", "count(/FM[BLAH eq string(BLARG)])", 0, 0);
        // NOTE: () eq () === ()
        assertSearch ("0", "count(/FM[BLAH eq BLARG])", 0, 0);
        // NOTE: () = () === false()
        assertSearch ("0", "count(/FM[BLAH = BLARG])", 0, 0);
        // we don't optimize along the parent axis
        assertSearch ("20", "count(//SCENE[not(exists(parent::ACT))])", 0, 26);
        assertSearch ("1", "count(exists(/BLAH))", 0, 0);
    }

    @Test
    public void testLuxCount () throws Exception {
        assertSearch ("5", "lux:count('lux_path:\"\\{\\} ACT\"')", null, 5, 0);
    }

    @Test
    public void testPathOrder () throws Exception {
        // Make sure that the Optimizer doesn't incorrectly assert
        // order is *not* significant in the generated query;
        // it should be (SCENE AND ACT), and the query is *not* countable:
        // it's *not* how many documents have scenes and acts: it's how many scenes are there in documents with acts
        // Overall there are 20 scenes in 5 acts in 1 play
        // 40 = 20 (SCENEs in /PLAY) + 20 (SCENEs in the 5 /ACTs together)
        assertSearch ("40", "count(//ACT/root()//SCENE)", 0, 6);
        // 10 = 5 (ACTs in /PLAY) + 5 /ACT documents.
        assertSearch ("10", "count(//SCENE/root()//ACT)", 0, 6);
        // Why did we think this?:
        // 120 = 20 (scenes) * 5 (acts) in 1 /PLAY + 20 scenes in 5 /ACT documents
    }

    @Test
    public void testSearchAct() throws Exception {
        // path indexes make this exact
        // NOTE(review): the "+ 0" presumably unboxes the stored count so that the numeric
        // assertEquals overload is selected -- confirm against elementCounts' value type
        XdmResultSet results = assertSearch ("/ACT", QUERY_EXACT);
        assertEquals (index.elementCounts.get("ACT") + 0, results.size());
        // Make sure that collection() is optimized
        results = assertSearch ("collection()/ACT", QUERY_EXACT);
        assertEquals (index.elementCounts.get("ACT") + 0, results.size());
        // and that references to variables are optimized
        results = assertSearch ("let $context := collection() return $context/ACT", QUERY_EXACT);
        assertEquals (index.elementCounts.get("ACT") + 0, results.size());
    }

    @Test
    public void testSearchActScene() throws Exception {
        XdmResultSet results = assertSearch("/ACT/SCENE", QUERY_MINIMAL);
        assertEquals (index.elementCounts.get("SCENE") + 0, results.size());
    }

    @Test
    public void testSearchAllScenes() throws Exception {
        XdmResultSet results = assertSearch("/ACT", QUERY_MINIMAL);
        assertEquals (5, results.size());
        XdmNode node = (XdmNode) results.iterator().next();
        String actURI = node.getDocumentURI().toString();
        results = assertSearch("//SCENE", QUERY_MINIMAL);
        // every SCENE, in its ACT and in the PLAY
        assertEquals (index.elementCounts.get("SCENE") * 3, results.size());
        Iterator<?> iter = results.iterator();
        for (int i = 0; i < index.elementCounts.get("SCENE"); i++) {
            // each scene, from the /PLAY document
            node = (XdmNode) iter.next();
            assertEquals ("lux://lux/hamlet.xml", node.getDocumentURI().toString());
            assertEquals ("lux://lux/hamlet.xml", node.getBaseURI().toString());
        }
        // the next result after the /PLAY scenes comes from the first /ACT document
        XdmNode act1 = (XdmNode) iter.next();
        assertEquals (actURI, act1.getBaseURI().toString());
    }

    @Test
    public void testSearchAllSceneDocs() throws Exception {
        XdmResultSet results = assertSearch("(/)[.//SCENE]", QUERY_EXACT);
        // every document containing a SCENE: each SCENE document, each ACT document, and the PLAY
        assertEquals (index.elementCounts.get("SCENE") + index.elementCounts.get("ACT") + 1, results.size());
    }

    @Test
    public void testSearchAllSceneDocsRoot() throws Exception {
        XdmResultSet results = assertSearch("//SCENE/root()", QUERY_EXACT);
        // every document containing a SCENE: each SCENE document, each ACT document, and the PLAY
        assertEquals (index.elementCounts.get("SCENE") + index.elementCounts.get("ACT") + 1, results.size());
    }

    @Test
    public void testCountDocs () throws Exception {
        // every document containing a SCENE: each SCENE document, each ACT document, and the PLAY
        int sceneDocCount = index.elementCounts.get("SCENE") + index.elementCounts.get("ACT") + 1;
        XdmResultSet results = assertSearch("count (//SCENE/root())", QUERY_NO_DOCS);
        assertResultValue(results, sceneDocCount);
        results = assertSearch("count ((/)[.//SCENE])", QUERY_NO_DOCS);
        assertResultValue(results, sceneDocCount);
        results = assertSearch("count (//SCENE/ancestor::document-node())", QUERY_NO_DOCS);
        assertResultValue(results, sceneDocCount);
        results = assertSearch("count (/descendant-or-self::SCENE/root())", QUERY_NO_DOCS);
        assertResultValue(results, sceneDocCount);
        results = assertSearch("count (/descendant::SCENE/root())", QUERY_NO_DOCS);
        assertResultValue(results, sceneDocCount);
        results = assertSearch("count (/SCENE)", QUERY_NO_DOCS);
        assertResultValue(results, index.elementCounts.get("SCENE"));
    }

    /** A query that is not valid XPath/XQuery must be reported as a LuxException. */
    @Test
    public void testSyntaxError () throws Exception {
        try {
            assertSearch ("hey bad boy");
            fail ("expected LuxException to be thrown for syntax error");
        } catch (LuxException expected) {
            // expected: this is the outcome the test is looking for
        }
    }

    /**
     * Evaluates a general comparison between two node sequences, then re-evaluates the
     * un-optimized form of the same expression and checks that both yield the same number of results.
     */
    @Test
    public void testTextComparison () throws Exception {
        long t = System.currentTimeMillis();
        String xpath = "//SCNDESCR >= //PERSONA";
        Evaluator eval = index.makeEvaluator();
        Compiler compiler = eval.getCompiler();
        XQueryExecutable xquery = compiler.compile(xpath);
        XdmResultSet results = eval.evaluate(xquery);
        System.out.println ("query evaluated in " + (System.currentTimeMillis() - t) + " msec, retrieved " + results.size() + " result");
        // build the baseline by stripping the optimizations from the translated expression body
        AbstractExpression aex = compiler.makeTranslator().queryFor(xquery).getBody();
        aex = new UnOptimizer(index.indexer.getConfiguration()).unoptimize(aex);
        XQueryExecutable baseline = compiler.compile(aex.toString());
        XdmResultSet baseResult = eval.evaluate(baseline);
        assertEquals ("result count mismatch for: " + xquery.toString(), baseResult.size(), results.size());
    }

    /**
     * Same comparison as {@link #testTextComparison()}, but expressed as a predicate; here the
     * whole translated query (not just its body) is un-optimized to form the baseline.
     */
    @Test
    public void testComparisonPredicate () throws Exception {
        long t = System.currentTimeMillis();
        String xpath = "//SCNDESCR[. >= //PERSONA]";
        Evaluator eval = index.makeEvaluator();
        Compiler compiler = eval.getCompiler();
        XQueryExecutable xquery = compiler.compile(xpath);
        XdmResultSet results = eval.evaluate(xquery);
        System.out.println ("query evaluated in " + (System.currentTimeMillis() - t) + " msec, retrieved " + results.size() + " results");
        XQuery optimized = eval.getCompiler().makeTranslator().queryFor(xquery);
        XQuery unoptimized = new UnOptimizer(index.indexer.getConfiguration()).unoptimize(optimized);
        XQueryExecutable baseline = compiler.compile(unoptimized.toString());
        XdmResultSet baseResult = eval.evaluate(baseline);
        assertEquals ("result count mismatch for: " + xpath, baseResult.size(), results.size());
    }

    @Test
    public void testConstantExpression() throws Exception {
        // This resolves to a constant (Literal=true()) XPath expression and generates
        // a null Lucene query. Make sure we don't try to execute the query.
        XdmResultSet results = assertSearch("'remorseless' or descendant::text", QUERY_CONSTANT);
        assertEquals (1, results.size());
    }

    @Test
    public void testMultipleAbsolutePaths() throws Exception {
        // /PLAY/PERSONAE/PGROUP/PERSONA
        assertSearch("4", "count (//PERSONA[.='ROSENCRANTZ'])", 0, 4);
        assertSearch("4", "count (//PERSONA[.='GUILDENSTERN'])", 0, 4);
        // Our first naive implementation tried to fetch all relevant documents
        // using a single database query - this test tests multiple independent
        // sequences.
        // we retrieved 8 documents from search, because there are two queries generated, but
        // only 5 unique docs, and we cache, so only 5 docs are actually retrieved
        assertSearch("8", "count (//PERSONA[.='ROSENCRANTZ']) + count(//PERSONA[.='GUILDENSTERN'])", 0, 8);
    }

    @Test
    public void testLazyEvaluation () throws Exception {
        // These expressions are optimized in the sense that lux evaluates them all by retrieving
        // only the minimal number of required documents (with the available indexes).
        // Note this relies on Lucene's default sort by order of insertion (ie by docid)
        assertSearch ("BERNARDO", "subsequence(//SCENE, 1, 1)/SPEECH[1]/SPEAKER/string()", null, 1);
        assertSearch ("BERNARDO", "(//SCENE)[1]/SPEECH[1]/SPEAKER/string()", null, 1);
        // /PLAY/ACT[1]/SCENE[1], /ACT[1]/SCENE[1], /SCENE[1], /SCENE[2], /SCENE[3], /SCENE[4]
        // count reduced from 6 to 4 by path queries; skip /PLAY and /ACT[1]
        assertSearch ("HAMLET", "subsequence(/SCENE, 4, 1)/SPEECH[1]/SPEAKER/string()", null, 1);
    }

    @Test
    public void testSkipDocs () throws Exception {
        // Earlier implementations failed to indicate that the returned sequence of documents is sorted in document
        // order, causing Saxon to pull the entire result sequence.
        assertSearch ("KING CLAUDIUS", "subsequence((/)[.//SCENE], 4, 1)//SPEECH[1]/SPEAKER/string()", null, 1);
        assertSearch ("BERNARDO", "(//SCENE/SPEECH)[1]/SPEAKER/string()", null, 1);
    }

    @Test
    public void testSkipDocs2 () throws Exception {
        assertSearch ("BERNARDO", "(//SCENE/SPEECH)[1]/SPEAKER/string()", null, 1);
    }

    @Test
    public void testRoot () throws Exception {
        assertSearch ("KING CLAUDIUS", "(//SCENE/root())[4]//SPEECH[1]/SPEAKER/string()", null, 1);
        assertSearch ("KING CLAUDIUS", "subsequence(//SCENE/root(), 4, 1)//SPEECH[1]/SPEAKER/string()", null, 1);
    }

    @Test @Ignore
    public void testOptimizeLast () throws Exception {
        // Failed to optimize this.
        //
        // We should be able to retrieve the last document, and then get its last speech
        // best idea for optimizing this is to add pagination to lux:search
        assertSearch ("PRINCE FORTINBRAS", "(lux:search('lux_elt_name_ms:SPEECH')[last()]//SPEECH)[last()]/SPEAKER/string()", null, 1);
        assertSearch ("PRINCE FORTINBRAS", "(//SPEECH)[last()]/SPEAKER/string()", null, 1164);
    }

    @Test
    public void testIntersection () throws Exception {
        assertSearch ("4", "count(/SCENE[@act='3'])", null, 4);
        assertSearch ("5", "count(/SCENE[@scene='2'])", null, 5);
        // saxon cleverly optimizes this and gets rid of the intersect
        assertSearch ("1", "count(/SCENE[@act='3'] intersect /SCENE[@scene=2])", null, 1);
    }

    @Test
    public void testDocumentIdentity() throws Exception {
        /* This test confirms that document identity is preserved when creating Saxon documents
         * because the intersect operator relies on document/node identity. Each search call
         * retrieves the documents separately and uses the cache to preserve identity across multiple
         * searches.
         */
        assertSearch ("28", "count(lux:search('<SPEECH:Horatio')/SPEECH[contains(., 'Horatio')])", null, 40, 40);
        assertSearch ("8", "count(lux:search('<SPEECH:philosophy')//SPEECH[contains(., 'philosophy')])", null, 7, 7);
        // in docid order
        assertSearch ("1", "count(lux:search('<SPEECH:philosophy', 'lux:docid')//SPEECH[contains(., 'philosophy')] intersect lux:search('<SPEECH:Horatio', 'lux:docid')/SPEECH[contains(., 'Horatio')])", null, 29, 29);
        // in relevance order - Saxon sorts the documents
        assertSearch ("1", "count(lux:search('<SPEECH:philosophy')//SPEECH[contains(., 'philosophy')] intersect lux:search('<SPEECH:Horatio')/SPEECH[contains(., 'Horatio')])", null, 47, 47);
    }

    /* Tests relating to element visibility
     *
     * setup: all elements opaque by default, LINE transparent, name transparent, SCENE a container,
     * hidden is hidden
     */

    @Test
    public void testOpaqueElement() throws Exception {
        // /PLAY/FM/P[2] contains "Bosak" but P is opaque
        assertSearch ("0", "lux:count('<FM:Bosak')", null, 0, 0);
        // <name> element is transparent so phrases continue through it
        assertSearch ("3", "lux:count('<P:\"XML version by Jon Bosak\"')", null, 3, 0);
        assertSearch ("1", "count(/FM[contains(., 'Bosak')])", null, 1, 1);
    }

    @Test
    public void testTransparentElement() throws Exception {
        // sword always occurs in LINE elements, which are transparent, so indexed as part of SPEECH
        assertSearch ("5", "lux:count('<SPEECH:\"swear by my sword\"')", null, 5, 0);
        assertSearch ("24", "lux:count('<SPEECH:sword')", null, 24, 0);
        // content of LINE is included in <:SPEECH but not above that, since SPEECH is opaque:
        assertSearch ("0", "lux:count('<ACT:sword')", null, 0, 0);
    }

    @Test
    public void testContainerElement() throws Exception {
        // SCENE is a container element; sword occurs in I;5, II;2, III;1, III;3, IV;3, IV;5, IV;7.
        // V;2 has 'swords', but there's no stemming in the default analyzer:
        // a PLAY, 4 ACTs, and 7 SCENEs
        assertSearch ("12", "lux:count('<SCENE:sword')", null, 12, 0);
        assertSearch ("3", "lux:count('<SCENE:\"Swear by my sword\"')", null, 3, 0);
        // checks all 20 of the scenes
        assertSearch ("1", "count(/SCENE[contains(.,\"Swear by my sword\")])", null, 20, 20);
    }

    @Test
    public void testHiddenElement() throws Exception {
        assertSearch ("0", "lux:count('<hidden:adam')", null, 0, 0);
        // 2x /PLAY/ACT/SCENE/SPEECH/LINE, in the same SCENE, but not /PLAY/FM/P/name/hidden
        // and we don't handle the possessive in Adam's, so only 5, not 7
        assertSearch ("5", "lux:count('<LINE:adam')", null, 5, 0);
        assertSearch ("0", "lux:count('name:adam')", null, 0, 0);
        assertSearch ("0", "lux:count('hidden:adam')", null, 0, 0);
        assertSearch ("5", "lux:count('adam')", null, 5, 0);
        // phrase wraps around hidden element
        assertSearch ("4", "lux:count('<name:\"michael sokolov\"')", null, 4, 0);
        assertSearch ("4", "lux:count('\"michael sokolov\"')", null, 4, 0);
    }

    @Test
    public void testDocumentOrder() throws Exception {
        /* This test confirms that the document ordering asserted by the Optimizer
         * is correct since if document order in Saxon
         * is not the same as document order in Lucene, then the 31st document will not be
         * what we expect. 31 is a magic number because /PLAY has 20 /PLAY/ACT/SCENE,
         * /ACT 1 has 5 /ACT/SCENE, then those 5 are repeated as /SCENE. The 31st should be
         * /ACT[2]/SCENE[1], but since this will already have been created, its Saxon document
         * number would be low using the built-in numbering scheme, and the order mismatch causes
         * Saxon to terminate the intersection prematurely. */
        assertSearch ("5", "count(/ACT/SCENE intersect subsequence(//SCENE, 1, 30))", null, 9, 9);
        assertSearch ("6", "count(/ACT/SCENE intersect subsequence(//SCENE, 1, 31))", null, 10, 10);
    }

    @Test
    public void testPaths () throws Exception {
        // test path ordering:
        assertSearch (null, "/ACT/PLAY", null, 0);
        assertSearch (null, "//ACT//PLAY", null, 0);
        // test path distance:
        assertSearch ("Where is your son?", "string(/PLAY/ACT[4]/SCENE[1]/SPEECH[1]/LINE[3])", null, 1);
        // Q: who decides what serialization to use?
        //assertSearch ("Where is your son?", "/PLAY/ACT[4]/SCENE[1]/SPEECH[1]/LINE[3]", null, 1);
        assertSearch ("Where is your son?", "string((/PLAY/ACT[4]/*/*/LINE)[3])", null, 1);
        // no result, but we can't tell from the query and have to retrieve the document and process it
        assertSearch (null, "/PLAY/ACT[4]/*/*/*/*/LINE", null, 1);
    }

    @Test
    public void testReversePaths () throws Exception {
        // expresses a deepish path in reverse order, using predicates
        assertSearch ("Where is your son?", "string(//LINE[3]" +
                "[parent::SPEECH[not(preceding-sibling::SPEECH)]]" +
                "[ancestor::SCENE[count(preceding-sibling::SCENE)=0]]" +
                "[ancestor::ACT[count(preceding-sibling::ACT)=3]]" +
                "[ancestor::PLAY])", null, 1);
        assertSearch ("Where is your son?", "string(//ACT[4]/SCENE[1]/SPEECH[1]/LINE[3]" +
                "[../../../../self::PLAY[.. is root()]])", null, 1);
    }

    @Test
    public void testElementFullTextPhrase () throws Exception {
        // test phrase query generation
        // also handling of capitalization and tokenization (w/punctuation)
        assertSearch ("5", "count(//LINE[.='Holla! Bernardo!'])", null, 5, 5);
        assertSearch ("0", "count(//LINE[.='Holla!'])", null, 5, 5);
        assertSearch ("0", "count(//LINE[.='Holla Bernardo'])", null, 5, 5);
        // We cannot optimize this one due to the function call around (.):
        assertSearch ("1", "count(/ACT//LINE[lower-case(.)='holla! bernardo!'])", null, 5, 5);
        // ensure that paths ending in Dot don't accidentally reference the outer context
        assertSearch ("0", "count(/ACT//LINE[FOO//.='holla! bernardo!'])", null, 0, 0);
        // check stop word handling
        assertSearch ("<LINE>Where is your son?</LINE>", "//LINE[.='Where is your son?']", null, 5, 5);
    }

    @Test
    public void testFullText () throws Exception {
        assertSearch ("Where is your son?", "//*[.='Where is your son?']/string()", null, 5, 5);
    }

    @Test
    public void testContains () throws Exception {
        /*
         * When we had a contains optimization:
        assertSearch ("5", "count(//LINE[contains(.,'Holla')])", null, 5, 5);
        assertSearch ("true", "contains(/PLAY,'Holla')", null, 1, 1);
        // searches match all 10 instances of 'given' since they will be case-insensitive
        // There is also one occurrence of 'forgiveness' that should match
        assertSearch ("1", "count (/LINE[contains(.,'Given')])", null, 11, 11);
        assertSearch ("10", "count (/LINE[contains(.,'given')])", null, 11, 11);
         */
        int lineCount = index.elementCounts.get("LINE");
        // has to check every /LINE document:
        assertSearch ("1", "count(/LINE[contains(.,'olla! Bern')])", null, lineCount, lineCount);
    }

    /**
     * Exercises lux:search with valid queries, with wrongly-typed or surplus arguments, and with
     * an unparseable query string; the invalid forms must each raise a LuxException.
     */
    @Test
    public void testLuxSearch () throws Exception {
        assertSearch ("5", "count(lux:search('\"holla bernardo\"'))", null, 5, 0);
        assertSearch ("5", "count(lux:search('<:\"holla bernardo\"'))", null, 5, 0);
        assertSearch ("5", "count(lux:search('<LINE:\"holla bernardo\"'))", null, 5, 0);
        try {
            assertSearch (null, "lux:search(1,2,3)", null, null, null);
            fail ("expected exception not thrown");
        } catch (LuxException expected) {
            // expected: only the fact that a LuxException is raised matters here
        }
        try {
            assertSearch (null, "lux:search(1,2)", null, null, null);
            fail ("expected exception not thrown");
        } catch (LuxException expected) {
            // expected: only the fact that a LuxException is raised matters here
        }
        try {
            assertSearch (null, "lux:search(':::')", null, null, null);
            fail ("expected exception not thrown");
        } catch (LuxException e) {
            // report the actual message if it is not the parse error we are looking for
            assertTrue (e.getMessage(), e.getMessage().startsWith("Cannot parse ':::'"));
        }
        assertSearch ("65", "lux:count(text{'bernardo'})", null, 65, 0);
    }

    @Test
    public void testLuxSearchPath () throws Exception {
        assertSearch ("1", "count(lux:search('\"holla bernardo\"')/SPEECH)", null, 5, 5);
    }

    /* Bug found in the wild - the opto that preserves document-ordering by embedding the trailing path
     * in a predicate applied an incorrect query. */
    @Test
    public void testLuxSearchRoot () throws Exception {
        // This is the actual bug:
        String query = "lux:search (\"<@scene:5\")[1]/root()";
        assertSearch ("__IGNORE__", query, null, 1, 1);
        // Some attempts to reproduce, kept for posterity?
        // first result is LINE due to TFIDF (relevance) scoring
        assertSearch ("LINE", "lux:search('\"holla bernardo\"')[1]/root()/*/name()", null, 1, 1);
        assertSearch (null, "lux:search('<@id:100')[1]/root()/*/name()", null, 0, 0);
    }

    @Test
    public void testBugFix0018() throws Exception {
        assertSearch ("MARCELLUS", "for $doc in /SPEECH[LINE='Holla! Bernardo!'] return $doc/SPEAKER/string()", null, 1, 1);
    }

    @Test
    public void testEmptyReturn() throws Exception {
        assertSearch (null, "for $doc in /SPEECH[LINE='Holla! Bernardo!'] return $doc/UNKNOWN/string()", null, 0, 0);
    }

    @Test
    public void testBugFix0018b() throws Exception {
        assertSearch (HAMLET_TITLE_MARKUP, "lux:search(\"*:*\")[2]", null, 1, 1);
    }

    @Test
    public void testTrailingStringCall () throws Exception {
        assertSearch ("Where is your son?", "/PLAY/ACT[4]/SCENE[1]/SPEECH[1]/LINE[3]/string()", null, 1);
    }

    @Test
    public void testOrderBy () throws Exception {
        // TODO: we don't yet have a solution that allows us to push the order by
        // optimization (to say nothing of additional constraints) into a user-supplied
        // query using the string query syntax.
        assertSearch ("ACT", "(for $doc in lux:search('bernardo')" +
                " order by lux:key('doctype', $doc) return $doc/*/name())[1]", 0, 1);
    }

    @Test
    public void testOrderByPagination () throws Exception {
        assertSearch ("SPEAKER", "(for $doc in lux:search('bernardo')" +
                " order by lux:key('doctype', $doc) return $doc/*/name())[21]", 0, 1);
        assertSearch ("<SPEAKER>BERNARDO</SPEAKER>", "(for $doc in lux:search('bernardo')" +
                " order by lux:key('doctype', $doc) return $doc)[21]", 0, 1);
    }

    @Test
    public void testHighlight () throws Exception {
        assertSearch ("<TITLE>The Tragedy of <B>Hamlet</B>, Prince of Denmark</TITLE>",
                "lux:highlight(/PLAY/TITLE, 'hamlet')", null, null);
    }

    @Test
    public void testHighlightMultiple () throws Exception {
        assertSearch ("<TITLE>The <B>Tragedy</B> <B>of</B> <B>Hamlet</B>, Prince <B>of</B> Denmark</TITLE>",
                "lux:highlight(/PLAY/TITLE,'tragedy of hamlet')", null, null);
    }

    @Test
    public void testHighlightPhrase () throws Exception {
        assertSearch ("<TITLE>The <B>Tragedy</B> <B>of</B> <B>Hamlet</B>, Prince of Denmark</TITLE>",
                "lux:highlight(/PLAY/TITLE, '\"tragedy of hamlet\"')", null, null);
    }

    @Test
    public void testHighlightElementQuery () throws Exception {
        assertSearch ("<TITLE>The Tragedy of <B>Hamlet</B>, Prince of Denmark</TITLE>",
                "lux:highlight(/PLAY/TITLE, '<TITLE:hamlet')", null, null);
    }

    @Test
    public void testHighlightElementMultiple () throws Exception {
        assertSearch ("<TITLE>The <B>Tragedy</B> of <B>Hamlet</B>, Prince of Denmark</TITLE>",
                "lux:highlight(/PLAY/TITLE, '<TITLE:hamlet <TITLE:tragedy')", null, null);
    }

    // Highlighting element-phrase-queries is not well-supported by the current highlighter,
    // because the Lucene phrase highlighting is restricted to operate on a single field,
    // and we use the main text field. So we choose to err on the side of over-highlighting
    // using a workaround that ignores element restrictions in the presence of phrase queries.
    @Test
    public void testHighlightElementPhrase () throws Exception {
        assertSearch ("<TITLE>The <B>Tragedy</B> <B>of</B> <B>Hamlet</B>, Prince of Denmark</TITLE>",
                "lux:highlight(/PLAY/TITLE, '<TITLE:\"tragedy of hamlet\"')", null, null);
    }

    @Test
    public void testHighlightMixedQuery () throws Exception {
        assertSearch ("<TITLE>The <B>Tragedy</B> <B>of</B> <B>Hamlet</B>, Prince of Denmark</TITLE>",
                "lux:highlight(/PLAY/TITLE, '<TITLE:tragedy \"of hamlet\"')", null, null);
    }

    @Test
    public void testHighlightAttributeQuery () throws Exception {
        // no highlighting in attributes
        assertSearch ("<node id=\"10\">node 10</node>", "lux:highlight(<node id=\"10\">node 10</node>, '<@id:10')", null, null);
    }

    // Make sure text offset calculations handle multiple text nodes
    @Test
    public void testHighlightComplexContent() throws Exception {
        assertSearch ("<FM>\n<P>Text placed in the public domain by Moby Lexical Tools, 1992.</P>\n" +
                "<P>SGML markup by <name><B>Jon</B> <B>Bosak</B></name>, 1992-1994.</P>\n" +
                "<P>XML version by <name><B>Jon</B> <B>Bosak</B></name>, 1996-1998.</P>\n" +
                "<P>XML attributes and name tagging added by <name>Michael <hidden>Adam</hidden> Sokolov</name>, 2013</P>\n" +
                "<P>This work may be freely copied and distributed worldwide.</P>\n</FM>",
                "lux:highlight(/FM, 'Jon Bosak')", null, null);
    }

    @Test
    public void testFunctionDefinition() throws Exception {
        // test the optimization of function bodies, and also of the collection()
        // function call. collection() must be used inside function bodies since
        // Saxon generates static (compilation) errors due to the lack of a context item
        // for the absolute paths
        assertSearch (HAMLET_TITLE_MARKUP,
                "declare namespace test='test'; " +
                "declare function test:function () { collection()/PLAY/TITLE }; " +
                "test:function()", null, 1);
    }

    @Test
    public void testFlworPath() throws Exception {
        // test that a let clause that doesn't match any data doesn't prevent the
        // entire query from matching
        assertSearch (HAMLET_TITLE_MARKUP, "let $play := collection()/PLAY[TITLE]\n" +
                "return ($play/TITLE)", null, 1);
    }

    @Test
    public void testSequence() throws Exception {
        // test that queries generated by a sequence combine properly (ie using OR)
        // if $play = () then we should get back ''.
        assertSearch (HAMLET_TITLE, "let $play := collection()/PLAY[TITLE]\n" +
                "return (string($play/TITLE), $play/@id)", null, 1);
    }

    @Test
    public void testLet() throws Exception {
        assertSearch (HAMLET_TITLE, "let $play := collection()/PLAY[TITLE]\n" +
                "let $id := $play/@id\n" +
                "return (string($play/TITLE), $id)", null, 1);
    }

    @Test
    public void testIrrelevantLet() throws Exception {
        // test that a let clause that doesn't match any data doesn't prevent the
        // entire query from matching. This is hard to do b/c Saxon tends
        // to optimize away so much. This also provides an opportunity to ensure
        // that we optimize through the use of variables, since the variables
        // are preserved in this expression, even after Saxon's optos.
        assertSearch (HAMLET_TITLE, "let $play := collection()/PLAY[starts-with(TITLE,'The ')]\n" +
                "let $id := $play/@id\n" +
                "return <result id='{$id}'>{if ($id) then '' else $play/TITLE}</result>/string()", null, 1);
    }

    @Test
    public void testDeepPagination () throws Exception {
        //ensure that deep pagination skips intervening documents without loading them into memory
        assertSearch ("1", "count(collection()[1000]/*)", null, 1);
    }

    @Test
    public void testWhereAtClause () throws Exception {
        // return the index of the first /SCENE document ; the first SCENE is the 49th element in hamlet.xml,
        // and therefore the root of document #49 in the test set
        String query = "(for $doc at $i in collection() where $doc/SCENE return $i)[1]";
        assertSearch ("49", query, null, 49);
    }

    @Test
    public void testFieldValuesComparison () throws Exception {
        String query = "collection()[lux:key('doctype')='SCENE'][1]/descendant::SPEECH[1]/SPEAKER/string()";
        // there are 20 scenes in Hamlet, but we only need to pull the first one for this query
        assertSearch ("BERNARDO", query, null, 1, 1);
        query = "collection()[lux:key('doctype')='SCENE'][1]/descendant::SPEAKER[1]/string()";
        assertSearch ("BERNARDO", query, null, 1, 1);
        query = "count(collection()[lux:key('doctype')='SCENE'])";
        assertSearch ("20", query, null, 20, 0);
    }

    @Test
    public void testPredicateChain() throws Exception {
        String query = "count(//ACT[1]/SCENE[2]/SPEECH[3]/SPEAKER)";
        assertSearch ("6", query, null, 6, 6);
    }

    @Test
    public void testNestedPredicateComparison() throws Exception {
        String query = "exists(/PLAY[ACT[SCENE/TITLE='SCENE IV. The platform.']])";
        assertSearch ("true", query, null, 1, 1);
    }

    @Test
    public void testRangeInequality() throws Exception {
        // we have five ACTs
        String query = "count((/)[lux:key('doctype') <= 'ACT'])";
        assertSearch ("5", query, null, 5, 0);
        query = "count((/)[lux:key('doctype') le 'ACT'])";
        assertSearch ("5", query, null, 5, 0);
    }

    @Test
    public void testCombinedRange() throws Exception {
        String query = "count((/)[lux:key('doctype') >= 'A'][lux:key('doctype') <= 'B'])";
        assertSearch ("5", query, null, 5, 0);
        // we have one FM and two GRPDESCR
        query = "count((/)[lux:key('doctype') > 'F'][lux:key('doctype') < 'H'])";
        assertSearch ("3", query, null, 3, 0);
        query = "count((/)[lux:key('doctype') gt 'F' and lux:key('doctype') lt 'H'])";
        assertSearch ("3", query, null, 3, 0);
    }

    @Test
    public void testRangeGenComp() throws Exception {
        // we have only one SCNDESCR, but we don't optimize this yet
        String query = "count((/)[lux:key('doctype') gt 'S' and lux:key('doctype') lt 'T' " +
                "and not(lux:key('doctype') = ('SCENE','SPEECH','SPEAKER','STAGEDIR'))])";
        assertSearch ("1", query, null, 2552, 2552);
    }

    @Test
    public void testFieldValuesNoContext () throws Exception {
        // compare an integer against a string-valued field
        String query = "if (2 eq lux:key('xxx')) then 'yes' else 'no'";
        try {
            assertSearch ("false", query, null, 0, 0);
            fail ("expected exception not thrown");
        } catch (LuxException e) {
            // report the actual message if it is not the one we are looking for
            assertTrue (e.getMessage(), e.getMessage().contains("no context defined"));
        }
    }

    @Test
    public void testIntFieldEquality() throws Exception {
        String query;
        // check that our int-valued field was indexed correctly:
        query = "(/ACT)[2]/lux:key('actnum')";
        assertSearch ("2", query, null, 1, 1);
        // do a basic int comparison
        query = "count(collection()[2 eq lux:key('actnum')])";
        assertSearch ("3", query, null, 3, 0);
        // Try comparing an integer against a string-valued field
        query = "count(collection()[2 eq lux:key('actstr')])";
        try {
            assertSearch ("0", query, null, 0, 0);
            fail ("expected exception not thrown");
        } catch (LuxException e) {
            assertEquals("Cannot compare xs:integer to xs:string", e.getMessage());
        }
    }

    @Test
    public void testIntFieldInequality() throws Exception {
        String query;
        // do a basic int comparison
        query = "count(collection()[lux:key('actnum') lt 2])";
        assertSearch ("6", query, null, 6, 0);
        query = "count(collection()[lux:key('actnum') < 2])";
        assertSearch ("6", query, null, 6, 0);
        query = "count(collection()[lux:key('actnum') > 2][lux:key('actnum') <= 3])";
        assertSearch ("5", query, null, 5, 0);
    }

    @Test
    public void testLongFieldInequality() throws Exception {
        // do a basic long comparison, and make sure comparison with other numeric types is allowed
        String query = "count(collection()[lux:key('scnlong') gt xs:int(5)])";
        assertSearch ("2", query, null, 2, 0);
    }

    @Test @Ignore
    public void testXPathRangeQuery () throws Exception {
        String query;
        // do a basic integer comparison; the cast is required for atomic comparison
        query = "count(//SCENE[xs:integer(@act) lt 2])";
        assertSearch ("8", query, null, 6, 6);
        query = "count(//SCENE[@act < 2])";
        assertSearch ("6", query, null, 4, 4);
        query = "count(//SCENE[xs:integer(@act) > 2][xs:integer(@act) <= 3])";
        assertSearch ("9", query, null, 7, 7);
    }

    @Test
    public void testAttributePredicate() throws Exception {
        // from Geet Gangwar
        //context /@id[.='I2009']
        String query = "count(//SCENE/@act[.='2'])";
        assertSearch ("6", query, null, 4, 4);
        query = "//SCENE/@act[.='2']";
        XdmResultSet results = assertSearch (query, (Integer) null, 4, 4);
        assertEquals (6, results.getXdmValue().size());
    }

    /* See LUX-62 */
    @Test @Ignore
    public void testTimestampRange () throws Exception {
        String query = "lux:count('timestamp:[2013-09-01T21:30:50.515Z TO NOW]')";
        assertSearch ("6636", query, null, 0, 0);
    }

    @Test
    public void testEmptySort() throws Exception {
        assertSearch ("PLAY", "name(lux:search('yorick',())[1]/*)", null, 1, 1);
    }
}
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */