package org.exist.indexing.ngram; import org.custommonkey.xmlunit.NamespaceContext; import org.custommonkey.xmlunit.SimpleNamespaceContext; import org.custommonkey.xmlunit.XMLAssert; import org.custommonkey.xmlunit.XMLUnit; import org.exist.collections.Collection; import org.exist.collections.CollectionConfigurationManager; import org.exist.collections.IndexInfo; import org.exist.security.xacml.AccessContext; import org.exist.storage.BrokerPool; import org.exist.storage.DBBroker; import org.exist.storage.serializers.EXistOutputKeys; import org.exist.storage.serializers.Serializer; import org.exist.storage.txn.TransactionManager; import org.exist.storage.txn.Txn; import org.exist.test.TestConstants; import org.exist.util.Configuration; import org.exist.util.ConfigurationHelper; import org.exist.xmldb.XmldbURI; import org.exist.xquery.XPathException; import org.exist.xquery.XQuery; import org.exist.xquery.value.NodeValue; import org.exist.xquery.value.Sequence; import org.junit.AfterClass; import static org.junit.Assert.*; import org.junit.BeforeClass; import org.junit.Test; import org.xml.sax.SAXException; import javax.xml.transform.OutputKeys; import java.io.File; import java.util.HashMap; import java.util.Properties; public class MatchListenerTest { private static String XML = "<root>" + " <para>some paragraph with <hi>mixed</hi> content.</para>" + " <para>another paragraph with <note><hi>nested</hi> inner</note> elements.</para>" + " <para>a third paragraph with <term>term</term>.</para>" + " <para>double match double match</para>" + "</root>"; private static String XML2 = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" + "<p xmlns=\"http://www.tei-c.org/ns/1.0\" xml:id=\"pT01p0257c1501\">爾時會中。有一尊者。名曰龍護。手執寶拂。 \n" + " <lb n=\"0257c16\" ed=\"T\"/>侍立佛側。時尊者龍護白佛言。世尊。我見 \n" + " <lb n=\"0257c17\" ed=\"T\"/>諸邪外道尼乾子等。於佛世尊。先不起信。 \n" + " <lb n=\"0257c18\" ed=\"T\"/>唯於邪道。競說勝能。是故我今建立表剎 \n" + " <lb n=\"0257c19\" ed=\"T\"/>宣示於世。咸使聞知佛勝功德。於佛世尊。是 \n" + " <lb n=\"0257c20\" ed=\"T\"/>大丈夫。最尊最上。無有等者。\n" + "</p>"; private static String CONF1 = "<collection xmlns=\"http://exist-db.org/collection-config/1.0\">" + " <index>" + " <fulltext default=\"none\">" + " </fulltext>" + " <ngram qname=\"para\"/>" + " <ngram qname=\"term\"/>" + " </index>" + "</collection>"; private static String CONF2 = "<collection xmlns=\"http://exist-db.org/collection-config/1.0\">" + " <index>" + " <fulltext default=\"none\">" + " </fulltext>" + " <ngram qname=\"note\"/>" + " </index>" + "</collection>"; private static String CONF3 = "<collection xmlns=\"http://exist-db.org/collection-config/1.0\">" + " <index xmlns:tei=\"http://www.tei-c.org/ns/1.0\">" + " <fulltext default=\"none\">" + " </fulltext>" + " <ngram qname=\"tei:p\"/>" + " </index>" + "</collection>"; private static String MATCH_START = "<exist:match xmlns:exist=\"http://exist.sourceforge.net/NS/exist\">"; private static String MATCH_END = "</exist:match>"; private static BrokerPool pool; @Test public void nestedContent() { DBBroker broker = null; try { configureAndStore(CONF1, XML); broker = pool.get(org.exist.security.SecurityManager.SYSTEM_USER); XQuery xquery = broker.getXQueryService(); assertNotNull(xquery); Sequence seq = xquery.execute("//para[ngram:contains(., 'mixed')]", null, AccessContext.TEST); assertNotNull(seq); assertEquals(1, seq.getItemCount()); String result = queryResult2String(broker, seq); System.out.println("RESULT: " + result); XMLAssert.assertEquals("<para>some paragraph with <hi>" + MATCH_START + "mixed" + MATCH_END + "</hi> content.</para>", result); seq = xquery.execute("//para[ngram:contains(., 'content')]", null, AccessContext.TEST); assertNotNull(seq); assertEquals(1, seq.getItemCount()); result = queryResult2String(broker, seq); System.out.println("RESULT: " + result); XMLAssert.assertEquals("<para>some paragraph with <hi>mixed</hi> " + MATCH_START + "content" + MATCH_END + ".</para>", result); seq = xquery.execute("//para[ngram:contains(., 'nested')]", null, AccessContext.TEST); assertNotNull(seq); assertEquals(1, seq.getItemCount()); result = queryResult2String(broker, seq); System.out.println("RESULT: " + result); XMLAssert.assertEquals("<para>another paragraph with <note><hi>" + MATCH_START + "nested" + MATCH_END + "</hi> inner</note> elements.</para>", result); seq = xquery.execute("//para[ngram:contains(., 'content') and ngram:contains(., 'mixed')]", null, AccessContext.TEST); assertNotNull(seq); assertEquals(1, seq.getItemCount()); result = queryResult2String(broker, seq); System.out.println("RESULT: " + result); XMLAssert.assertEquals("<para>some paragraph with <hi>" + MATCH_START + "mixed" + MATCH_END + "</hi> " + MATCH_START + "content" + MATCH_END + ".</para>", result); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } finally { pool.release(broker); } } @Test public void matchInParent() { DBBroker broker = null; try { configureAndStore(CONF1, XML); broker = pool.get(org.exist.security.SecurityManager.SYSTEM_USER); XQuery xquery = broker.getXQueryService(); assertNotNull(xquery); Sequence seq = xquery.execute("//para[ngram:contains(., 'mixed')]/hi", null, AccessContext.TEST); assertNotNull(seq); assertEquals(1, seq.getItemCount()); String result = queryResult2String(broker, seq); System.out.println("RESULT: " + result); XMLAssert.assertEquals("<hi>" + MATCH_START + "mixed" + MATCH_END + "</hi>", result); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } finally { pool.release(broker); } } @Test public void matchInAncestor() { DBBroker broker = null; try { configureAndStore(CONF1, XML); broker = pool.get(org.exist.security.SecurityManager.SYSTEM_USER); XQuery xquery = broker.getXQueryService(); assertNotNull(xquery); Sequence seq = xquery.execute("//para[ngram:contains(., 'nested')]/note", null, AccessContext.TEST); assertNotNull(seq); assertEquals(1, seq.getItemCount()); String result = queryResult2String(broker, seq); System.out.println("RESULT: " + result); XMLAssert.assertEquals("<note><hi>" + MATCH_START + "nested" + MATCH_END + "</hi> inner</note>", result); seq = xquery.execute("//para[ngram:contains(., 'nested')]//hi", null, AccessContext.TEST); assertNotNull(seq); assertEquals(1, seq.getItemCount()); result = queryResult2String(broker, seq); System.out.println("RESULT: " + result); XMLAssert.assertEquals("<hi>" + MATCH_START + "nested" + MATCH_END + "</hi>", result); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } finally { pool.release(broker); } } @Test public void nestedIndex() { DBBroker broker = null; try { configureAndStore(CONF1, XML); broker = pool.get(org.exist.security.SecurityManager.SYSTEM_USER); XQuery xquery = broker.getXQueryService(); assertNotNull(xquery); Sequence seq = xquery.execute("//para[ngram:contains(term, 'term')]", null, AccessContext.TEST); assertNotNull(seq); assertEquals(1, seq.getItemCount()); String result = queryResult2String(broker, seq); System.out.println("RESULT: " + result); XMLAssert.assertEquals("<para>a third paragraph with <term>" + MATCH_START + "term" + MATCH_END + "</term>.</para>", result); seq = xquery.execute("//term[ngram:contains(., 'term')]/..", null, AccessContext.TEST); assertNotNull(seq); assertEquals(1, seq.getItemCount()); result = queryResult2String(broker, seq); System.out.println("RESULT: " + result); XMLAssert.assertEquals("<para>a third paragraph with <term>" + MATCH_START + "term" + MATCH_END + "</term>.</para>", result); seq = xquery.execute("//term[ngram:contains(., 'term')]/ancestor::para", null, AccessContext.TEST); assertNotNull(seq); assertEquals(1, seq.getItemCount()); result = queryResult2String(broker, seq); System.out.println("RESULT: " + result); XMLAssert.assertEquals("<para>a third paragraph with <term>" + MATCH_START + "term" + MATCH_END + "</term>.</para>", result); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } finally { pool.release(broker); } } @Test public void mixedContentQueries() { DBBroker broker = null; try { configureAndStore(CONF1, XML); broker = pool.get(org.exist.security.SecurityManager.SYSTEM_USER); XQuery xquery = broker.getXQueryService(); assertNotNull(xquery); Sequence seq = xquery.execute("//para[ngram:contains(., 'mixed content')]", null, AccessContext.TEST); assertNotNull(seq); assertEquals(1, seq.getItemCount()); String result = queryResult2String(broker, seq); System.out.println("RESULT: " + result); XMLAssert.assertEquals("<para>some paragraph with <hi>" + MATCH_START + "mixed" + MATCH_END + "</hi>" + MATCH_START + " content" + MATCH_END + ".</para>", result); seq = xquery.execute("//para[ngram:contains(., 'with mixed content')]", null, AccessContext.TEST); assertNotNull(seq); assertEquals(1, seq.getItemCount()); result = queryResult2String(broker, seq); System.out.println("RESULT: " + result); XMLAssert.assertEquals("<para>some paragraph " + MATCH_START + "with " + MATCH_END + "<hi>" + MATCH_START + "mixed" + MATCH_END + "</hi>" + MATCH_START + " content" + MATCH_END + ".</para>", result); seq = xquery.execute("//para[ngram:contains(., 'with nested')]", null, AccessContext.TEST); assertNotNull(seq); assertEquals(1, seq.getItemCount()); result = queryResult2String(broker, seq); System.out.println("RESULT: " + result); XMLAssert.assertEquals("<para>another paragraph " + MATCH_START + "with " + MATCH_END + "<note><hi>" + MATCH_START + "nested" + MATCH_END + "</hi> inner</note> elements.</para>", result); seq = xquery.execute("//para[ngram:contains(., 'with nested inner elements')]", null, AccessContext.TEST); assertNotNull(seq); assertEquals(1, seq.getItemCount()); result = queryResult2String(broker, seq); System.out.println("RESULT: " + result); XMLAssert.assertEquals("<para>another paragraph " + MATCH_START + "with " + MATCH_END + "<note><hi>" + MATCH_START + "nested" + MATCH_END + "</hi>" + MATCH_START + " inner" + MATCH_END + "</note>" + MATCH_START + " elements" + MATCH_END + ".</para>", result); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } finally { pool.release(broker); } } @Test public void indexOnInnerElement() { DBBroker broker = null; try { configureAndStore(CONF2, XML); broker = pool.get(org.exist.security.SecurityManager.SYSTEM_USER); XQuery xquery = broker.getXQueryService(); assertNotNull(xquery); Sequence seq = xquery.execute("//para[ngram:contains(note, 'nested inner')]", null, AccessContext.TEST); assertNotNull(seq); assertEquals(1, seq.getItemCount()); String result = queryResult2String(broker, seq); System.out.println("RESULT: " + result); XMLAssert.assertEquals("<para>another paragraph with <note><hi>" + MATCH_START + "nested" + MATCH_END + "</hi>" + MATCH_START + " inner" + MATCH_END + "</note> elements.</para>", result); seq = xquery.execute("//note[ngram:contains(., 'nested inner')]/parent::para", null, AccessContext.TEST); assertNotNull(seq); assertEquals(1, seq.getItemCount()); result = queryResult2String(broker, seq); System.out.println("RESULT: " + result); XMLAssert.assertEquals("<para>another paragraph with <note><hi>" + MATCH_START + "nested" + MATCH_END + "</hi>" + MATCH_START + " inner" + MATCH_END + "</note> elements.</para>", result); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } finally { pool.release(broker); } } @Test public void doubleMatch() { DBBroker broker = null; try { configureAndStore(CONF1, XML); broker = pool.get(org.exist.security.SecurityManager.SYSTEM_USER); XQuery xquery = broker.getXQueryService(); assertNotNull(xquery); Sequence seq = xquery.execute("//para[ngram:contains(., 'double match')]", null, AccessContext.TEST); assertNotNull(seq); assertEquals(1, seq.getItemCount()); String result = queryResult2String(broker, seq); System.out.println("RESULT: " + result); XMLAssert.assertEquals("<para>" + MATCH_START + "double match" + MATCH_END + " " + MATCH_START + "double match" + MATCH_END + "</para>", result); } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } finally { pool.release(broker); } } @Test public void smallStrings() { DBBroker broker = null; try { configureAndStore(CONF3, XML2); broker = pool.get(org.exist.security.SecurityManager.SYSTEM_USER); XQuery xquery = broker.getXQueryService(); assertNotNull(xquery); String[] strings = new String[] { "龍", "龍護", "曰龍護", "名曰龍護" }; for (int i = 0; i < strings.length; i++) { Sequence seq = xquery.execute( "declare namespace tei=\"http://www.tei-c.org/ns/1.0\";\n" + "//tei:p[ngram:contains(., '" + strings[i] + "')]", null, AccessContext.TEST); assertNotNull(seq); assertEquals(1, seq.getItemCount()); String result = queryResult2String(broker, seq); System.out.println("RESULT: " + result); XMLAssert.assertXpathEvaluatesTo(i < 2 ? "2" : "1", "count(//exist:match)", result); XMLAssert.assertXpathExists("//exist:match[text() = '" + strings[i] + "']", result); } } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } finally { pool.release(broker); } } @Test public void constructedNodes() { DBBroker broker = null; try { configureAndStore(CONF3, XML2); broker = pool.get(org.exist.security.SecurityManager.SYSTEM_USER); XQuery xquery = broker.getXQueryService(); assertNotNull(xquery); String[] strings = new String[] { "龍", "龍護", "曰龍護", "名曰龍護" }; for (int i = 0; i < strings.length; i++) { Sequence seq = xquery.execute( "declare namespace tei=\"http://www.tei-c.org/ns/1.0\";\n" + "for $para in //tei:p[ngram:contains(., '" + strings[i] + "')]\n" + "return\n" + " <match>{$para}</match>", null, AccessContext.TEST); assertNotNull(seq); assertEquals(1, seq.getItemCount()); String result = queryResult2String(broker, seq); System.out.println("RESULT: " + result); XMLAssert.assertXpathEvaluatesTo(i < 2 ? "2" : "1", "count(//exist:match)", result); XMLAssert.assertXpathExists("//exist:match[text() = '" + strings[i] + "']", result); } } catch (Exception e) { e.printStackTrace(); fail(e.getMessage()); } finally { pool.release(broker); } } @BeforeClass public static void startDB() { DBBroker broker = null; TransactionManager transact = null; Txn transaction = null; try { File confFile = ConfigurationHelper.lookup("conf.xml"); Configuration config = new Configuration(confFile.getAbsolutePath()); BrokerPool.configure(1, 5, config); pool = BrokerPool.getInstance(); assertNotNull(pool); broker = pool.get(org.exist.security.SecurityManager.SYSTEM_USER); assertNotNull(broker); transact = pool.getTransactionManager(); assertNotNull(transact); transaction = transact.beginTransaction(); assertNotNull(transaction); System.out.println("Transaction started ..."); Collection root = broker.getOrCreateCollection(transaction, TestConstants.TEST_COLLECTION_URI); assertNotNull(root); broker.saveCollection(transaction, root); transact.commit(transaction); } catch (Exception e) { transact.abort(transaction); e.printStackTrace(); fail(e.getMessage()); } finally { if (pool != null) pool.release(broker); } HashMap m = new HashMap(); m.put("tei", "http://www.tei-c.org/ns/1.0"); m.put("exist", "http://exist.sourceforge.net/NS/exist"); NamespaceContext ctx = new SimpleNamespaceContext(m); XMLUnit.setXpathNamespaceContext(ctx); } @AfterClass public static void closeDB() { BrokerPool pool = null; DBBroker broker = null; TransactionManager transact = null; Txn transaction = null; try { pool = BrokerPool.getInstance(); assertNotNull(pool); broker = pool.get(org.exist.security.SecurityManager.SYSTEM_USER); assertNotNull(broker); transact = pool.getTransactionManager(); assertNotNull(transact); transaction = transact.beginTransaction(); assertNotNull(transaction); System.out.println("Transaction started ..."); Collection root = broker.getOrCreateCollection(transaction, TestConstants.TEST_COLLECTION_URI); assertNotNull(root); broker.removeCollection(transaction, root); Collection config = broker.getOrCreateCollection(transaction, XmldbURI.create(CollectionConfigurationManager.CONFIG_COLLECTION + "/db")); assertNotNull(config); broker.removeCollection(transaction, config); transact.commit(transaction); } catch (Exception e) { transact.abort(transaction); e.printStackTrace(); fail(e.getMessage()); } finally { if (pool != null) pool.release(broker); } BrokerPool.stopAll(false); } private void configureAndStore(String config, String xml) { DBBroker broker = null; TransactionManager transact = null; Txn transaction = null; try { broker = pool.get(org.exist.security.SecurityManager.SYSTEM_USER); assertNotNull(broker); transact = pool.getTransactionManager(); assertNotNull(transact); transaction = transact.beginTransaction(); assertNotNull(transaction); Collection root = broker.getOrCreateCollection(transaction, TestConstants.TEST_COLLECTION_URI); assertNotNull(root); CollectionConfigurationManager mgr = pool.getConfigurationManager(); mgr.addConfiguration(transaction, broker, root, config); IndexInfo info = root.validateXMLResource(transaction, broker, XmldbURI.create("test_matches.xml"), xml); assertNotNull(info); root.store(transaction, broker, info, xml, false); transact.commit(transaction); } catch (Exception e) { transact.abort(transaction); e.printStackTrace(); fail(e.getMessage()); } finally { pool.release(broker); } } private String queryResult2String(DBBroker broker, Sequence seq) throws SAXException, XPathException { Properties props = new Properties(); props.setProperty(OutputKeys.INDENT, "no"); props.setProperty(EXistOutputKeys.HIGHLIGHT_MATCHES, "elements"); Serializer serializer = broker.getSerializer(); serializer.reset(); serializer.setProperties(props); return serializer.serialize((NodeValue) seq.itemAt(0)); } }