/* * The contents of this file are subject to the Mozilla Public License * Version 1.1 (the "License"); you may not use this file except in * compliance with the License. You may obtain a copy of the License at * http://www.mozilla.org/MPL/ * * Software distributed under the License is distributed on an "AS IS" * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See * the License for the specific language governing rights and limitations * under the License. * * The Original Code is the Kowari Metadata Store. * * The Initial Developer of the Original Code is Plugged In Software Pty * Ltd (http://www.pisoftware.com, mailto:info@pisoftware.com). Portions * created by Plugged In Software Pty Ltd are Copyright (C) 2001,2002 * Plugged In Software Pty Ltd. All Rights Reserved. * * Contributor(s): N/A. * * [NOTE: The text of this Exhibit A may differ slightly from the text * of the notices in the Source Code files of the Original Code. You * should use the text of this Exhibit A rather than the text found in the * Original Code Source Code for Your Modifications.] * */ package org.mulgara.resolver.lucene; // Java 2 standard packages import java.io.File; import java.io.FileInputStream; import java.io.FilenameFilter; import java.io.InputStreamReader; import java.io.IOException; import java.io.Reader; import java.util.ArrayList; import java.util.List; // 3rd party import junit.framework.Test; import junit.framework.TestCase; import junit.framework.TestSuite; import org.apache.log4j.Logger; import org.mulgara.util.TempDir; /** * Test cases for FullTextStringIndex. * * @author Tate Jones * * @created 2002-03-17 * * @version $Revision: 1.8 $ * * @modified $Date: 2005/01/05 04:58:47 $ by $Author: newmana $ * * @maintenanceAuthor $Author: newmana $ * * @company <A href="mailto:info@PIsoftware.com">Plugged In Software</A> * * @copyright © 2002-2003 <A href="http://www.PIsoftware.com/">Plugged In * Software Pty Ltd</A> * * @licence <a href="{@docRoot}/../../LICENCE">Mozilla Public License v1.1</a> */ public class FullTextStringIndexUnitTest extends TestCase { /** Directory for the indexes */ private final static String indexDirectory = TempDir.getTempDir().getPath() + File.separator + "fulltextsp"; /** The directory containing the text documents */ private final static String textDirectory = System.getProperty("cvs.root") + File.separator + "data" + File.separator + "fullTextTestData"; /** Logger */ private final static Logger logger = Logger.getLogger(FullTextStringIndexUnitTest.class); /** Hold a list of test data */ private List<String> theStrings = new ArrayList<String>(); /** * Create the testing class * * @param name The name of the test. */ public FullTextStringIndexUnitTest(String name) { super(name); } /** * Hook for test runner to obtain a test suite from. * * @return The test suite to run. */ public static Test suite() { TestSuite suite = new TestSuite(); suite.addTest(new FullTextStringIndexUnitTest("testFullTextStringPool")); suite.addTest(new FullTextStringIndexUnitTest("testFullTextStringPoolCornerCases")); suite.addTest(new FullTextStringIndexUnitTest("testFullTextStringPoolwithFiles")); suite.addTest(new FullTextStringIndexUnitTest("testFullTextStringPoolTransactions")); return suite; } /** * Default test runner. * * @param args The command line arguments */ public static void main(String[] args) { junit.textui.TestRunner.run(suite()); } /** * Creates a new index required to do the testing. * * @throws IOException Description of Exception */ public void setUp() throws IOException { //Populate a list of strings theStrings.add("AACP Pneumothorax Consensus Group"); theStrings.add("ALS-HPS Steering Group"); theStrings.add( "ALSPAC (Avon Longitudinal Study of Parents and Children) Study Team"); theStrings.add("ALTS Study group"); theStrings.add("American Academy of Asthma, Allergy and Immunology"); theStrings.add("American Association for the Surgery of Trauma"); theStrings.add("American College of Chest Physicians"); theStrings.add( "Antiarrhythmics Versus Implantable Defibrillator (AVID) Trial Investigators"); theStrings.add("Antibiotic Use Working Group"); theStrings.add("Atypical Squamous Cells Intraepithelial"); theStrings.add("Lesion Triage Study (ALTS) Group"); theStrings.add( "Australasian Society for Thrombosis and Haemostasis (ASTH) Emerging Technologies Group"); theStrings.add("Benefit Evaluation of Direct Coronary Stenting Study Group"); theStrings.add("Biomarkers Definitions Working Group."); theStrings.add( "Canadian Colorectal Surgery DVT Prophylaxis Trial investigators"); theStrings.add("Cancer Research Campaign Phase I - II Committee"); theStrings.add("Central Technical Coordinating Unit"); theStrings.add( "Clinical Epidemiology Group from the French Hospital Database on HIV"); theStrings.add("CNAAB3005 International Study Team"); theStrings.add("Commissione ad hoc"); theStrings.add("Committee to Advise on Tropical Medicine and Travel"); theStrings.add( "Comparison of Candesartan and Amlodipine for Safety, Tolerability and Efficacy (CASTLE) Study Investigators"); theStrings.add( "Council on Scientific Affairs, American Medical Association"); theStrings.add( "Dana Consortium on the Therapy of HIV-Dementia and Related Cognitive Disorders"); theStrings.add("Danish Committee on Scientific Dishonesty"); theStrings.add("Dengue Network Philippines"); theStrings.add("Donepezil Study Group"); theStrings.add("EBPG (European Expert Group on Renal Transplantation)"); theStrings.add( "Arbeitsgemeinschaft Dermatologische Histologie (ADH) der DDG."); theStrings.add("EORTC Early Clinical Studies Group"); theStrings.add("European Renal Association (ERA-EDTA)"); theStrings.add("European Society for Organ Transplantation (ESOT)"); theStrings.add("European Study Investigators"); theStrings.add("European Canadian Glatiramer Acetate Study Group"); theStrings.add("FAMI Investigator Group"); theStrings.add("French EGEA study"); theStrings.add("French National Medical and Health Research Institute"); theStrings.add( "French Parkinson's Disease Genetics Study Group. The European Consortium on Genetic"); theStrings.add("Susceptibility in Parkinson's Disease"); theStrings.add("German Hodgkin Study Group"); theStrings.add("Groupe d'Etude des Lymphomes de l'Adulte (GELA)"); theStrings.add( "Groupe d'Etude et de Recherche Clinique en Oncologie Radiotherapies"); theStrings.add("Hemophilia Behavioral Intervention Study Group"); theStrings.add("Hepatitis Interventional Therapy Group"); theStrings.add("HIV Epidemiology Research Study Group"); theStrings.add("Houston Congenital CMV Longitudinal Study Group"); theStrings.add( "International Council for Science's Standing Committee on Responsibility and Ethics in Science"); theStrings.add("International Evidence-Based Group for Neonatal Pain"); theStrings.add("one"); theStrings.add("one two"); theStrings.add("one two three"); theStrings.add("holidays"); } /** * Closes the index used for testing. * * @throws IOException Description of Exception */ public void tearDown() throws IOException { } /** * 1. Test the loading of strings into the fulltext string pool 2. Checking * for existance 3. Test non-stemming 4. Test removal of strings * * @throws Exception Test fails */ public void testFullTextStringPool() throws Exception { LuceneIndexerCache cache = new LuceneIndexerCache(indexDirectory); FullTextStringIndex index = null; try { // Ensure that reverse search is enabled. String document = "http://mulgara.org/mulgara/document#"; String has = "http://mulgara.org/mulgara/document#has"; //Clean any existing indexes. cache.close(); cache.removeAllIndexes(); cache = new LuceneIndexerCache(indexDirectory); //create the index index = new FullTextStringIndex(cache, true, true); // Add strings to the index for (String literal : theStrings) { index.add(document, has, literal); } index.commit(); index.close(); index = new FullTextStringIndex(cache, true, true); // Find the strings from the index with both subject & predicate for (String literal : theStrings) { testHas("failed to find '" + literal + "'", index, document, has, literal); } // Find the strings from the index with only subject for (String literal : theStrings) { testHas("failed to find '" + literal + "'", index, document, null, literal); } // Find the strings from the index with only predicate for (String literal : theStrings) { testHas("failed to find '" + literal + "'", index, null, has, literal); } testFind("Stemming match search failed", 0, index, null, null, "\"holiday\""); /* Enable when TODO in remove() is fixed assertFalse("Should not be able to delete fulltext literal due to incorrect value", index.remove(document, has, "holiday")); */ index.remove(document, has, "one two"); index.remove(document, has, "one"); index.remove(document, has, "one two three"); index.commit(); index.close(); index = new FullTextStringIndex(cache, true, true); testFind("Presumed deleted but found 'one two'", 0, index, document, has, "one two"); testFind("Presumed deleted but found 'one'", 0, index, document, has, "one"); testFind("Presumed deleted but found 'one two three'", 0, index, document, has, "one two three"); // don't add empty literals assertFalse("Adding an empty literal string should fail", index.add("subject","predicate", "")); assertFalse("Adding an empty literal string should fail", index.add("subject","predicate", " ")); assertTrue("Adding a string containing slashes to the fulltext string pool", index.add("subject", "predicate", "this/is/a/slash/test")); index.commit(); index.close(); index = new FullTextStringIndex(cache, true, true); testFind("Reverse lookup was expecting 4 documents returned", 4, index, document, has, "?ommittee"); testFind("Reverse lookup was expecting 3 documents returned", 3, index, document, has, "*iv"); testFind("Reverse lookup was expecting 26 documents returned", 26, index, document, has, "study *roup"); testFind("Reverse lookup was expecting 10 documents returned", 10, index, document, has, "+study +*roup"); testFind("Reverse lookup was expecting 11 documents returned", 11, index, document, has, "-study +*roup"); testFind("Reverse lookup was expecting 1 document returned", 1, index, document, has, "+*hrombosis"); // test removing all documents index.removeAll(); index.commit(); index.close(); index = new FullTextStringIndex(cache, true, true); testFind("Got unexpected documents after removeAll:", 0, index, document, has, "European"); testFind("Got unexpected documents after removeAll:", 0, index, document, has, "+study +*roup"); } finally { if (index != null) index.close(); cache.close(); assertTrue("Unable to remove all index files", cache.removeAllIndexes()); } } /** * Test corner cases (null subject, object, predicate, etc). * * @throws Exception Test fails */ public void testFullTextStringPoolCornerCases() throws Exception { LuceneIndexerCache cache = new LuceneIndexerCache(indexDirectory); FullTextStringIndex index = null; try { String document = "http://mulgara.org/mulgara/document#"; String has = "http://mulgara.org/mulgara/document#has"; //Clean any existing indexes. cache.close(); cache.removeAllIndexes(); cache = new LuceneIndexerCache(indexDirectory); //create the index index = new FullTextStringIndex(cache, true, true); // Add strings to the index try { index.add(null, has, "foo"); fail("exception expected for adding null subject"); } catch (FullTextStringIndexException ftsie) { } try { index.add(document, null, "foo"); fail("exception expected for adding null predicate"); } catch (FullTextStringIndexException ftsie) { } try { index.add(document, has, null); fail("exception expected for adding null literal"); } catch (FullTextStringIndexException ftsie) { } try { index.add("", has, "foo"); fail("exception expected for adding empty subject"); } catch (FullTextStringIndexException ftsie) { } try { index.add(document, "", "foo"); fail("exception expected for adding empty predicate"); } catch (FullTextStringIndexException ftsie) { } index.add(document, has, ""); // remove strings from the index try { index.remove(null, has, "foo"); fail("exception expected for removing null subject"); } catch (FullTextStringIndexException ftsie) { } try { index.remove(document, null, "foo"); fail("exception expected for removing null predicate"); } catch (FullTextStringIndexException ftsie) { } try { index.remove(document, has, null); fail("exception expected for removing null literal"); } catch (FullTextStringIndexException ftsie) { } try { index.remove("", has, "foo"); fail("exception expected for removing empty subject"); } catch (FullTextStringIndexException ftsie) { } try { index.remove(document, "", "foo"); fail("exception expected for removing empty predicate"); } catch (FullTextStringIndexException ftsie) { } index.remove(document, has, ""); } finally { if (index != null) index.close(); cache.close(); assertTrue("Unable to remove all index files", cache.removeAllIndexes()); } } /** * 1. Test the loading of text files into the fulltext string pool 2. Checking * for existance 3. Test removal of files * * @throws Exception Test fails */ public void testFullTextStringPoolwithFiles() throws Exception { // create a new index direcotry LuceneIndexerCache cache = new LuceneIndexerCache(indexDirectory); FullTextStringIndex index = null; try { // make sure the index directory is empty cache.close(); assertTrue("Unable to remove all index files", cache.removeAllIndexes()); cache = new LuceneIndexerCache(indexDirectory); // create a new index index = new FullTextStringIndex(cache, true, true); logger.debug("Obtaining text text documents from " + textDirectory); File directory = new File(textDirectory); File[] textDocuments = directory.listFiles(new FilenameFilter() { public boolean accept(File dir, String name) { return name.endsWith(".txt"); } }); // keep a track of the number of documents added. int docsAdded = 0; // Loop over the text documents locatd in the text directory for (File doc : textDocuments) { if (doc.isFile()) { // open a reader to the text file. Reader reader = new InputStreamReader(new FileInputStream(doc)); // Add the text document to the index if (index.add(doc.toURI().toString(), "http://mulgara.org/mulgara/Document#Content", doc.toURI().toString(), reader)) { logger.debug("Indexed text document " + doc.toString()); docsAdded++; } // clean up the stream reader.close(); } } logger.debug("Text documents indexed :" + docsAdded); // check if all text documents were indexed assertEquals("Expected 114 text documents to be indexed", 114, docsAdded); // commit the new docs index.commit(); index.close(); index = new FullTextStringIndex(cache, true, true); // Perform a search for 'supernatural' in the // document content predicate FullTextStringIndex.Hits hits = index.find(null, "http://mulgara.org/mulgara/Document#Content", "supernatural"); // check if all text documents were indexed assertEquals("Expected 6 hits with the word 'supernatural'", 6, hits.length()); // loop through the results and remove the documents containing // the word 'supernatural' int docsRemoved = 0; for (int docNo = 0; docNo < hits.length(); docNo++) { String uri = hits.doc(docNo, null).getFieldable(FullTextStringIndex.SUBJECT_KEY).stringValue(); logger.debug("Found supernatural in :" + uri); // Remove the text documents from the index if (index.remove(uri, "http://mulgara.org/mulgara/Document#Content", uri)) { docsRemoved++; } } hits.close(); // check the document were removed assertEquals("Expected 6 documents to be removed'", 6, docsRemoved); // commit the removal index.commit(); index.close(); index = new FullTextStringIndex(cache, true, true); // Perform a search for 'supernatural' in the // document content predicate // check if all text documents are not present. testFind("Expected 0 hits with the word 'supernatural'", 0, index, null, "http://mulgara.org/mulgara/Document#Content", "supernatural"); } finally { // close the fulltextstringpool if (index != null) index.close(); cache.close(); assertTrue("Unable to remove all index files", cache.removeAllIndexes()); } } /** * Test commit and rollback, with and without a prepare. * * @throws Exception Test fails */ public void testFullTextStringPoolTransactions() throws Exception { doTestFullTextStringPoolTransactions(false); doTestFullTextStringPoolTransactions(true); } private void doTestFullTextStringPoolTransactions(boolean prepare) throws Exception { // create a new index direcotry LuceneIndexerCache cache = new LuceneIndexerCache(indexDirectory); FullTextStringIndex index = null; String document = "http://mulgara.org/mulgara/document#"; String has = "http://mulgara.org/mulgara/document#has"; try { //Clean any existing indexes. cache.close(); cache.removeAllIndexes(); cache = new LuceneIndexerCache(indexDirectory); //create the index index = new FullTextStringIndex(cache, true, false); // Add strings to the index for (String literal : theStrings) { index.add(document, has, literal); } // roll back if (prepare) index.prepare(); index.rollback(); index.close(); // ensure strings are not there index = new FullTextStringIndex(cache, true, false); for (String literal : theStrings) { FullTextStringIndex.Hits hits = index.find(document, has, literal); assertTrue("Unexpectedly found '" + literal + "'", hits.length() == 0); hits.close(); } // add strings to index again for (String literal : theStrings) { index.add(document, has, literal); } // this time commit if (prepare) index.prepare(); index.commit(); index.close(); // ensure strings are there now index = new FullTextStringIndex(cache, true, false); for (String literal : theStrings) { FullTextStringIndex.Hits hits = index.find(document, has, literal); assertTrue("Did not find '" + literal + "'", hits.length() != 0); hits.close(); } } finally { // close the fulltextstringpool if (index != null) index.close(); cache.close(); assertTrue("Unable to remove all index files", cache.removeAllIndexes()); } } private static void testFind(String msg, int len, FullTextStringIndex index, String s, String p, String o) throws Exception { FullTextStringIndex.Hits hits = index.find(s, p, o); assertNotNull(hits); try { assertEquals(msg, len, hits.length()); } finally { hits.close(); } } private static void testHas(String msg, FullTextStringIndex index, String s, String p, String o) throws Exception { FullTextStringIndex.Hits hits = index.find(s, p, o); assertNotNull(hits); try { assertTrue(msg, hits.length() > 0); } finally { hits.close(); } } }