FullTextStringIndexUnitTest.java example

Explorer
mulgara-master
- src
  - jar
  - war
    - server-http
      - java
        HttpServer.java
        HttpServerServlet.java
- tools
  - src
    - org
      - mulgara
        tools
        Sparql.java
        Tql.java
/*
 * The contents of this file are subject to the Mozilla Public License
 * Version 1.1 (the "License"); you may not use this file except in
 * compliance with the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS"
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
 * the License for the specific language governing rights and limitations
 * under the License.
 *
 * The Original Code is the Kowari Metadata Store.
 *
 * The Initial Developer of the Original Code is Plugged In Software Pty
 * Ltd (http://www.pisoftware.com, mailto:info@pisoftware.com). Portions
 * created by Plugged In Software Pty Ltd are Copyright (C) 2001,2002
 * Plugged In Software Pty Ltd. All Rights Reserved.
 *
 * Contributor(s): N/A.
 *
 * [NOTE: The text of this Exhibit A may differ slightly from the text
 * of the notices in the Source Code files of the Original Code. You
 * should use the text of this Exhibit A rather than the text found in the
 * Original Code Source Code for Your Modifications.]
 *
 */

package org.mulgara.resolver.lucene;

// Java 2 standard packages
import java.io.File;
import java.io.FileInputStream;
import java.io.FilenameFilter;
import java.io.InputStreamReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;

// 3rd party
import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
import org.apache.log4j.Logger;

import org.mulgara.util.TempDir;


/**
 * Test cases for FullTextStringIndex.
 *
 * @author Tate Jones
 *
 * @created 2002-03-17
 *
 * @version $Revision: 1.8 $
 *
 * @modified $Date: 2005/01/05 04:58:47 $ by $Author: newmana $
 *
 * @maintenanceAuthor $Author: newmana $
 *
 * @company <A href="mailto:info@PIsoftware.com">Plugged In Software</A>
 *
 * @copyright © 2002-2003 <A href="http://www.PIsoftware.com/">Plugged In
 *      Software Pty Ltd</A>
 *
 * @licence <a href="{@docRoot}/../../LICENCE">Mozilla Public License v1.1</a>
 */
public class FullTextStringIndexUnitTest extends TestCase {
  /**
   * Path of the directory holding the Lucene index files created by these tests: a
   * "fulltextsp" entry underneath the directory reported by TempDir.getTempDir().
   */
  private static final String indexDirectory =
      TempDir.getTempDir().getPath() + File.separator + "fulltextsp";

  /**
   * Path of the directory containing the text documents used by the file-based test.
   * It is built from the "cvs.root" system property; if that property is not set the
   * concatenation yields a path that starts with the literal text "null".
   */
  private static final String textDirectory =
      System.getProperty("cvs.root") + File.separator + "data" + File.separator +
      "fullTextTestData";

  /** Log4j logger for this test class. */
  private static final Logger logger = Logger.getLogger(FullTextStringIndexUnitTest.class);

  /** The literal strings used as test data; filled in by setUp(). */
  private final List<String> theStrings = new ArrayList<String>();

  /**
   * Creates a test case instance for a single named test.
   *
   * @param name The name of the test; handed unchanged to the TestCase constructor.
   *     suite() passes the name of one of this class's test methods.
   */
  public FullTextStringIndexUnitTest(String name) {
    super(name);
  }

  /**
   * Builds the suite that a test runner executes for this class.
   * The tests are registered explicitly, by method name, in a fixed order.
   *
   * @return A suite holding one instance of this class per registered test name.
   */
  public static Test suite() {
    final String[] testNames = {
      "testFullTextStringPool",
      "testFullTextStringPoolCornerCases",
      "testFullTextStringPoolwithFiles",
      "testFullTextStringPoolTransactions"
    };

    final TestSuite allTests = new TestSuite();
    for (String testName : testNames) {
      allTests.addTest(new FullTextStringIndexUnitTest(testName));
    }

    return allTests;
  }

  /**
   * Command line entry point: runs the tests returned by suite() with the JUnit
   * text-mode runner.
   *
   * @param args The command line arguments (not used)
   */
  public static void main(String[] args) {
    final Test allTests = suite();
    junit.textui.TestRunner.run(allTests);
  }

  /**
   * Loads the test data: appends a fixed list of literal strings to theStrings.
   * No index is created here; each test creates its own.
   * The hit counts asserted in testFullTextStringPool() are tied to exactly these
   * entries, so changing the list means revisiting those counts.
   *
   * @throws IOException declared in the signature; nothing in this method performs I/O
   */
  public void setUp() throws IOException {
    final String[] fixture = {
      "AACP Pneumothorax Consensus Group",
      "ALS-HPS Steering Group",
      "ALSPAC (Avon Longitudinal Study of Parents and Children) Study Team",
      "ALTS Study group",
      "American Academy of Asthma, Allergy and Immunology",
      "American Association for the Surgery of Trauma",
      "American College of Chest Physicians",
      "Antiarrhythmics Versus Implantable Defibrillator (AVID) Trial Investigators",
      "Antibiotic Use Working Group",
      "Atypical Squamous Cells Intraepithelial",
      "Lesion Triage Study (ALTS) Group",
      "Australasian Society for Thrombosis and Haemostasis (ASTH) Emerging Technologies Group",
      "Benefit Evaluation of Direct Coronary Stenting Study Group",
      "Biomarkers Definitions Working Group.",
      "Canadian Colorectal Surgery DVT Prophylaxis Trial investigators",
      "Cancer Research Campaign Phase I - II Committee",
      "Central Technical Coordinating Unit",
      "Clinical Epidemiology Group from the French Hospital Database on HIV",
      "CNAAB3005 International Study Team",
      "Commissione ad hoc",
      "Committee to Advise on Tropical Medicine and Travel",
      "Comparison of Candesartan and Amlodipine for Safety, Tolerability and Efficacy (CASTLE) Study Investigators",
      "Council on Scientific Affairs, American Medical Association",
      "Dana Consortium on the Therapy of HIV-Dementia and Related Cognitive Disorders",
      "Danish Committee on Scientific Dishonesty",
      "Dengue Network Philippines",
      "Donepezil Study Group",
      "EBPG (European Expert Group on Renal Transplantation)",
      "Arbeitsgemeinschaft Dermatologische Histologie (ADH) der DDG.",
      "EORTC Early Clinical Studies Group",
      "European Renal Association (ERA-EDTA)",
      "European Society for Organ Transplantation (ESOT)",
      "European Study Investigators",
      "European Canadian Glatiramer Acetate Study Group",
      "FAMI Investigator Group",
      "French EGEA study",
      "French National Medical and Health Research Institute",
      "French Parkinson's Disease Genetics Study Group. The European Consortium on Genetic",
      "Susceptibility in Parkinson's Disease",
      "German Hodgkin Study Group",
      "Groupe d'Etude des Lymphomes de l'Adulte (GELA)",
      "Groupe d'Etude et de Recherche Clinique en Oncologie Radiotherapies",
      "Hemophilia Behavioral Intervention Study Group",
      "Hepatitis Interventional Therapy Group",
      "HIV Epidemiology Research Study Group",
      "Houston Congenital CMV Longitudinal Study Group",
      "International Council for Science's Standing Committee on Responsibility and Ethics in Science",
      "International Evidence-Based Group for Neonatal Pain",

      // short entries used by the removal and stemming checks
      "one",
      "one two",
      "one two three",
      "holidays"
    };

    // Append in declaration order so the list content matches the table above.
    for (String entry : fixture) {
      theStrings.add(entry);
    }
  }

  /**
   * Per-test cleanup hook; intentionally empty.
   * Nothing is closed here: the tests in this class close their own index and cache,
   * and remove the index files, in their own finally blocks.
   *
   * @throws IOException declared in the signature; never thrown by this empty body
   */
  public void tearDown() throws IOException {
  }

  /**
   * Walks the full-text index through its basic life cycle using the strings loaded by
   * setUp():
   * <ol>
   *   <li>adds every string under one subject/predicate pair and commits;</li>
   *   <li>checks every string is found by subject and predicate, by subject only, and by
   *       predicate only;</li>
   *   <li>checks the quoted term "holiday" has no hits although "holidays" was indexed
   *       (no stemming match);</li>
   *   <li>removes three strings and checks they can no longer be found;</li>
   *   <li>checks blank literals are rejected and a literal containing slashes is accepted;</li>
   *   <li>runs queries that start with a wildcard and checks the hit counts;</li>
   *   <li>removes everything and checks the index returns no hits.</li>
   * </ol>
   * After every commit the index is closed and re-opened before results are checked.
   *
   * @throws Exception Test fails
   */
  public void testFullTextStringPool() throws Exception {
    LuceneIndexerCache indexerCache = new LuceneIndexerCache(indexDirectory);
    FullTextStringIndex ftIndex = null;

    try {
      final String subject = "http://mulgara.org/mulgara/document#";
      final String predicate = "http://mulgara.org/mulgara/document#has";

      // Start from a clean slate: drop any index files left behind by an earlier run.
      indexerCache.close();
      indexerCache.removeAllIndexes();
      indexerCache = new LuceneIndexerCache(indexDirectory);

      // NOTE(review): the original comment here asked to "ensure that reverse search is
      // enabled"; presumably one of the boolean flags does that -- confirm against the
      // FullTextStringIndex constructor.
      ftIndex = new FullTextStringIndex(indexerCache, true, true);

      // Load all of the test strings.
      for (String literal : theStrings) {
        ftIndex.add(subject, predicate, literal);
      }

      ftIndex.commit();
      ftIndex.close();
      ftIndex = new FullTextStringIndex(indexerCache, true, true);

      // Every string must be found for each of these subject/predicate combinations.
      final String[][] lookups = {
        { subject, predicate },  // both subject and predicate
        { subject, null },       // subject only
        { null, predicate }      // predicate only
      };
      for (String[] lookup : lookups) {
        for (String literal : theStrings) {
          testHas("failed to find '" + literal + "'", ftIndex, lookup[0], lookup[1], literal);
        }
      }

      // "holidays" is indexed; the quoted singular form must not match it.
      testFind("Stemming match search failed", 0, ftIndex, null, null, "\"holiday\"");

      /* Enable when TODO in remove() is fixed
      assertFalse("Should not be able to delete fulltext literal due to incorrect value",
                  ftIndex.remove(subject, predicate, "holiday"));
       */

      ftIndex.remove(subject, predicate, "one two");
      ftIndex.remove(subject, predicate, "one");
      ftIndex.remove(subject, predicate, "one two three");

      ftIndex.commit();
      ftIndex.close();
      ftIndex = new FullTextStringIndex(indexerCache, true, true);

      // The three removed literals must be gone.
      testFind("Presumed deleted but found 'one two'", 0,
               ftIndex, subject, predicate, "one two");
      testFind("Presumed deleted but found 'one'", 0,
               ftIndex, subject, predicate, "one");
      testFind("Presumed deleted but found 'one two three'", 0,
               ftIndex, subject, predicate, "one two three");

      // Empty and whitespace-only literals must be refused.
      assertFalse("Adding an empty literal string should fail",
                  ftIndex.add("subject", "predicate", ""));
      assertFalse("Adding an empty literal string should fail",
                  ftIndex.add("subject", "predicate", "  "));

      assertTrue("Adding a string containing slashes to the fulltext string pool",
                 ftIndex.add("subject", "predicate", "this/is/a/slash/test"));

      ftIndex.commit();
      ftIndex.close();
      ftIndex = new FullTextStringIndex(indexerCache, true, true);

      // Queries whose terms begin with a wildcard.
      testFind("Reverse lookup was expecting 4 documents returned", 4,
               ftIndex, subject, predicate, "?ommittee");

      testFind("Reverse lookup was expecting 3 documents returned", 3,
               ftIndex, subject, predicate, "*iv");

      testFind("Reverse lookup was expecting 26 documents returned", 26,
               ftIndex, subject, predicate, "study *roup");

      testFind("Reverse lookup was expecting 10 documents returned", 10,
               ftIndex, subject, predicate, "+study +*roup");

      testFind("Reverse lookup was expecting 11 documents returned", 11,
               ftIndex, subject, predicate, "-study +*roup");

      testFind("Reverse lookup was expecting 1 document returned", 1,
               ftIndex, subject, predicate, "+*hrombosis");

      // Wipe the index and make sure nothing is left.
      ftIndex.removeAll();
      ftIndex.commit();
      ftIndex.close();
      ftIndex = new FullTextStringIndex(indexerCache, true, true);

      testFind("Got unexpected documents after removeAll:", 0,
               ftIndex, subject, predicate, "European");

      testFind("Got unexpected documents after removeAll:", 0,
               ftIndex, subject, predicate, "+study +*roup");
    } finally {
      // Release the index and cache, then delete the index files created by this test.
      if (ftIndex != null) {
        ftIndex.close();
      }
      indexerCache.close();
      assertTrue("Unable to remove all index files", indexerCache.removeAllIndexes());
    }
  }


  /**
   * Checks how add() and remove() treat degenerate arguments.
   * For both operations a null or empty subject, a null or empty predicate, and a null
   * literal must raise a FullTextStringIndexException. An empty literal is passed to both
   * operations as well and must not raise an exception; its return value is not checked
   * here.
   *
   * @throws Exception Test fails
   */
  public void testFullTextStringPoolCornerCases() throws Exception {
    LuceneIndexerCache indexerCache = new LuceneIndexerCache(indexDirectory);
    FullTextStringIndex ftIndex = null;

    try {
      final String subject = "http://mulgara.org/mulgara/document#";
      final String predicate = "http://mulgara.org/mulgara/document#has";

      // Start from a clean slate: drop any index files left behind by an earlier run.
      indexerCache.close();
      indexerCache.removeAllIndexes();
      indexerCache = new LuceneIndexerCache(indexDirectory);

      ftIndex = new FullTextStringIndex(indexerCache, true, true);

      // ---- add(): invalid arguments must be rejected with an exception ----
      try {
        ftIndex.add(null, predicate, "foo");
        fail("exception expected for adding null subject");
      } catch (FullTextStringIndexException expected) {
        // expected: null subject rejected
      }

      try {
        ftIndex.add(subject, null, "foo");
        fail("exception expected for adding null predicate");
      } catch (FullTextStringIndexException expected) {
        // expected: null predicate rejected
      }

      try {
        ftIndex.add(subject, predicate, null);
        fail("exception expected for adding null literal");
      } catch (FullTextStringIndexException expected) {
        // expected: null literal rejected
      }

      try {
        ftIndex.add("", predicate, "foo");
        fail("exception expected for adding empty subject");
      } catch (FullTextStringIndexException expected) {
        // expected: empty subject rejected
      }

      try {
        ftIndex.add(subject, "", "foo");
        fail("exception expected for adding empty predicate");
      } catch (FullTextStringIndexException expected) {
        // expected: empty predicate rejected
      }

      // An empty literal must not throw.
      ftIndex.add(subject, predicate, "");

      // ---- remove(): the same invalid arguments must be rejected ----
      try {
        ftIndex.remove(null, predicate, "foo");
        fail("exception expected for removing null subject");
      } catch (FullTextStringIndexException expected) {
        // expected: null subject rejected
      }

      try {
        ftIndex.remove(subject, null, "foo");
        fail("exception expected for removing null predicate");
      } catch (FullTextStringIndexException expected) {
        // expected: null predicate rejected
      }

      try {
        ftIndex.remove(subject, predicate, null);
        fail("exception expected for removing null literal");
      } catch (FullTextStringIndexException expected) {
        // expected: null literal rejected
      }

      try {
        ftIndex.remove("", predicate, "foo");
        fail("exception expected for removing empty subject");
      } catch (FullTextStringIndexException expected) {
        // expected: empty subject rejected
      }

      try {
        ftIndex.remove(subject, "", "foo");
        fail("exception expected for removing empty predicate");
      } catch (FullTextStringIndexException expected) {
        // expected: empty predicate rejected
      }

      // An empty literal must not throw here either.
      ftIndex.remove(subject, predicate, "");

    } finally {
      // Release the index and cache, then delete the index files created by this test.
      if (ftIndex != null) {
        ftIndex.close();
      }
      indexerCache.close();
      assertTrue("Unable to remove all index files", indexerCache.removeAllIndexes());
    }
  }

  /**
   * Indexes text files from disk and then searches for and removes some of them:
   * <ol>
   *   <li>adds every ".txt" file found in textDirectory to the index, using the file's URI
   *       as both subject and literal and reading the content through a Reader;</li>
   *   <li>checks that 114 documents were indexed;</li>
   *   <li>checks that a search for "supernatural" returns 6 hits;</li>
   *   <li>removes those 6 documents and checks the search then returns no hits.</li>
   * </ol>
   * The test fails with an explicit message if the text directory cannot be listed.
   *
   * @throws Exception Test fails
   */
  public void testFullTextStringPoolwithFiles() throws Exception {
    // predicate under which the content of each text document is indexed
    final String contentPredicate = "http://mulgara.org/mulgara/Document#Content";

    // create a new index directory
    LuceneIndexerCache cache = new LuceneIndexerCache(indexDirectory);
    FullTextStringIndex index = null;

    try {
      // make sure the index directory is empty
      cache.close();
      assertTrue("Unable to remove all index files", cache.removeAllIndexes());
      cache = new LuceneIndexerCache(indexDirectory);

      // create a new index
      index = new FullTextStringIndex(cache, true, true);

      logger.debug("Obtaining text text documents from " + textDirectory);

      File directory = new File(textDirectory);
      File[] textDocuments = directory.listFiles(new FilenameFilter() {
        public boolean accept(File dir, String name) {
          return name.endsWith(".txt");
        }
      });

      // listFiles() returns null, not an empty array, when the path is not a directory or
      // cannot be read (for example when the cvs.root system property is not set). Fail with
      // a clear message rather than a NullPointerException in the loop below.
      assertNotNull("Unable to list text documents in " + textDirectory, textDocuments);

      // keep track of the number of documents added.
      int docsAdded = 0;

      // Loop over the text documents located in the text directory
      for (File doc : textDocuments) {
        if (doc.isFile()) {
          String docUri = doc.toURI().toString();

          // open a reader to the text file.
          // NOTE(review): this decodes with the platform default charset; confirm the
          // encoding of the test data before pinning an explicit one.
          Reader reader = new InputStreamReader(new FileInputStream(doc));

          try {
            // Add the text document to the index
            if (index.add(docUri, contentPredicate, docUri, reader)) {
              logger.debug("Indexed text document " + doc.toString());
              docsAdded++;
            }
          } finally {
            // always release the file handle, even if indexing throws
            reader.close();
          }
        }
      }

      logger.debug("Text documents indexed :" + docsAdded);

      // check if all text documents were indexed
      assertEquals("Expected 114 text documents to be indexed", 114, docsAdded);

      // commit the new docs
      index.commit();
      index.close();
      index = new FullTextStringIndex(cache, true, true);

      // Perform a search for 'supernatural' in the
      // document content predicate
      FullTextStringIndex.Hits hits = index.find(null, contentPredicate, "supernatural");

      // loop through the results and remove the documents containing
      // the word 'supernatural'
      int docsRemoved = 0;

      try {
        // check the expected number of documents matched
        assertEquals("Expected 6 hits with the word 'supernatural'", 6, hits.length());

        for (int docNo = 0; docNo < hits.length(); docNo++) {
          String uri = hits.doc(docNo, null).getFieldable(FullTextStringIndex.SUBJECT_KEY).stringValue();

          logger.debug("Found supernatural in :" + uri);

          // Remove the text documents from the index
          if (index.remove(uri, contentPredicate, uri)) {
            docsRemoved++;
          }
        }
      } finally {
        // release the hits even when an assertion or a removal fails
        hits.close();
      }

      // check the documents were removed
      assertEquals("Expected 6 documents to be removed'", 6, docsRemoved);

      // commit the removal
      index.commit();
      index.close();
      index = new FullTextStringIndex(cache, true, true);

      // Perform a search for 'supernatural' in the
      // document content predicate
      // check if all text documents are not present.
      testFind("Expected 0 hits with the word 'supernatural'", 0,
          index, null, contentPredicate, "supernatural");
    } finally {
      // close the fulltextstringpool
      if (index != null) index.close();
      cache.close();
      assertTrue("Unable to remove all index files", cache.removeAllIndexes());
    }
  }

  /**
   * Runs the commit/rollback scenario twice: first without a prepare step before the
   * rollback and commit, then with one.
   *
   * @throws Exception Test fails
   */
  public void testFullTextStringPoolTransactions() throws Exception {
    final boolean[] prepareModes = { false, true };
    for (boolean prepareFirst : prepareModes) {
      doTestFullTextStringPoolTransactions(prepareFirst);
    }
  }

  /**
   * Runs one commit/rollback scenario against a fresh index:
   * <ol>
   *   <li>adds all test strings, rolls back, re-opens the index and checks that none of the
   *       strings can be found;</li>
   *   <li>adds all test strings again, commits, re-opens the index and checks that every
   *       string can be found.</li>
   * </ol>
   * The lookups go through testFind() and testHas() so that the hits are closed even when
   * an assertion fails.
   *
   * @param prepare when true, index.prepare() is called before the rollback and before the
   *     commit
   * @throws Exception Test fails
   */
  private void doTestFullTextStringPoolTransactions(boolean prepare) throws Exception {
    // create a new index directory
    LuceneIndexerCache cache = new LuceneIndexerCache(indexDirectory);
    FullTextStringIndex index = null;

    String document = "http://mulgara.org/mulgara/document#";
    String has = "http://mulgara.org/mulgara/document#has";

    try {
      //Clean any existing indexes.
      cache.close();
      cache.removeAllIndexes();
      cache = new LuceneIndexerCache(indexDirectory);

      //create the index
      // NOTE(review): the third constructor flag is false here but true in the other tests;
      // its meaning is not visible in this file -- confirm against FullTextStringIndex.
      index = new FullTextStringIndex(cache, true, false);

      // Add strings to the index
      for (String literal : theStrings) {
        index.add(document, has, literal);
      }

      // roll back
      if (prepare) {
        index.prepare();
      }
      index.rollback();
      index.close();

      // ensure strings are not there
      index = new FullTextStringIndex(cache, true, false);

      for (String literal : theStrings) {
        testFind("Unexpectedly found '" + literal + "'", 0, index, document, has, literal);
      }

      // add strings to index again
      for (String literal : theStrings) {
        index.add(document, has, literal);
      }

      // this time commit
      if (prepare) {
        index.prepare();
      }
      index.commit();
      index.close();

      // ensure strings are there now
      index = new FullTextStringIndex(cache, true, false);

      for (String literal : theStrings) {
        testHas("Did not find '" + literal + "'", index, document, has, literal);
      }
    } finally {
      // close the fulltextstringpool
      if (index != null) index.close();
      cache.close();
      assertTrue("Unable to remove all index files", cache.removeAllIndexes());
    }
  }
  
  /**
   * Runs a search and asserts that it returns exactly the expected number of hits.
   * The hits are closed whether or not the assertion passes.
   *
   * @param msg   message reported when the hit count differs from the expected one
   * @param len   the expected number of hits
   * @param index the index to search
   * @param s     the subject passed to find(); callers in this class also pass null
   * @param p     the predicate passed to find(); callers in this class also pass null
   * @param o     the literal or query text passed to find()
   * @throws Exception if the search or the close fails
   */
  private static void testFind(String msg, int len, FullTextStringIndex index, String s, String p, String o) throws Exception {
    final FullTextStringIndex.Hits results = index.find(s, p, o);
    assertNotNull(results);

    try {
      assertEquals(msg, len, results.length());
    } finally {
      // release the search results even if the count check failed
      results.close();
    }
  }
  
  /**
   * Runs a search and asserts that it returns at least one hit.
   * The hits are closed whether or not the assertion passes.
   *
   * @param msg   message reported when the search returns no hits
   * @param index the index to search
   * @param s     the subject passed to find(); callers in this class also pass null
   * @param p     the predicate passed to find(); callers in this class also pass null
   * @param o     the literal or query text passed to find()
   * @throws Exception if the search or the close fails
   */
  private static void testHas(String msg, FullTextStringIndex index, String s, String p, String o) throws Exception {
    final FullTextStringIndex.Hits results = index.find(s, p, o);
    assertNotNull(results);

    try {
      final boolean foundAny = results.length() > 0;
      assertTrue(msg, foundAny);
    } finally {
      // release the search results even if the check failed
      results.close();
    }
  }
}