/*******************************************************************************
* Copyright (C) 2001, 2007 University of Sydney
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301
* USA
*
* http://www.gnu.org/licenses/gpl.txt
*******************************************************************************/
package tml.test;
import java.io.File;
import java.io.IOException;
import java.sql.SQLException;
import java.util.List;
import static org.junit.Assert.*;
import org.apache.lucene.queryParser.ParseException;
import org.junit.BeforeClass;
import org.junit.Test;
import tml.Configuration;
import tml.corpus.TextDocument;
import tml.storage.Repository;
import tml.storage.importers.TextImporter;
import tml.vectorspace.NoDocumentsInCorpusException;
import tml.vectorspace.NotEnoughTermsInCorpusException;
import tml.vectorspace.TermWeightingException;
/**
* This class tests the processing of documents with TML.
*
* @author Jorge Villalon
*
*/
public class IndexingDocumentsTest extends AbstractTmlIndexingTest {

    /**
     * Number of Lucene index entries that the tests below expect for a
     * document made of one paragraph with one sentence.
     */
    private static final int ENTRIES_PER_SIMPLE_DOCUMENT = 3;

    /** Path handed to {@link Repository} by the invalid-path test. */
    private static final String TESTS_LUCENE_INVALID_PATH = "tests/lucene/nonexistent";

    /** Folder added as a whole by {@link #testAddFilesInFolder()}; set in {@link #setUpBeforeClass()}. */
    private static String TESTS_DOCUMENTS_FOLDER = null;

    /** Folder holding the individual files of {@link #LIST_OF_FILES}; set in {@link #setUpBeforeClass()}. */
    private static String TESTS_NEW_DOCUMENTS_FOLDER = null;

    /** Explicit list of files added by {@link #testAddFilesInList()}; set in {@link #setUpBeforeClass()}. */
    private static File[] LIST_OF_FILES = null;

    /**
     * Runs the parent class setup and then resolves the corpus locations
     * relative to the configured TML folder.
     *
     * @throws java.lang.Exception if the parent setup fails
     */
    @BeforeClass
    public static void setUpBeforeClass() throws Exception {
        AbstractTmlIndexingTest.setUpBeforeClass();
        TESTS_DOCUMENTS_FOLDER = Configuration.getTmlFolder() + "/corpora/introLSA";
        TESTS_NEW_DOCUMENTS_FOLDER = Configuration.getTmlFolder() + "/corpora/handbookOfLSA";
        LIST_OF_FILES = new File[] {
            new File(TESTS_NEW_DOCUMENTS_FOLDER + "/b1.txt"),
            new File(TESTS_NEW_DOCUMENTS_FOLDER + "/b2.txt")
        };
    }

    /**
     * Returns the number of entries currently reported by the index reader
     * of the shared repository.
     *
     * @return the value of {@code numDocs()} on the repository's index reader
     * @throws IOException if the index reader cannot be obtained
     */
    private int countIndexEntries() throws IOException {
        return repository.getIndexReader().numDocs();
    }

    /**
     * Verifies that a clean index contains no documents.
     *
     * @throws Exception if the repository cannot be queried
     */
    @Test
    public void testCleanIndex() throws Exception {
        List<TextDocument> documents = repository.getAllTextDocuments();
        assertEquals("A clean index must not contain text documents",
                0, documents.size());
    }

    /**
     * Validates that an invalid path raises an exception.
     *
     * @throws IOException expected, because the path is invalid
     * @throws SQLException declared by the repository constructor
     */
    @Test(expected = IOException.class)
    public void testLuceneIndexInvalid() throws IOException, SQLException {
        // Deliberately not stored in the shared 'repository' field: should the
        // constructor unexpectedly succeed, only this test must fail instead of
        // every later test running against a repository at the bogus path.
        new Repository(TESTS_LUCENE_INVALID_PATH);
    }

    /**
     * Adding a document with one paragraph and one sentence should create
     * three documents in the Lucene index.
     *
     * @throws Exception if the document cannot be added
     */
    @Test
    public void testAddDocument() throws Exception {
        int entriesBefore = countIndexEntries();

        repository.addDocument("myExternalId", "myContent needs two words",
                "myTitle", "myUrl", new TextImporter());

        assertEquals("Unexpected number of index entries after adding one document",
                entriesBefore + ENTRIES_PER_SIMPLE_DOCUMENT, countIndexEntries());
    }

    /**
     * Adding all the files of a folder should create the right number of
     * documents in the index.
     *
     * @throws Exception if the folder cannot be processed
     */
    @Test
    public void testAddFilesInFolder() throws Exception {
        int entriesBefore = countIndexEntries();

        repository.addDocumentsInFolder(TESTS_DOCUMENTS_FOLDER);

        // 27 is the entry count expected for the introLSA corpus folder.
        assertEquals("Unexpected number of index entries after adding a folder",
                entriesBefore + 27, countIndexEntries());
    }

    /**
     * Adding an explicit list of files, instead of a whole folder, should
     * create the right number of documents in the index.
     *
     * @throws IOException if the files or the index cannot be read
     */
    @Test
    public void testAddFilesInList() throws IOException {
        int entriesBefore = countIndexEntries();

        repository.addDocumentsInList(LIST_OF_FILES);

        // 6 is the entry count expected for the two listed files.
        assertEquals("Unexpected number of index entries after adding a list of files",
                entriesBefore + 6, countIndexEntries());
    }

    /**
     * Deleting a previously added document should remove its three entries
     * from the index.
     *
     * @throws Exception if the document cannot be added, found or deleted
     */
    @Test
    public void testDeleteDocument() throws Exception {
        repository.addDocument("myExternalId", "myContent needs two words, no less than that",
                "myTitle", "myUrl", new TextImporter());
        // Baseline is taken after the insertion so that only the deletion is measured.
        int entriesBefore = countIndexEntries();

        TextDocument document = repository.getTextDocument("myExternalId");
        assertNotNull("The document that was just added could not be retrieved", document);
        repository.deleteTextDocument(document);

        assertEquals("Unexpected number of index entries after deleting one document",
                entriesBefore - ENTRIES_PER_SIMPLE_DOCUMENT, countIndexEntries());
    }

    /**
     * Test method for {@link tml.storage.Repository#getAnalyzer()}.
     */
    @Test
    public void testGetAnalyzer() {
        assertNotNull(repository.getAnalyzer());
    }

    /**
     * Test method for {@link tml.storage.Repository#getIndexReader()}.
     *
     * @throws IOException if the index reader cannot be obtained
     */
    @Test
    public void testGetIndexReader() throws IOException {
        assertNotNull(repository.getIndexReader());
    }

    /**
     * Checks that the shared {@code repository} fixture is available.
     * <p>
     * NOTE(review): the previous Javadoc linked to
     * {@code Repository#LuceneIndex(String, String)}, which does not match the
     * one-argument {@code Repository} constructor used in this class; confirm
     * which member this test is meant to cover.
     *
     * @throws IOException declared for compatibility; not thrown by the body
     */
    @Test
    public void testLuceneIndex() throws IOException {
        assertNotNull(repository);
    }

    /**
     * Test method for retrieving a text document by its external id: the
     * document is added, the index growth is checked, and the document is
     * then looked up again.
     *
     * @throws IOException
     * @throws NoDocumentsInCorpusException
     * @throws NotEnoughTermsInCorpusException
     * @throws ParseException
     * @throws TermWeightingException
     * @throws SQLException
     */
    @Test
    public void testGetTextDocument() throws IOException, ParseException,
            NotEnoughTermsInCorpusException, NoDocumentsInCorpusException,
            TermWeightingException, SQLException {
        int entriesBefore = countIndexEntries();

        repository.addDocument("myExternalId", "myContent needs two words",
                "myTitle", "myUrl", new TextImporter());

        assertEquals("Unexpected number of index entries after adding one document",
                entriesBefore + ENTRIES_PER_SIMPLE_DOCUMENT, countIndexEntries());

        TextDocument document = repository.getTextDocument("myExternalId");
        assertNotNull("The document that was just added could not be retrieved", document);
    }
}