/* * File: TextDocumentExtractorTest.java * Authors: Justin Basilico * Company: Sandia National Laboratories * Project: Cognitive Foundry * * Copyright March 02, 2009, Sandia Corporation. * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive * license for use of this work by or on behalf of the U.S. Government. Export * of this program may require a license from the United States Government. * See CopyrightHistory.txt for complete details. * */ package gov.sandia.cognition.text.document.extractor; import gov.sandia.cognition.text.document.Document; import java.net.URI; import org.junit.Test; import static org.junit.Assert.*; /** * Unit tests for class {@code TextDocumentExtractor}. * * @author Justin Basilico * @since 3.0 */ public class TextDocumentExtractorTest { /** Base directory for resources. */ public static final String BASE_DIR = "gov/sandia/cognition/text/document/extractor/resources/"; private URI testTextURI; private URI testHTMLURI; /** * Creates a new test. * @throws Exception */ public TextDocumentExtractorTest() throws Exception { this.testTextURI = ClassLoader.getSystemResource(BASE_DIR + "testText.txt").toURI(); this.testHTMLURI = ClassLoader.getSystemResource(BASE_DIR + "testHTML.html").toURI(); } /** * Test of constructors of class TextDocumentExtractor. */ @Test public void testConstructors() { TextDocumentExtractor instance = new TextDocumentExtractor(); } /** * Test of canExtract method, of class TextDocumentExtractor. * @throws Exception */ @Test public void testCanExtract() throws Exception { TextDocumentExtractor instance = new TextDocumentExtractor(); assertTrue(instance.canExtract(this.testTextURI)); assertFalse(instance.canExtract(this.testHTMLURI)); assertFalse(instance.canExtract(new URI("file://doesNotExist"))); } /** * Test of extractDocument method, of class TextDocumentExtractor. * @throws Exception */ @Test public void testExtractDocument() throws Exception { TextDocumentExtractor instance = new TextDocumentExtractor(); Document document = instance.extractDocument(this.testTextURI); assertEquals("testText.txt", document.getName()); assertEquals("testText", document.getTitleField().getText()); assertEquals("This is a test text document.\n", document.getBodyField().getText()); // TODO: Test extraction of the other fields. } }