/** * License Agreement for OpenSearchServer * * Copyright (C) 2013 Emmanuel Keller / Jaeksoft * * http://www.open-search-server.com * * This file is part of OpenSearchServer. * * OpenSearchServer is free software: you can redistribute it and/or * modify it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * OpenSearchServer is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with OpenSearchServer. * If not, see <http://www.gnu.org/licenses/>. **/ package com.jaeksoft.searchlib.test.library; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import java.io.IOException; import javax.xml.parsers.ParserConfigurationException; import javax.xml.xpath.XPathExpressionException; import org.junit.FixMethodOrder; import org.junit.Test; import org.junit.runners.MethodSorters; import org.xml.sax.SAXException; import com.jaeksoft.searchlib.Client; import com.jaeksoft.searchlib.ClientCatalog; import com.jaeksoft.searchlib.SearchLibException; import com.jaeksoft.searchlib.analysis.LanguageEnum; import com.jaeksoft.searchlib.index.IndexDocument; import com.jaeksoft.searchlib.parser.Parser; import com.jaeksoft.searchlib.parser.ParserResultItem; import com.jaeksoft.searchlib.parser.ParserSelector; import com.jaeksoft.searchlib.query.ParseException; import com.jaeksoft.searchlib.request.AbstractSearchRequest; import com.jaeksoft.searchlib.result.AbstractResultSearch; import com.jaeksoft.searchlib.result.ResultDocument; import com.jaeksoft.searchlib.test.LibraryTest; @FixMethodOrder(MethodSorters.NAME_ASCENDING) public class LibraryIndexFileTest { @Test public void testACreateIndex() throws IllegalStateException, IOException, XPathExpressionException, SAXException, ParserConfigurationException, SearchLibException { // Create an index using the FILE_CRAWLER template ClientCatalog.createIndex(LibraryTest.FILE_INDEX_NAME, "FILE_CRAWLER", null); } @Test public void testBIndexPdfDocument() throws SearchLibException, IOException, ClassNotFoundException { // Get the client instance Client client = ClientCatalog.getClient(LibraryTest.FILE_INDEX_NAME); // Check that the PDF test file exixts assertTrue( "File not found: " + LibraryTest.PDF_TEST_FILE.getAbsolutePath(), LibraryTest.PDF_TEST_FILE.exists()); // Get the parser selector instance ParserSelector parserSelector = client.getParserSelector(); // Extract full-text information Parser parser = parserSelector.parseFile(LibraryTest.PDF_TEST_FILE, LanguageEnum.ENGLISH); int count = 0; // The parser may returns several documents for (ParserResultItem parserResultItem : parser.getParserResults()) { // Create a document IndexDocument document = new IndexDocument(LanguageEnum.ENGLISH); // Populate the document with the full-text fields parserResultItem.populate(document); // Add the URL field to the document (building a unique URL) count++; document.addString("url", LibraryTest.PDF_TEST_FILE.toURI() .toString() + "#" + count); // Put in in the index client.updateDocument(document); } } @Test public void testC_SearchData() throws SearchLibException, ParseException { // Get the client instance Client client = ClientCatalog.getClient(LibraryTest.FILE_INDEX_NAME); // Get the default search template AbstractSearchRequest request = (AbstractSearchRequest) client .getNewRequest("search"); // We search the expression "open" request.setQueryString("open source"); // We want the first 10 documents found request.setStart(0); request.setRows(10); // Let's execute the search request AbstractResultSearch<?> results = (AbstractResultSearch<?>) client .request(request); // Check the number of returned document assertEquals(1, results.getNumFound()); // Iterate over the documents found for (ResultDocument document : results) { // Get and check snippet of the content String content = document.getSnippetContent("content", 0); assertNotNull(content); assertTrue(content.trim().length() > 0); } } }