/** * <a href="http://www.openolat.org"> * OpenOLAT - Online Learning and Training</a><br> * <p> * Licensed under the Apache License, Version 2.0 (the "License"); <br> * you may not use this file except in compliance with the License.<br> * You may obtain a copy of the License at the * <a href="http://www.apache.org/licenses/LICENSE-2.0">Apache homepage</a> * <p> * Unless required by applicable law or agreed to in writing,<br> * software distributed under the License is distributed on an "AS IS" BASIS, <br> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br> * See the License for the specific language governing permissions and <br> * limitations under the License. * <p> * Initial code contributed and copyrighted by<br> * frentix GmbH, http://www.frentix.com * <p> */ package org.olat.search.service.document.file; import java.io.File; import java.io.IOException; import java.net.URISyntaxException; import java.net.URL; import java.util.ArrayList; import java.util.Collections; import java.util.List; import org.junit.Assert; import org.junit.Test; import org.olat.core.util.vfs.VFSLeaf; import org.olat.test.OlatTestCase; import org.olat.test.VFSJavaIOFile; /** * Test the low memory text extractor for OpenXML (Microsoft Office XML) * documents. * * @author srosse, stephane.rosse@frentix.com, http://www.frentix.com */ public class OfficeDocumentTest extends OlatTestCase { @Test public void testWordOpenXMLDocument() throws IOException, DocumentException, DocumentAccessException, URISyntaxException { URL docUrl = OfficeDocumentTest.class.getResource("Test_word_indexing.docx"); Assert.assertNotNull(docUrl); VFSLeaf doc = new VFSJavaIOFile(new File(docUrl.toURI())); WordOOXMLDocument document = new WordOOXMLDocument(); FileContent content = document.readContent(doc); Assert.assertNotNull(content); String body = content.getContent(); Assert.assertTrue(body.contains("Document compatibility test")); Assert.assertTrue(body.contains("They prefer to start writing a document at home in desktop or laptop computer")); } @Test public void testWordOOXMLDocumentComparator() { List<String> docs = new ArrayList<>(); docs.add("word/document.xml"); docs.add("word/header1.xml"); docs.add("word/footer3.xml"); docs.add("word/footer.xml"); docs.add("word/footer14.xml"); docs.add("word/header4.xml"); docs.add("word/header25.xml"); Collections.sort(docs, new WordOOXMLDocument.WordDocumentComparator()); Assert.assertEquals("word/header1.xml", docs.get(0)); Assert.assertEquals("word/header4.xml", docs.get(1)); Assert.assertEquals("word/header25.xml", docs.get(2)); Assert.assertEquals("word/document.xml", docs.get(3)); Assert.assertEquals("word/footer.xml", docs.get(4)); Assert.assertEquals("word/footer3.xml", docs.get(5)); Assert.assertEquals("word/footer14.xml", docs.get(6)); } @Test public void testWordDocument() throws IOException, DocumentException, DocumentAccessException, URISyntaxException { URL docUrl = OfficeDocumentTest.class.getResource("Test_word_indexing.doc"); Assert.assertNotNull(docUrl); VFSLeaf doc = new VFSJavaIOFile(new File(docUrl.toURI())); WordDocument document = new WordDocument(); FileContent content = document.readContent(doc); Assert.assertNotNull(content); String body = content.getContent(); Assert.assertTrue(body.contains("Lorem ipsum dolor sit amet"));//content Assert.assertTrue(body.contains("Rue (domicile)"));//footer } @Test public void testExcelOpenXMLDocument() throws IOException, DocumentException, DocumentAccessException, URISyntaxException { URL docUrl = OfficeDocumentTest.class.getResource("Test_excel_indexing.xlsx"); Assert.assertNotNull(docUrl); VFSLeaf doc = new VFSJavaIOFile(new File(docUrl.toURI())); ExcelOOXMLDocument document = new ExcelOOXMLDocument(); FileContent content = document.readContent(doc); Assert.assertNotNull(content); String body = content.getContent(); Assert.assertTrue(body.contains("Numbers and their Squares")); Assert.assertTrue(body.contains("225")); } @Test public void testExcelDocument() throws IOException, DocumentException, DocumentAccessException, URISyntaxException { URL docUrl = OfficeDocumentTest.class.getResource("Test_excel_indexing.xls"); Assert.assertNotNull(docUrl); VFSLeaf doc = new VFSJavaIOFile(new File(docUrl.toURI())); ExcelDocument document = new ExcelDocument(); FileContent content = document.readContent(doc); Assert.assertNotNull(content); String body = content.getContent(); Assert.assertTrue(body.contains("Nachname")); Assert.assertTrue(body.contains("olat4you")); } @Test public void testPowerPointOpenXMLDocument() throws IOException, DocumentException, DocumentAccessException, URISyntaxException { URL docUrl = OfficeDocumentTest.class.getResource("Test_ppt_indexing.pptx"); Assert.assertNotNull(docUrl); VFSLeaf doc = new VFSJavaIOFile(new File(docUrl.toURI())); PowerPointOOXMLDocument document = new PowerPointOOXMLDocument(); FileContent content = document.readContent(doc); Assert.assertNotNull(content); String body = content.getContent(); Assert.assertTrue(body.contains("Here is some text")); } @Test public void testPowerPointOOXMLDocumentComparator() { List<String> docs = new ArrayList<>(); docs.add("word/dru.xml"); docs.add("ppt/slides/slide9.xml"); docs.add("ppt/slides/slide6.xml"); docs.add("ppt/slides/slide25.xml"); docs.add("ppt/slides/slide.xml"); docs.add("ppt/slides/slide12.xml"); docs.add("ppt/slides/slide3.xml"); Collections.sort(docs, new PowerPointOOXMLDocument.PowerPointDocumentComparator()); Assert.assertEquals("ppt/slides/slide.xml", docs.get(0)); Assert.assertEquals("ppt/slides/slide3.xml", docs.get(1)); Assert.assertEquals("ppt/slides/slide6.xml", docs.get(2)); Assert.assertEquals("ppt/slides/slide9.xml", docs.get(3)); Assert.assertEquals("ppt/slides/slide12.xml", docs.get(4)); Assert.assertEquals("ppt/slides/slide25.xml", docs.get(5)); Assert.assertEquals("word/dru.xml", docs.get(6)); } @Test public void testPowerPointDocument() throws IOException, DocumentException, DocumentAccessException, URISyntaxException { URL docUrl = OfficeDocumentTest.class.getResource("Test_ppt_indexing.ppt"); Assert.assertNotNull(docUrl); VFSLeaf doc = new VFSJavaIOFile(new File(docUrl.toURI())); PowerPointDocument document = new PowerPointDocument(); FileContent content = document.readContent(doc); Assert.assertNotNull(content); String body = content.getContent(); Assert.assertTrue(body.contains("Sample Powerpoint Slide")); } }