/* ==================================================================== Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file distributed with this work for additional information regarding copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ==================================================================== */ package org.apache.poi.hssf.extractor; import static org.apache.poi.POITestCase.assertContains; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.PrintStream; import org.apache.poi.EmptyFileException; import org.apache.poi.EncryptedDocumentException; import org.apache.poi.POIDataSamples; import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.poi.poifs.filesystem.OfficeXmlFileException; import org.apache.poi.util.RecordFormatException; import org.junit.Ignore; import org.junit.Test; /** * Unit tests for the Excel 5/95 and Excel 4 (and older) text * extractor */ public final class TestOldExcelExtractor { private static OldExcelExtractor createExtractor(String sampleFileName) throws IOException { File file = HSSFTestDataSamples.getSampleFile(sampleFileName); return new OldExcelExtractor(file); } @Test public void testSimpleExcel3() throws IOException { OldExcelExtractor extractor = createExtractor("testEXCEL_3.xls"); // Check we can call getText without error String text = extractor.getText(); // Check we find a few words we expect in there assertContains(text, "Season beginning August"); assertContains(text, "USDA"); // Check we find a few numbers we expect in there assertContains(text, "347"); assertContains(text, "228"); // Check we find a few string-literal dates in there assertContains(text, "1981/82"); // Check the type assertEquals(3, extractor.getBiffVersion()); assertEquals(0x10, extractor.getFileType()); extractor.close(); } @Test public void testSimpleExcel3NoReading() throws IOException { OldExcelExtractor extractor = createExtractor("testEXCEL_3.xls"); assertNotNull(extractor); extractor.close(); } @Test public void testSimpleExcel4() throws IOException { OldExcelExtractor extractor = createExtractor("testEXCEL_4.xls"); // Check we can call getText without error String text = extractor.getText(); // Check we find a few words we expect in there assertContains(text, "Size"); assertContains(text, "Returns"); // Check we find a few numbers we expect in there assertContains(text, "11"); assertContains(text, "784"); // Check the type assertEquals(4, extractor.getBiffVersion()); assertEquals(0x10, extractor.getFileType()); extractor.close(); } @Test public void testSimpleExcel5() throws IOException { for (String ver : new String[] {"5", "95"}) { OldExcelExtractor extractor = createExtractor("testEXCEL_"+ver+".xls"); // Check we can call getText without error String text = extractor.getText(); // Check we find a few words we expect in there assertContains(text, "Sample Excel"); assertContains(text, "Written and saved"); // Check we find a few numbers we expect in there assertContains(text, "15"); assertContains(text, "169"); // Check we got the sheet names (new formats only) assertContains(text, "Sheet: Feuil3"); // Check the type assertEquals(5, extractor.getBiffVersion()); assertEquals(0x05, extractor.getFileType()); extractor.close(); } } @Test public void testStrings() throws IOException { OldExcelExtractor extractor = createExtractor("testEXCEL_4.xls"); String text = extractor.getText(); // Simple strings assertContains(text, "Table 10 -- Examination Coverage:"); assertContains(text, "Recommended and Average Recommended Additional Tax After"); assertContains(text, "Individual income tax returns, total"); // More complicated strings assertContains(text, "$100,000 or more"); assertContains(text, "S corporation returns, Form 1120S [10,15]"); assertContains(text, "individual income tax return \u201Cshort forms.\u201D"); // Formula based strings // TODO Find some then test extractor.close(); } @Test public void testFormattedNumbersExcel4() throws IOException { OldExcelExtractor extractor = createExtractor("testEXCEL_4.xls"); String text = extractor.getText(); // Simple numbers assertContains(text, "151"); assertContains(text, "784"); // Numbers which come from formulas assertContains(text, "0.398"); // TODO Rounding assertContains(text, "624"); // Formatted numbers // TODO // assertContains(text, "55,624"); // assertContains(text, "11,743,477"); extractor.close(); } @Test public void testFormattedNumbersExcel5() throws IOException { for (String ver : new String[] {"5", "95"}) { OldExcelExtractor extractor = createExtractor("testEXCEL_"+ver+".xls"); String text = extractor.getText(); // Simple numbers assertContains(text, "1"); // Numbers which come from formulas assertContains(text, "13"); assertContains(text, "169"); // Formatted numbers // TODO // assertContains(text, "100.00%"); // assertContains(text, "155.00%"); // assertContains(text, "1,125"); // assertContains(text, "189,945"); // assertContains(text, "1,234,500"); // assertContains(text, "$169.00"); // assertContains(text, "$1,253.82"); extractor.close(); } } @Test public void testFromFile() throws IOException { for (String ver : new String[] {"4", "5", "95"}) { String filename = "testEXCEL_"+ver+".xls"; File f = HSSFTestDataSamples.getSampleFile(filename); OldExcelExtractor extractor = new OldExcelExtractor(f); String text = extractor.getText(); assertNotNull(text); assertTrue(text.length() > 100); extractor.close(); } } @Test(expected=OfficeXmlFileException.class) public void testOpenInvalidFile1() throws IOException { // a file that exists, but is a different format createExtractor("WithVariousData.xlsx"); } @Test(expected=RecordFormatException.class) public void testOpenInvalidFile2() throws IOException { // a completely different type of file createExtractor("48936-strings.txt"); } @Test(expected=FileNotFoundException.class) public void testOpenInvalidFile3() throws IOException { // a POIFS file which is not a Workbook InputStream is = POIDataSamples.getDocumentInstance().openResourceAsStream("47304.doc"); try { new OldExcelExtractor(is).close(); } finally { is.close(); } } @Test(expected=EmptyFileException.class) public void testOpenNonExistingFile() throws IOException { // a file that exists, but is a different format OldExcelExtractor extractor = new OldExcelExtractor(new File("notexistingfile.xls")); extractor.close(); } @Test public void testInputStream() throws IOException { File file = HSSFTestDataSamples.getSampleFile("testEXCEL_3.xls"); InputStream stream = new FileInputStream(file); try { OldExcelExtractor extractor = new OldExcelExtractor(stream); String text = extractor.getText(); assertNotNull(text); extractor.close(); } finally { stream.close(); } } @Test public void testInputStreamNPOIHeader() throws IOException { File file = HSSFTestDataSamples.getSampleFile("FormulaRefs.xls"); InputStream stream = new FileInputStream(file); try { OldExcelExtractor extractor = new OldExcelExtractor(stream); extractor.close(); } finally { stream.close(); } } @Test public void testNPOIFSFileSystem() throws IOException { File file = HSSFTestDataSamples.getSampleFile("FormulaRefs.xls"); NPOIFSFileSystem fs = new NPOIFSFileSystem(file); try { OldExcelExtractor extractor = new OldExcelExtractor(fs); extractor.close(); } finally { fs.close(); } } @Test public void testDirectoryNode() throws IOException { File file = HSSFTestDataSamples.getSampleFile("FormulaRefs.xls"); NPOIFSFileSystem fs = new NPOIFSFileSystem(file); try { OldExcelExtractor extractor = new OldExcelExtractor(fs.getRoot()); extractor.close(); } finally { fs.close(); } } @Test public void testDirectoryNodeInvalidFile() throws IOException { File file = POIDataSamples.getDocumentInstance().getFile("test.doc"); NPOIFSFileSystem fs = new NPOIFSFileSystem(file); try { OldExcelExtractor extractor = new OldExcelExtractor(fs.getRoot()); extractor.close(); fail("Should catch exception here"); } catch (FileNotFoundException e) { // expected here } finally { fs.close(); } } @Ignore("Calls System.exit()") @Test public void testMainUsage() throws IOException { PrintStream save = System.err; try { ByteArrayOutputStream out = new ByteArrayOutputStream(); try { PrintStream str = new PrintStream(out, false, "UTF-8"); System.setErr(str); OldExcelExtractor.main(new String[] {}); } finally { out.close(); } } finally { System.setErr(save); } } @Test public void testMain() throws IOException { File file = HSSFTestDataSamples.getSampleFile("testEXCEL_3.xls"); PrintStream save = System.out; try { ByteArrayOutputStream out = new ByteArrayOutputStream(); try { PrintStream str = new PrintStream(out, false, "UTF-8"); System.setOut(str); OldExcelExtractor.main(new String[] {file.getAbsolutePath()}); } finally { out.close(); } String string = new String(out.toByteArray(), "UTF-8"); assertTrue("Had: " + string, string.contains("Table C-13--Lemons")); } finally { System.setOut(save); } } @Test public void testEncryptionException() throws IOException { //test file derives from Common Crawl File file = HSSFTestDataSamples.getSampleFile("60284.xls"); OldExcelExtractor ex = new OldExcelExtractor(file); assertEquals(5, ex.getBiffVersion()); assertEquals(5, ex.getFileType()); try { ex.getText(); fail(); } catch (EncryptedDocumentException e) { assertTrue("correct exception thrown", true); } ex.close(); } }