/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.util;
import java.io.File;
import java.io.FilenameFilter;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import junit.framework.Test;
import junit.framework.TestCase;
import junit.framework.TestSuite;
import org.apache.pdfbox.pdmodel.PDDocument;
/**
* Test the performance of the PDF text stripper utility.
*
* @author <a href="mailto:ben@benlitchfield.com">Ben Litchfield</a>
* @version $Revision: 1.4 $
*/
public class TestTextStripperPerformance extends TestCase
{
    /**
     * Test class constructor.
     *
     * @param name The name of the test class.
     */
    public TestTextStripperPerformance( String name )
    {
        super( name );
    }

    /**
     * Test suite setup. Nothing needs to be prepared for this test.
     */
    public void setUp()
    {
    }

    /**
     * Extract the text of a single PDF file and write it to
     * <code>output/&lt;file name&gt;.txt</code>, where <code>output</code> is a
     * sibling of the directory that contains the input file. The output
     * directory is created when it does not exist yet, and the text is
     * written as UTF-8 so the result does not depend on the platform's
     * default charset.
     *
     * @param file The PDF file to extract text from. It must have a parent directory.
     * @param bLogResult Whether to log the extracted text. Currently unused; kept
     *                   so existing callers keep compiling.
     * @throws Exception when loading the document, extracting its text or writing
     *                   the output fails
     */
    public void doTestFile(File file, boolean bLogResult)
        throws Exception
    {
        PDFTextStripper stripper = new PDFTextStripper();
        PDDocument document = PDDocument.load(file);
        try
        {
            File outFile = new File(file.getParentFile().getParentFile(), "output/" + file.getName() + ".txt");
            // Make sure the target directory exists. The return value is ignored on
            // purpose: if creation failed, FileOutputStream reports the problem below.
            outFile.getParentFile().mkdirs();

            // Nested try/finally blocks guarantee that each resource is released
            // even when closing one of the others throws.
            OutputStream os = new FileOutputStream(outFile);
            try
            {
                Writer writer = new OutputStreamWriter(os, "UTF-8");
                try
                {
                    stripper.writeText(document, writer);
                }
                finally
                {
                    writer.close();
                }
            }
            finally
            {
                os.close();
            }
        }
        finally
        {
            document.close();
        }
    }

    /**
     * Run text extraction over every <code>.pdf</code> file found in
     * <code>src/test/resources/input</code>.
     * <p>
     * When the system property
     * <code>org.apache.pdfbox.util.TextStripper.file</code> is set to a
     * non-empty value, this test does nothing.
     *
     * @throws Exception when there is an exception
     */
    public void testExtract()
        throws Exception
    {
        String filename = System.getProperty("org.apache.pdfbox.util.TextStripper.file");
        if ((filename != null) && (filename.length() > 0))
        {
            // A single-file run (doTestFile on the named file with logging enabled)
            // used to live here but was disabled; the no-op behaviour is kept.
            // NOTE(review): presumably the property is meant to target another
            // test class only - confirm before re-enabling.
            return;
        }

        File testDir = new File("src/test/resources/input");
        File[] testFiles = testDir.listFiles(new FilenameFilter()
        {
            public boolean accept(File dir, String name)
            {
                return name.endsWith(".pdf");
            }
        });
        if (testFiles == null)
        {
            // listFiles returns null when the path is not a directory or cannot
            // be read; report that clearly instead of a NullPointerException.
            fail("Test input directory is missing or unreadable: " + testDir.getAbsolutePath());
            return;
        }

        for (int n = 0; n < testFiles.length; n++)
        {
            doTestFile(testFiles[n], false);
        }
    }

    /**
     * Set the tests in the suite for this test class.
     *
     * @return the Suite.
     */
    public static Test suite()
    {
        return new TestSuite( TestTextStripperPerformance.class );
    }

    /**
     * Command line execution. Runs this test class with the JUnit text
     * runner; any arguments passed in are ignored.
     *
     * @param args Command line arguments.
     */
    public static void main( String[] args )
    {
        String[] arg = {TestTextStripperPerformance.class.getName() };
        junit.textui.TestRunner.main( arg );
    }
}