/**
 * PDF2TXT
 *
 * @version 1.0.0 of 24.03.2013
 * @author Daniel Ruf
 */
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.util.PDFTextStripper;
import java.io.FileFilter;

/**
 * Command-line tool that extracts the text of every PDF file in the current
 * working directory.
 *
 * <p>For each regular file whose name ends in {@code .pdf} (but not in
 * {@code .txt.pdf}) a text file named {@code <original name>.txt} is written
 * into the same directory, e.g. {@code report.pdf} becomes
 * {@code report.pdf.txt}.
 */
public class pdf2txt {

  /**
   * Converts all matching PDF files in the current directory to text files.
   *
   * <p>A file that cannot be converted is reported on standard error and
   * skipped; the remaining files are still processed.
   *
   * @param args command-line arguments (unused)
   */
  public static void main(String[] args) {
    String path2 = ".";
    File directory = new File(path2);
    String version = "1.0.0";
    String program = "PDF2TXT";
    System.out.println(program + " " + version);

    File[] myarray = directory.listFiles(new FileFilter() {
      @Override
      public boolean accept(File candidate) {
        String name = candidate.toString();
        return name.endsWith(".pdf") && !name.endsWith(".txt.pdf") && candidate.isFile();
      }
    });

    // listFiles returns null (not an empty array) when the directory cannot be read.
    if (myarray == null) {
      System.err.println("Unable to list files in " + directory.getAbsolutePath());
      return;
    }

    System.out.println("Found " + myarray.length + " file(s)");
    for (int j = 0; j < myarray.length; j++) {
      int file_number = j + 1;
      // "\r" rewrites the same console line so the progress counter updates in place.
      System.out.print("\rProcessing file " + file_number + " of " + myarray.length);
      File input = myarray[j];
      File output = new File(path2 + "/" + input.getName() + ".txt");
      try {
        convert(input, output);
      } catch (Exception e) {
        // Best effort: report the failure and continue with the next file.
        System.err.println();
        System.err.println("Failed to convert " + input + ": " + e);
      }
    }
    System.out.println("");
    System.out.println("Done");
  }

  /**
   * Extracts the text of one PDF document into a text file.
   *
   * <p>Both the document and the writer are closed even when extraction
   * fails. The output is encoded with the platform default charset.
   *
   * @param input the PDF file to read
   * @param output the text file to create or overwrite
   * @throws IOException if the PDF cannot be loaded or the text cannot be written
   */
  private static void convert(File input, File output) throws IOException {
    PDDocument document = PDDocument.load(input);
    try {
      // Create the stripper before opening the output so that a failure here
      // does not leave an empty text file behind.
      PDFTextStripper stripper = new PDFTextStripper();
      BufferedWriter writer =
          new BufferedWriter(new OutputStreamWriter(new FileOutputStream(output)));
      try {
        stripper.writeText(document, writer);
      } finally {
        writer.close();
      }
    } finally {
      document.close();
    }
  }
}