package pdfainspector; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.PrintWriter; /** * Executable file of PdfInspector. * This is the main class, which calls the XML and JSON generators and feeds * the results to the Rules Engine. * @author schiele1 */ public class PdfAInspector { /** * Reads in the PDF files, calls the XML and JSON generators in the * PdfExtractor class, and feeds the results to the Rules Engine. * @param args A list of space-separated strings, each being the filepath * to a PDF on which the inspector is to be run. */ public static void main(String[] args){ // Make sure there's actually a file to inspect. if(args.length == 0 || args[0].equals("-h") || args[0].equals("-help")){ System.out.println("Usage: java -jar pdfainspector.jar \"/path/to/file/document.pdf\"\n" + "You can simultaneously enter any number of filepaths, separated by spaces," + " all enclosed in quotes (if they have spaces) and all of them will be analyzed.\n" + "You can type java -jar pdfainspector.jar -h or -help to display this help message."); return; } // Run our rules on each specified PDF, but only if it's a PDF. for(String pdfName : args){ if(!checkExtension(pdfName)){ System.err.println(pdfName + " is not a PDF File"); continue; } // Determine the source PDF and destination XML and JSON files. String pathname = extractPath(pdfName); String filename = extractFilename(pdfName); String xmlName = pathname + "xml-" + filename + ".xml"; String jsonName = pathname + "json-" + filename + ".json"; PrintWriter writer; // Generate our XML file using the PdfExtractor class. System.out.println("Generating XML for " + pdfName + "..."); File xmlFile = new File(xmlName); String xml = PdfExtractor.extractToXML(pdfName); try{ writer = new PrintWriter(new FileOutputStream(xmlFile)); writer.println(xml); writer.flush(); writer.close(); }catch(FileNotFoundException e){ System.err.println("Error generating XML file for " + pdfName); return; } System.out.println("XML file " + xmlName + " generated."); // Convert our XML file to JSON using PdfExtractor. System.out.println("Generating JSON for " + pdfName + "..."); File jsonFile = new File(jsonName); String json = PdfExtractor.convertXMLToJSON(xml); try{ writer = new PrintWriter(new FileOutputStream(jsonFile)); writer.println(json); writer.flush(); writer.close(); }catch(FileNotFoundException e){ System.err.println("Error generating JSON file for " + pdfName); return; } System.out.println("JSON file " + jsonName + " generated."); } } /** * Given a filepath, it separates the path from the filename and returns * the path. * @param filepath The filepath string from which to extract. * @return The path component of the string, without the filename or * extension. */ public static String extractPath(String filepath){ int lastSlash = filepath.lastIndexOf('/'); if(lastSlash >= 0){ return filepath.substring(0, lastSlash + 1); } return ""; } /** * Given a filepath, it separates the path from the filename and returns * the filename, without the extension. * @param filepath The filepath string from which to extract. * @return The filename component of the string, without the path or * extension. */ public static String extractFilename(String filepath){ int lastDot = filepath.lastIndexOf('.'); int lastSlash = filepath.lastIndexOf('/'); return filepath.substring(lastSlash + 1, lastDot); } /** * Checks whether the extension of the given filepath is pdf. * @param filepath The filepath to check. * @return True if the extension is .pdf, False if it isn't. */ public static boolean checkExtension(String filepath){ int lastDot = filepath.lastIndexOf('.'); if(lastDot >= 0){ String extension = filepath.substring(lastDot + 1); return extension.toLowerCase().equals("pdf"); } return false; } }