package filetools.pdf; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; import java.util.ArrayList; import java.util.Map; import com.itextpdf.text.pdf.PdfReader; //TODO: Some of this input might be interesting for the PDF/A Analysis with PdfBox public class XmpMetadataExtractor { public static void main(String args[]) throws IOException { try { String pdfFolder = utilities.BrowserDialogs.chooseFolder(); PdfReader reader; PrintWriter outputfile = new PrintWriter(new FileWriter(pdfFolder + "//" + "PdfMetadata.xml")); String xmlVersion = "xml version='1.0'"; String xmlEncoding = "encoding='ISO-8859-1'"; String xmlxslStyleSheet = "<?xml-stylesheet type=\"text/xsl\" href=\"PdfMetadataStyle.xsl\"?>"; String xsltLocation = (pdfFolder + "//" + "PdfMetadataStyle.xsl"); output.XslStyleSheets.PdfMetadataCustomizedXsl(xsltLocation); outputfile.println("<?" + xmlVersion + " " + xmlEncoding + "?>"); outputfile.println(xmlxslStyleSheet); outputfile.println("<PdfMetadata>"); ArrayList<File> files = utilities.ListsFiles.getPaths(new File(pdfFolder), new ArrayList<File>()); for (int i = 0; i < files.size(); i++) { String extension = utilities.fileStringUtilities.getExtension(files.get(i).toString()); extension = extension.toLowerCase(); if (extension.equals("pdf")) { outputfile.println("<File>"); String name = utilities.fileStringUtilities.getFileName(files.get(i)); name = reduceXmlEscapors(name); outputfile.println("<FileName>" + name + "</FileName>"); boolean pdfok = filetools.pdf.PdfAnalysis.testPdfOk(files.get(i)); if (pdfok == true) { reader = new PdfReader(files.get(i).toString()); if (reader != null) { Map<String, String> metadata = reader.getInfo(); int metaSize = metadata.size(); outputfile.println("<MetadataEntries>" + metaSize + "</MetadataEntries>"); String[] keys = (String[]) metadata.keySet().toArray(new String[metaSize]); String[] values = (String[]) metadata.values().toArray(new String[metaSize]); for (int j = 0; j < metaSize; j++) { values[j] = reduceXmlEscapors(values[j]); // TODO: transform Umlaute "ue" usw. sonst hat // Travis wieder Probs outputfile.println("<InfoEntry name=\"" + keys[j] + "\">" + values[j] + "</InfoEntry>"); } // Editing for XSLT table Output if (metadata.get("CreationDate") != null) { if (metadata.get("CreationDate").length() > 10) { String creationYear = getYear(metadata.get("CreationDate")); int creationYearInt = Integer.parseInt(creationYear); if (creationYearInt > 1992) { outputfile.println("<CreationYear>" + creationYear + "</CreationYear>"); } String creationDate = transformDate(metadata.get("CreationDate")); outputfile.println("<CreationDate>" + creationDate + "</CreationDate>"); } else { outputfile.println("<CreationDate>" + metadata.get("CreationDate") + "</CreationDate>"); } } if (metadata.get("ModDate") != null) { if (metadata.get("ModDate").length() > 10) { String creationDate = transformDate(metadata.get("ModDate")); outputfile.println("<ModificationDate>" + creationDate + "</ModificationDate>"); } else { outputfile.println("<ModificationDate>" + metadata.get("ModDate") + "</ModificationDate>"); } } char pdfVersion = reader.getPdfVersion(); outputfile.println("<PdfVersion>" + "PDF 1." + pdfVersion + "</PdfVersion>"); if (metadata.get("Title") != null) { String title = reduceXmlEscapors(metadata.get("Title")); outputfile.println("<Title>" + title + "</Title>"); } if (metadata.get("Author") != null) { String author = reduceXmlEscapors(metadata.get("Author")); outputfile.println("<Author>" + author + "</Author>"); } if (metadata.get("Producer") != null) { String producer = reduceXmlEscapors(metadata.get("Producer")); outputfile.println("<Producer>" + producer + "</Producer>"); } if (metadata.get("Creator") != null) { String creator = reduceXmlEscapors(metadata.get("Creator")); outputfile.println("<Creator>" + creator + "</Creator>"); } if (metadata.get("Company") != null) { String company = reduceXmlEscapors(metadata.get("Company")); outputfile.println("<Company>" + company + "</Company>"); } if (metadata.get("Keywords") != null) { String keywords = reduceXmlEscapors(metadata.get("Keywords")); outputfile.println("<Keywords>" + keywords + "</Keywords>"); } /* * String xmpMeta = new * String(reader.getMetadata()); // rdf, // dc // * and // xmp // data outputfile.println("<XmpData>" * + "<![CDATA[" + xmpMeta + "]]>" + "</XmpData>"); */ } } else { outputfile.println("<PdfAnalysis>" + "false" + "</PdfAnalysis>"); } outputfile.println("</File>"); } } outputfile.println("</PdfMetadata>"); outputfile.close(); } catch (Exception e) { System.out.println(e); } } private static String getYear(String creationYear) { creationYear = creationYear.replace("D:", ""); String year = creationYear.substring(0, 4); return year; } private static String reduceXmlEscapors(String string) { string = string.replace("\"", """); string = string.replace("\'", "'"); string = string.replace("<", "<"); string = string.replace(">", ">"); string = string.replace("&", " &"); return string; } private static String transformDate(String creationDate) { creationDate = creationDate.replace("D:", ""); String year = creationDate.substring(0, 4); String month = creationDate.substring(4, 6); String day = creationDate.substring(6, 8); switch (month) { case "01": month = "January"; break; case "02": month = "February"; break; case "03": month = "March"; break; case "04": month = "April"; break; case "05": month = "May"; break; case "06": month = "June"; break; case "07": month = "July"; break; case "08": month = "August"; break; case "09": month = "September"; break; case "10": month = "October"; break; case "11": month = "November"; break; case "12": month = "December"; break; } creationDate = (day + " " + month + " " + year); return creationDate; } }