package org.openedit.entermedia.scanner; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.openedit.entermedia.Asset; import org.openedit.entermedia.MediaArchive; import org.openedit.repository.ContentItem; import com.openedit.util.FileUtils; import com.openedit.util.PathUtilities; public class MetadataPdfExtractor extends MetadataExtractor { private static final Log log = LogFactory.getLog(MetadataPdfExtractor.class); public boolean extractData(MediaArchive inArchive, ContentItem inFile, Asset inAsset) { String type = PathUtilities.extractPageType(inFile.getPath()); if (type == null || "data".equals(type.toLowerCase())) { type = inAsset.get("fileformat"); } if (type != null) { type = type.toLowerCase(); if( inAsset.get("fileformat") == null) { inAsset.setProperty("fileformat", type); } if (type.equals("pdf")) { PdfParser parser = new PdfParser(); // ByteArrayOutputStream out = new ByteArrayOutputStream(); InputStream in = null; try { in = inFile.getInputStream(); // try // { // new OutputFiller().fill(in, out); // } // finally // { // FileUtils.safeClose(in); // } // byte[] bytes = out.toByteArray(); Parse results = parser.parse(in); //Do we deal with encoding? //We need to limit this size String fulltext = results.getText(); inAsset.setProperty("fulltext", fulltext); if( inAsset.getInt("width") == 0) { String val = results.get("width"); inAsset.setProperty("width", val); } if( inAsset.getInt("height") == 0) { String val = results.get("height"); inAsset.setProperty("height", val); } inAsset.setProperty("pages", String.valueOf(results.getPages())); if (inAsset.getProperty("assettitle") == null) { String title = results.getTitle(); if( title != null && title.length() < 300) { inAsset.setProperty("assettitle", title); } } } catch( Exception ex) { log.error("cant process" , ex); return false; } finally { FileUtils.safeClose(in); } return true; } } return false; } }