/* * Copyright (c) 2010 Brasiliana Digital Library (http://brasiliana.usp.br). * Based on similar source code from Adore Djatoka. * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA * */ package gov.lanl.adore.djatoka.plugin; import gov.lanl.adore.djatoka.DjatokaDecodeParam; import gov.lanl.adore.djatoka.DjatokaException; import gov.lanl.adore.djatoka.IExtract; import gov.lanl.adore.djatoka.openurl.OpenURLJP2KService; import gov.lanl.adore.djatoka.util.IOUtils; import gov.lanl.adore.djatoka.util.ImageProcessingUtils; import gov.lanl.adore.djatoka.util.ImageRecord; import java.awt.Color; import java.awt.Graphics2D; import java.awt.Rectangle; import java.awt.image.BufferedImage; import java.awt.Dimension; import java.awt.Rectangle; import javax.imageio.ImageIO; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.regex.MatchResult; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.ArrayList; import java.util.Arrays; import java.util.Map; import java.util.HashMap; import java.util.Properties; import java.util.Enumeration; import java.util.StringTokenizer; import java.util.logging.Level; import org.apache.log4j.Logger; import gov.lanl.util.ConfigurationManager; /** * Uses Poppler PDF commands to extract PDF pages to PNG files. * @author Fabio N. Kepler */ public class ExtractPDF implements IExtract { private static Logger logger = Logger.getLogger(ExtractPDF.class); // maximum size of either preview image dimension private static final int MAX_PX = 1000; // maxium DPI private static final int MAX_DPI = 180; private static int DEFAULT_DENSITY = 72; //private static String DEFAULT_COLORSPACE = "RGB"; private static int DEFAULT_LEVELS = 4; // command to get image from PDF; @FILE@, @OUTPUT@ etc are placeholders private static final String PDFTOPPM_COMMAND[] = { "@COMMAND@", "-q", "-png", "-f", "@FIRSTPAGE@", "-l", "@LASTPAGE@", "-r", "@DPI@", "@FILE@", "@OUTPUTFILE@" }; private static final int PDFTOPPM_COMMAND_POSITION_BIN = 0; private static final int PDFTOPPM_COMMAND_POSITION_FIRSTPAGE = 4; private static final int PDFTOPPM_COMMAND_POSITION_LASTPAGE = 6; private static final int PDFTOPPM_COMMAND_POSITION_DPI = 8; private static final int PDFTOPPM_COMMAND_POSITION_OPTIONAL_EXTRAS = 9; // Must insert at this position instead of just setting it. private static final int PDFTOPPM_COMMAND_POSITION_FILE = 9; private static final int PDFTOPPM_COMMAND_POSITION_OUTPUTFILE = 10; // command to get image from PDF; @FILE@, @OUTPUT@ etc are placeholders private static final String PDFINFO_COMMAND[] = { "@COMMAND@", "-f", "@FIRSTPAGE@", "-l", "@LASTPAGE@", "-box", "@FILE@" }; private static final int PDFINFO_COMMAND_POSITION_BIN = 0; private static final int PDFINFO_COMMAND_POSITION_FIRSTPAGE = 2; private static final int PDFINFO_COMMAND_POSITION_LASTPAGE = 4; private static final int PDFINFO_COMMAND_POSITION_FILE = 6; // executable path for "pdftoppm", comes from DSpace config at runtime. private static String pdftoppmPath = null; // executable path for "pdfinfo", comes from DSpace config at runtime. private static String pdfinfoPath = null; // match line in pdfinfo output that describes file's MediaBox private static final Pattern MEDIABOX_PATT = Pattern.compile( //"^Page\\s+(\\d+)\\s+size:\\s+([\\.\\d]+)\\s+x\\s+([\\.\\d]+)\\s+pts\\.*$"); // Does not seem to match "Page 41 size: 595 x 842 pts (A4)". "^Page\\s+(\\d+)\\s+MediaBox:\\s+([\\.\\d-]+)\\s+([\\.\\d-]+)\\s+([\\.\\d-]+)\\s+([\\.\\d-]+)"); // For use with -box switch /* Without the -box switch: Page 1 size: 444.72 x 771.12 pts Page 2 size: 416.16 x 743.52 pts */ /* With -box switch: Page 1 size: 444.72 x 771.12 pts Page 2 size: 416.16 x 743.52 pts Page 1 MediaBox: 0.00 0.00 444.72 771.12 Page 1 CropBox: 0.00 0.00 444.72 771.12 Page 1 BleedBox: 0.00 0.00 444.72 771.12 Page 1 TrimBox: 0.00 0.00 444.72 771.12 Page 1 ArtBox: 0.00 0.00 444.72 771.12 Page 2 MediaBox: 0.00 0.00 416.16 743.52 Page 2 CropBox: 0.00 0.00 416.16 743.52 Page 2 BleedBox: 0.00 0.00 416.16 743.52 Page 2 TrimBox: 0.00 0.00 416.16 743.52 Page 2 ArtBox: 0.00 0.00 416.16 743.52 */ // match line in pdfinfo output that describes file's MediaBox private static final Pattern PAGES_PATT = Pattern.compile( "^Pages:\\s+([\\d-]+)"); private static Properties props = new Properties(); private static final String DEFAULT_PDFTOPPM_PATH = "/usr/bin/pdftoppm"; private static final String DEFAULT_PDFINFO_PATH = "/usr/bin/pdfinfo"; private static final String PROPS_PDF_PDFTOPPM_PATH = "PDF.pdftoppmPath"; private static final String PROPS_PDF_PDFINFO_PATH = "PDF.pdfinfoPath"; /** * Returns PDF props in ImageRecord * @param r ImageRecord containing absolute file path of PDF file. * @return a populated ImageRecord object * @throws DjatokaException */ @Override public final ImageRecord getMetadata(ImageRecord r) throws DjatokaException { if ((r.getImageFile() == null || !new File(r.getImageFile()).exists()) && r.getObject() == null) throw new DjatokaException("Image Does Not Exist: " + r.toString()); logger.debug("Get metadata: " + r.toString()); try { DjatokaDecodeParam params = new DjatokaDecodeParam(); BufferedImage bi = process(r, params); r.setWidth(bi.getWidth()); r.setHeight(bi.getHeight()); r.setDWTLevels(DEFAULT_LEVELS); r.setLevels(DEFAULT_LEVELS); r.setBitDepth(bi.getColorModel().getPixelSize()); r.setNumChannels(bi.getColorModel().getNumColorComponents()); //r.setCompositingLayerCount(getNumberOfPages(r)); // Semantics: number of pages in the PDF file. HashMap<String, String> pdfProps = (HashMap<String, String>)getPDFProperties(r); int n = Integer.parseInt(pdfProps.remove("Pages")); r.setCompositingLayerCount(n); // Since it is not possible for the viewer to query about a specific page's width and height // (because in Djatoka's point of view a PDF is just one image with various compositing layers, which are the pages), // at this point right here we query the PDF file about the size of all pages and store this // information in a Map. This map can be returned by getMetadata by setting it as the instProps member of the // ImageRecord class, which Djatoka already implements and which is returned as JSON to the viewer JS. // The viewer then has to store this information and later query it instead of asking Djatoka (getMetadata) again. //Map<String, String> instProps = getPagesSizes(r); r.setInstProps(pdfProps); logger.debug("instProps: " + r.getInstProps()); logger.debug("Get metadata: "+r.toString()); } catch (Exception e) { throw new DjatokaException(e); } return r; } //* public final ImageRecord getMetadata(BufferedImage bi) throws DjatokaException { if (bi == null) throw new DjatokaException("Image Does Not Exist"); logger.debug("getMetadata(BufferedImage): " + bi.getWidth()); try { ImageRecord r = new ImageRecord(); r.setWidth(bi.getWidth()); r.setHeight(bi.getHeight()); r.setDWTLevels(DEFAULT_LEVELS); r.setLevels(DEFAULT_LEVELS); r.setBitDepth(bi.getColorModel().getPixelSize()); r.setNumChannels(bi.getColorModel().getNumColorComponents()); //r.setCompositingLayerCount(getNumberOfPages(r)); // 'bi' refers to just one page extracted from the PDF file. //logger.debug("r2: "+r.toString()); //TODO return r; } catch (Exception e) { throw new DjatokaException(e); } } //*/ @Override // TODO // FIXME public final String[] getXMLBox(ImageRecord r) throws DjatokaException { String[] xml = null; try { if (r.getImageFile() == null && r.getObject() != null && r.getObject() instanceof InputStream) { // xml = new JP2ImageInfo((InputStream) r.getObject()).getXmlDocs(); } else { // xml = new JP2ImageInfo(new File(r.getImageFile())).getXmlDocs(); } } catch (Exception e) { logger.error(e, e); } return xml; } /** * Extracts region defined in DjatokaDecodeParam as BufferedImage * @param input absolute file path of PDF file. * @param params DjatokaDecodeParam instance containing region and transform settings. * @return extracted region as a BufferedImage * @throws DjatokaException */ @Override public BufferedImage process(String input, DjatokaDecodeParam params) throws DjatokaException { logger.debug("ExtractPDF.process:\n\tinput: " + input + "\n\tparams: " + params); if (input == null) throw new DjatokaException("Unknown failure while converting file: no image produced."); try { setPDFCommandsPath(); } catch (IllegalStateException e) { logger.error("Failed to set PDF commands path: ",e); throw e; } int page_number = 1 + params.getCompositingLayer(); // From 0-based to 1-based. int status = 0; BufferedImage processedImage = null; try { /* // First get max physical dim of bounding box of the page // to compute the DPI to ask for.. otherwise some AutoCAD // drawings can produce enormous files even at 75dpi, for // 48" drawings.. int dpi = 0; Dimension pageSize = getPDFPageSize(input, page_number); if (pageSize == null) { logger.error("Sanity check: Did not find \"Page " + page_number + " size\" line in output of pdfinfo, file="+input); throw new IllegalArgumentException("Failed to get \"Page " + page_number + " size\" of PDF with pdfinfo."); } else { double w = pageSize.getWidth(); double h = pageSize.getHeight(); int maxdim = (int)Math.max(Math.abs(w), Math.abs(h)); dpi = Math.min(MAX_DPI, (MAX_PX * 72 / maxdim)); logger.debug("DPI: pdfinfo method got dpi="+dpi+" for max dim="+maxdim+" (points, 1/72\")"); } */ // Scale int dpi = getScaledDPI(params); // Requires Sun JAI imageio additions to read ppm directly. // this will get "-[0]+1.ppm" appended to it by pdftoppm File outPrefixF = File.createTempFile("pdftopng", "out"); String outPrefix = outPrefixF.toString(); outPrefixF.delete(); //String pdfCmd[] = PDFTOPPM_COMMAND.clone(); ArrayList<String> pdfCmd = new ArrayList<String>(Arrays.asList(PDFTOPPM_COMMAND)); pdfCmd.set(PDFTOPPM_COMMAND_POSITION_BIN, pdftoppmPath); pdfCmd.set(PDFTOPPM_COMMAND_POSITION_FIRSTPAGE, "" + page_number); pdfCmd.set(PDFTOPPM_COMMAND_POSITION_LASTPAGE, "" + page_number); pdfCmd.set(PDFTOPPM_COMMAND_POSITION_DPI, String.valueOf(dpi)); pdfCmd.set(PDFTOPPM_COMMAND_POSITION_FILE, input.toString()); pdfCmd.set(PDFTOPPM_COMMAND_POSITION_OUTPUTFILE, outPrefix); // Crop Rectangle crop = getCropParam(params); if (crop != null) { String[] cropParams = {"-x", ""+(int)crop.getX(), "-y", ""+(int)crop.getY(), "-W", ""+(int)crop.getWidth(), "-H", ""+(int)crop.getHeight()}; pdfCmd.addAll(PDFTOPPM_COMMAND_POSITION_OPTIONAL_EXTRAS, Arrays.asList(cropParams)); } String[] pdfCmdA = pdfCmd.toArray(new String[pdfCmd.size()]); logger.debug("Running pdftoppm command: " + Arrays.deepToString(pdfCmdA)); //logger.debug("Running pdftoppm command: " + pdfCmd.toString()); File outf = null; Process pdfProc = null; try { pdfProc = Runtime.getRuntime().exec(pdfCmdA); status = pdfProc.waitFor(); logger.debug("status: " + status); // pdftoppm uses variable numbers of padding 0s to the output prefix. // E.g., may be prefix-000001.png, prefix-001.png or even prefix-01.png. // Version 0.12.3 (Poppler, not XPDF) seems to consider the total number of pages. // So, for example, in a PDF with 90 pages, the output will be "prefix-02.png"; // for a PDF with 350 pages, the output will be "prefix-002.png". // FIXME: try some approach where the PDF number of pages is considered without // running pdfinfo command again, thus making it simpler to determine the number // of padding zeros. Right now we going "brute force" because we do not know if // it is feasable to once again run the pdfinfo command. String tests[] = { outPrefix + "-" + page_number + ".png", outPrefix + "-0" + page_number + ".png", outPrefix + "-00" + page_number + ".png", outPrefix + "-000" + page_number + ".png", outPrefix + "-0000" + page_number + ".png", outPrefix + "-00000" + page_number + ".png" }; for (String outname : tests) { if ((new File(outname)).exists()) { outf = new File(outname); break; } } logger.debug("PDFTOPPM output is: "+outf+", exists=" + outf != null ? outf.exists() : "!"); processedImage = ImageIO.read(outf); // Rotate if (params.getRotationDegree() > 0) { processedImage = ImageProcessingUtils.rotate(processedImage, params.getRotationDegree()); } } catch (InterruptedException e) { logger.error("Failed converting PDF file to image: ", e); throw new IllegalArgumentException("Failed converting PDF file to image: ", e); } finally { if (outf != null) outf.delete(); // Our exec() should not produce any output, but we want to stay safe. // http://mark.koli.ch/2011/01/leaky-pipes-remember-to-close-your-streams-when-using-javas-runtimegetruntimeexec.html org.apache.commons.io.IOUtils.closeQuietly(pdfProc.getOutputStream()); org.apache.commons.io.IOUtils.closeQuietly(pdfProc.getInputStream()); org.apache.commons.io.IOUtils.closeQuietly(pdfProc.getErrorStream()); } } catch (Exception e) { logger.error("Failed converting PDF file to image: ", e); throw new IllegalArgumentException("Failed converting PDF file to image: ", e); } finally { if (status != 0) logger.error("PDF conversion proc failed, exit status="+status+", file="+input); } return processedImage; } public BufferedImage processUsingTemp(InputStream input, DjatokaDecodeParam params) throws DjatokaException { File in; // Copy to tmp file try { String cacheDir = OpenURLJP2KService.getCacheDir(); if (cacheDir != null) { in = File.createTempFile("tmp", ".pdf", new File(cacheDir)); } else { in = File.createTempFile("tmp", ".pdf"); } FileOutputStream fos = new FileOutputStream(in); in.deleteOnExit(); IOUtils.copyStream(input, fos); } catch (IOException e) { logger.error(e, e); throw new DjatokaException(e); } BufferedImage bi = process(in.getAbsolutePath(), params); if (in != null) { in.delete(); } return bi; } /** * Extracts region defined in DjatokaDecodeParam as BufferedImage * @param input InputStream containing a PDF bitstream. * @param params DjatokaDecodeParam instance containing region and transform settings. * @return extracted region as a BufferedImage * @throws DjatokaException */ @Override public BufferedImage process(InputStream input, DjatokaDecodeParam params) throws DjatokaException { return processUsingTemp(input, params); } /** * Extracts region defined in DjatokaDecodeParam as BufferedImage * @param input ImageRecord wrapper containing file reference, inputstream, etc. * @param params DjatokaDecodeParam instance containing region and transform settings. * @return extracted region as a BufferedImage * @throws DjatokaException */ @Override public BufferedImage process(ImageRecord input, DjatokaDecodeParam params) throws DjatokaException { logger.debug("in imagerecord;"); if (input.getImageFile() != null) return process(input.getImageFile(), params); else if (input.getObject() != null && (input.getObject() instanceof InputStream)) return process((InputStream) input.getObject(), params); else throw new DjatokaException( "File not defined and Input Object Type " + input.getObject().getClass().getName() + " is not supported"); } /** * Get PDF information with pdfinfo: * - "Pages: X": number of pages; * - "Page X size: www.ww hhh.hh": size of each page, in pts. * @returns a map: * - [Pages][n] * - [Page 1][111.11 222.22] * - [Page i][www.ww hhh.hh] * - [Page n][999.99 1000.00] */ private static Map<String, String> getPDFProperties(ImageRecord input) throws DjatokaException { logger.debug("Getting PDF info"); try { setPDFCommandsPath(); } catch (IllegalStateException e) { logger.error("Failed to set PDF commands path: ",e); throw e; } HashMap<String, String> pdfProperties = new HashMap<String, String>(); String sourcePath = null; if (input.getImageFile() != null) { logger.debug("PDFInfo image file: " + input.getImageFile()); sourcePath = input.getImageFile(); } else if (input.getObject() != null && (input.getObject() instanceof InputStream)) { FileInputStream fis = null; fis = (FileInputStream) input.getObject(); File in; // Copy to tmp file try { String cacheDir = OpenURLJP2KService.getCacheDir(); if (cacheDir != null) { in = File.createTempFile("tmp", ".pdf", new File(cacheDir)); } else { in = File.createTempFile("tmp", ".pdf"); } in.deleteOnExit(); FileOutputStream fos = new FileOutputStream(in); IOUtils.copyStream(fis, fos); } catch (IOException e) { logger.error(e, e); throw new DjatokaException(e); } sourcePath = in.getAbsolutePath(); } else { throw new DjatokaException( "File not defined and Input Object Type " + input //.getObject().getClass().getName() + " is not supported"); } String pdfinfoCmd[] = PDFINFO_COMMAND.clone(); pdfinfoCmd[PDFINFO_COMMAND_POSITION_BIN] = pdfinfoPath; pdfinfoCmd[PDFINFO_COMMAND_POSITION_FIRSTPAGE] = "1"; pdfinfoCmd[PDFINFO_COMMAND_POSITION_LASTPAGE] = "-1"; // Last page even we not knowing its number. pdfinfoCmd[PDFINFO_COMMAND_POSITION_FILE] = sourcePath; Process pdfProc = null; try { ArrayList<MatchResult> pageSizes = new ArrayList<MatchResult>(); MatchResult pages = null; pdfProc = Runtime.getRuntime().exec(pdfinfoCmd); BufferedReader lr = new BufferedReader(new InputStreamReader(pdfProc.getInputStream())); String line; for (line = lr.readLine(); line != null; line = lr.readLine()) { Matcher mm1 = PAGES_PATT.matcher(line); if (mm1.matches()) pages = mm1.toMatchResult(); Matcher mm2 = MEDIABOX_PATT.matcher(line); if (mm2.matches()) pageSizes.add(mm2.toMatchResult()); } int istatus = pdfProc.waitFor(); if (istatus != 0) logger.error("pdfinfo proc failed, exit status=" + istatus + ", file=" + sourcePath); if (pages == null) { logger.error("Did not find 'Pages' line in output of pdfinfo command: " + Arrays.deepToString(pdfinfoCmd)); pdfProperties.put("Pages", "0"); } else { //int n = Integer.parseInteger(pages.group(1)); pdfProperties.put("Pages", pages.group(1)); } if (pageSizes.isEmpty()) { logger.error("Did not find \"Page X size\" lines in output of pdfinfo command: " + Arrays.deepToString(pdfinfoCmd)); throw new IllegalArgumentException("Failed to get pages size of PDF with pdfinfo."); } else { for (MatchResult mr : pageSizes) { String page = mr.group(1); float x0 = Float.parseFloat(mr.group(2)); float y0 = Float.parseFloat(mr.group(3)); float x1 = Float.parseFloat(mr.group(4)); float y1 = Float.parseFloat(mr.group(5)); float w = Math.abs(x1 - x0); float h = Math.abs(y1 - y0); // Have to scale page sizes by max dpi (MAX_DPI / DEFAULT_DENSITY). Otherwise, BookReader.js will request the wrong zoom level (svc.level). float ws = w * MAX_DPI / DEFAULT_DENSITY; float hs = h * MAX_DPI / DEFAULT_DENSITY; String width = "" + ws; //mr.group(2); String height = "" + hs; //mr.group(3); pdfProperties.put("Page " + page, width + " " + height); } } } catch (Exception e) { logger.error("Failed getting PDF information: ", e); throw new DjatokaException("Failed getting PDF information: ", e); } finally { // Our exec() should just consume one of the streams, but we want to stay safe. // http://mark.koli.ch/2011/01/leaky-pipes-remember-to-close-your-streams-when-using-javas-runtimegetruntimeexec.html org.apache.commons.io.IOUtils.closeQuietly(pdfProc.getOutputStream()); org.apache.commons.io.IOUtils.closeQuietly(pdfProc.getInputStream()); org.apache.commons.io.IOUtils.closeQuietly(pdfProc.getErrorStream()); } return pdfProperties; } /* private static Dimension getPDFPageSize(String source, int page_number) throws DjatokaException { logger.debug("Getting PDF info for size of page '" + page_number + "'."); Dimension pageDimension = null; try { setPDFCommandsPath(); } catch (IllegalStateException e) { logger.error("Failed to set PDF commands path: ",e); throw e; } String pdfinfoCmd[] = PDFINFO_COMMAND.clone(); pdfinfoCmd[PDFINFO_COMMAND_POSITION_BIN] = pdfinfoPath; pdfinfoCmd[PDFINFO_COMMAND_POSITION_FIRSTPAGE] = "" + page_number; pdfinfoCmd[PDFINFO_COMMAND_POSITION_LASTPAGE] = "" + page_number; // Last page even we not knowing its number. pdfinfoCmd[PDFINFO_COMMAND_POSITION_FILE] = source; Process pdfProc = null; try { MatchResult pageSize = null;; pdfProc = Runtime.getRuntime().exec(pdfinfoCmd); BufferedReader lr = new BufferedReader(new InputStreamReader(pdfProc.getInputStream())); String line; for (line = lr.readLine(); line != null; line = lr.readLine()) { Matcher mm = MEDIABOX_PATT.matcher(line); if (mm.matches()) pageSize = mm.toMatchResult(); } int istatus = pdfProc.waitFor(); if (istatus != 0) logger.error("pdfinfo proc failed, exit status=" + istatus + ", file=" + source); if (pageSize == null) { logger.error("Did not find 'Page " + page_number + " size' line in output of pdfinfo command: " + pdfinfoCmd); //throw new IllegalArgumentException("Failed to get pages size of PDF with pdfinfo."); pageDimension = new Dimension(0, 0); } else { String page = pageSize.group(1); double x0 = Double.parseDouble(pageSize.group(2)); double y0 = Double.parseDouble(pageSize.group(3)); double x1 = Double.parseDouble(pageSize.group(4)); double y1 = Double.parseDouble(pageSize.group(5)); double width = Math.abs(x1 - x0); double height = Math.abs(y1 - y0); pageDimension = new Dimension(); pageDimension.setSize(width, height); } } catch (Exception e) { logger.error("Failed getting PDF page size: ", e); throw new DjatokaException("Failed getting PDF page size: ", e); } finally { // Our exec() should just consume one of the streams, but we want to stay safe. // http://mark.koli.ch/2011/01/leaky-pipes-remember-to-close-your-streams-when-using-javas-runtimegetruntimeexec.html org.apache.commons.io.IOUtils.closeQuietly(pdfProc.getOutputStream()); org.apache.commons.io.IOUtils.closeQuietly(pdfProc.getInputStream()); org.apache.commons.io.IOUtils.closeQuietly(pdfProc.getErrorStream()); } return pageDimension; } */ private int getScaledDPI(DjatokaDecodeParam params) { if (params.getLevel() >= 0) { int levels = DEFAULT_LEVELS; int reduce = levels - params.getLevel(); params.setLevelReductionFactor((reduce >= 0) ? reduce : 0); } else if (params.getLevel() == -1 && params.getRegion() == null && params.getScalingDimensions() != null) { int width = params.getScalingDimensions()[0]; int height = params.getScalingDimensions()[1]; int levels = DEFAULT_LEVELS; int scale_level = Math.min(MAX_DPI, (MAX_PX * DEFAULT_DENSITY / Math.max(Math.abs(width), Math.abs(height)))); int reduce = levels - scale_level; params.setLevelReductionFactor((reduce >= 0) ? reduce : 0); } if (params.getLevelReductionFactor() > 0) { int reduce = 1 << params.getLevelReductionFactor(); // => image.size() / 2^r: reduce 0 means image/1, reduce 1 means image/2, etc. double s = 1.0 / reduce; return (int)(MAX_DPI * s); } return MAX_DPI; } private Rectangle getCropParam(DjatokaDecodeParam params) { if (params.getRegion() != null) { StringTokenizer st = new StringTokenizer(params.getRegion(), "{},"); String token; logger.debug("Region params: " + params.getRegion()); int x, y, w, h; // top if ((token = st.nextToken()).contains(".")) { y = Integer.parseInt(token); } else { y = Integer.parseInt(token); } // left if ((token = st.nextToken()).contains(".")) { x = Integer.parseInt(token); } else { x = Integer.parseInt(token); } // height if ((token = st.nextToken()).contains(".")) { h = Integer.parseInt(token); } else { h = Integer.parseInt(token); } // width if ((token = st.nextToken()).contains(".")) { w = Integer.parseInt(token); } else { w = Integer.parseInt(token); } return new Rectangle(x, y, w, h); } return null; } private static void setPDFCommandsPath() throws IllegalStateException { // sanity check: poppler paths are required. can cache since it won't change if (pdftoppmPath == null || pdfinfoPath == null) { //props = IOUtils.loadConfigByCP(classConfig.getArg("props")); pdftoppmPath = ConfigurationManager.getProperty(PROPS_PDF_PDFTOPPM_PATH, DEFAULT_PDFTOPPM_PATH); pdfinfoPath = ConfigurationManager.getProperty(PROPS_PDF_PDFINFO_PATH, DEFAULT_PDFINFO_PATH); if (pdftoppmPath == null) throw new IllegalStateException("No value for key \"" + PROPS_PDF_PDFTOPPM_PATH + "\" in djatoka.properties! Should be path to pdftoppm executable."); if (pdfinfoPath == null) throw new IllegalStateException("No value for key \"" + PROPS_PDF_PDFINFO_PATH + "\" in djatoka.properties! Should be path to pdfinfo executable."); } } }