package org.cdlib.xtf.saxonExt.pipe;
/*
* Copyright (c) 2009, Regents of the University of California
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the University of California nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import javax.servlet.http.HttpServletResponse;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.sax.SAXResult;
import org.cdlib.xtf.saxonExt.ElementWithContent;
import org.cdlib.xtf.saxonExt.InstructionWithContent;
import org.cdlib.xtf.servletBase.TextServlet;
import org.cdlib.xtf.util.Trace;
import org.cdlib.xtf.xslt.FileUtils;
import org.xml.sax.SAXException;
import org.apache.avalon.framework.configuration.Configuration;
import org.apache.avalon.framework.configuration.ConfigurationException;
import org.apache.avalon.framework.configuration.DefaultConfigurationBuilder;
import org.apache.commons.io.output.ByteArrayOutputStream;
import org.apache.fop.apps.FOPException;
import org.apache.fop.apps.FOUserAgent;
import org.apache.fop.apps.Fop;
import org.apache.fop.apps.FopFactory;
import org.apache.fop.apps.MimeConstants;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Image;
import com.itextpdf.text.pdf.BadPdfFormatException;
import com.itextpdf.text.pdf.PdfContentByte;
import com.itextpdf.text.pdf.PdfCopy;
import com.itextpdf.text.pdf.PdfDictionary;
import com.itextpdf.text.pdf.PdfImportedPage;
import com.itextpdf.text.pdf.PdfName;
import com.itextpdf.text.pdf.PdfObject;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfString;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.text.pdf.SimpleBookmark;
import com.itextpdf.text.pdf.PdfCopy.PageStamp;
import net.sf.saxon.expr.Expression;
import net.sf.saxon.expr.XPathContext;
import net.sf.saxon.instruct.Executable;
import net.sf.saxon.instruct.TailCall;
import net.sf.saxon.om.Item;
import net.sf.saxon.tinytree.TinyBuilder;
import net.sf.saxon.trans.XPathException;
/**
* Implements a Saxon extension that runs the FOP processor
* to transform XSL-FO formatting instructions into a PDF file,
* and pipes that PDF back to the client.
*/
public class PipeFopElement extends ElementWithContent
{
private static HashMap<String, FopFactory> fopFactories = new HashMap();
private static Lock fopLock = new ReentrantLock();
private enum MergeAt { START, END };
private enum MergeMode { SEQUENTIAL, OVERLAY, UNDERLAY };
/**
 * Parses the attributes of the pipeFop element at stylesheet compile time.
 * There are no mandatory attributes; everything is an optional modifier.
 * (Defaults marked with '*' below.)
 */
public void prepareAttributes() throws XPathException
{
String[] mandatoryAtts = { };
String[] optionalAtts = { "fileName",
"author", "creator", "keywords", "producer", "title",
"overrideMetadata", // default: no
"appendPDF", // backward compatibility only (no default)
"mergePDFFile", // default: none
"mergeAt", // "start", *"end" (values accepted by the processing code)
"mergeMode", // *"sequential", "overlay", "underlay"
"fallbackIfError", // default: yes
"fontDirs", // default: none
"waitTime" // default: 5 (seconds)
};
parseAttributes(mandatoryAtts, optionalAtts);
}
/**
 * Compiles this element into a Saxon instruction that, when executed,
 * runs FOP over the element's content and pipes the resulting PDF.
 */
public Expression compile(Executable exec) throws XPathException {
return new PipeFopInstruction(attribs, compileContent(exec));
}
/** Worker class for PipeFopElement */
private static class PipeFopInstruction extends InstructionWithContent
{
/**
 * @param attribs  attribute expressions gathered by prepareAttributes()
 * @param content  compiled element content (the XSL-FO to feed to FOP)
 */
public PipeFopInstruction(Map<String, Expression> attribs, Expression content)
{
super("pipe:pipeFop", attribs, content);
}
/**
 * The real workhorse. Evaluates the element content as XSL-FO, runs FOP
 * over it (single-threaded, under a time-limited lock) to produce a PDF,
 * optionally merges in an external PDF file, and pipes the final bytes
 * to the servlet client with an accurate Content-length.
 *
 * @param context  the current XPath dynamic context
 * @return always null (this instruction produces no tail call)
 * @throws XPathException if attribute evaluation fails, or FOP/merge
 *         processing fails and the fallbackIfError path doesn't apply
 */
@Override
public TailCall processLeavingTail(XPathContext context)
  throws XPathException
{
  // Set the content type
  HttpServletResponse servletResponse = TextServlet.getCurResponse();
  servletResponse.setHeader("Content-type", "application/pdf");

  // If output file name specified, add the Content-disposition header.
  String fileName = getAttribStr("fileName", context);
  if (fileName != null && fileName.length() != 0)
    servletResponse.setHeader("Content-disposition", "attachment; filename=\"" + fileName + "\"");

  // Get name of file to merge, if any.
  String nameToMerge = getAttribStr("mergePDFFile", context,
      /*backward-compatibility:*/ getAttribStr("appendPDF", context, null));

  // Resolve it to a full path.
  File fileToMerge = null;
  if (nameToMerge != null) {
    fileToMerge = FileUtils.resolveFile(context, nameToMerge);
    if (!fileToMerge.canRead())
      dynamicError("Cannot read file '" + fileToMerge.toString() + "'", "PIPE_FOP_010", context);
  }

  // Merge mode (if any). Note: the two-argument getAttribStr() returns
  // null for a missing optional attribute, so we must supply the default
  // here -- otherwise omitting "mergeMode" caused a NullPointerException.
  MergeMode mergeMode = MergeMode.SEQUENTIAL;
  String tmp = getAttribStr("mergeMode", context, "sequential");
  if (tmp.equalsIgnoreCase("sequential"))
    mergeMode = MergeMode.SEQUENTIAL;
  else if (tmp.equalsIgnoreCase("overlay"))
    mergeMode = MergeMode.OVERLAY;
  else if (tmp.equalsIgnoreCase("underlay"))
    mergeMode = MergeMode.UNDERLAY;
  else
    dynamicError("Unrecognized merge mode '" + tmp + "'", "PIPE_FOP_008", context);

  // Merge location (if any). Default is "end", matching the documented
  // default and the old "appendPDF" (append-after) behavior.
  MergeAt mergeAt = MergeAt.END;
  tmp = getAttribStr("mergeAt", context, "end");
  if (tmp.equalsIgnoreCase("start"))
    mergeAt = MergeAt.START;
  else if (tmp.equalsIgnoreCase("end"))
    mergeAt = MergeAt.END;
  else
    dynamicError("Unrecognized merge at '" + tmp + "'", "PIPE_FOP_009", context);

  try {
    // Interesting workaround: using FOP normally results in an AWT "Window"
    // being created. However, since we're running in a servlet container, this
    // isn't generally desirable (and often isn't possible.) So we let AWT know
    // that it's running in "headless" mode, and this prevents the window from
    // being created.
    //
    System.setProperty("java.awt.headless", "true");

    // Despite generally being a NodeInfo, 'content' doesn't seem to work directly
    // as a Source. Fortunately TinyBuilder gives us very fast way to convert it.
    //
    Item contentItem = content.evaluateItem(context);
    Source src = TinyBuilder.build((Source)contentItem, null, context.getConfiguration());

    // Setup JAXP using identity transformer
    TransformerFactory transFactory = new net.sf.saxon.TransformerFactoryImpl();
    Transformer transformer = transFactory.newTransformer(); // identity transformer

    // So that we can keep the lock on FOP short, and also so we can send an
    // accurate Content-length header to the client, we'll accumulate the FOP output
    // in a byte buffer. We use the Apache ByteArrayOutputStream class since it
    // doesn't constantly realloc-copy when the buffer needs to grow.
    //
    ByteArrayOutputStream fopOut = new ByteArrayOutputStream();

    // According to the Apache docs, FOP may not be thread-safe. So, we need to
    // single-thread it. However, we must at all costs keep requests from backing
    // up behind each other in a scenario where many clients are making requests
    // all at once. So put a time limit on it.
    //
    int lockTime;
    if (attribs.containsKey("waitTime"))
      lockTime = Integer.parseInt(attribs.get("waitTime").evaluateAsString(context));
    else
      lockTime = 5; // default to waiting 5 seconds

    boolean gotLock = false;
    try {
      // waitTime <= 0 means "wait forever".
      if (lockTime <= 0) {
        gotLock = true;
        fopLock.lock();
      }
      else
        gotLock = fopLock.tryLock(lockTime, TimeUnit.SECONDS);

      // Failure to get the lock is an error. However, this exception will
      // be caught below and, if requested, we'll fall back to just outputting
      // the append PDF.
      //
      if (!gotLock)
        throw new TimeoutException("Timed out trying to obtain FOP lock");

      // For speed, only create FOP factory if we haven't already got one.
      FopFactory fopFactory = createFopFactory(context);

      // Apply the optional things that can be added to the PDF header
      FOUserAgent foAgent = fopFactory.newFOUserAgent();
      if (attribs.containsKey("author"))
        foAgent.setAuthor(attribs.get("author").evaluateAsString(context));
      if (attribs.containsKey("creator"))
        foAgent.setCreator(attribs.get("creator").evaluateAsString(context));
      if (attribs.containsKey("keywords"))
        foAgent.setKeywords(attribs.get("keywords").evaluateAsString(context));
      if (attribs.containsKey("producer"))
        foAgent.setProducer(attribs.get("producer").evaluateAsString(context));
      if (attribs.containsKey("title"))
        foAgent.setTitle(attribs.get("title").evaluateAsString(context));

      // Now run FOP
      Fop fop = fopFactory.newFop(MimeConstants.MIME_PDF, foAgent, fopOut);
      transformer.transform(src, new SAXResult(fop.getDefaultHandler()));
    }
    finally
    {
      // Always release the FOP lock when we're done, regardless of what happened.
      if (gotLock)
        fopLock.unlock();
    }

    // Now that we've released the FOP lock, check if we need to merge a PDF or not.
    ByteArrayOutputStream finalOut;
    if (fileToMerge != null) {
      finalOut = new ByteArrayOutputStream(fopOut.size() + (int)fileToMerge.length());
      mergePdf(context, fopOut.toByteArray(), fileToMerge, mergeMode, mergeAt, finalOut);
    }
    else
      finalOut = fopOut;

    // Now we know the output length, so let the client know and then send it.
    servletResponse.setHeader("Content-length", Integer.toString(finalOut.size()));
    servletResponse.getOutputStream().write(finalOut.toByteArray());
  }
  catch (Throwable e)
  {
    // Catching Throwable is deliberate: whatever went wrong, we may still be
    // able to satisfy the client by piping the raw merge PDF.
    //
    // If requested, fall back to simply piping the PDF file itself, without any FOP prefix.
    if (fileToMerge != null)
    {
      if (getAttribBool("fallbackIfError", context, true))
      {
        try {
          Trace.warning("Warning: pipeFop failed, falling back to just piping PDF file. Cause: " + e.toString());
          servletResponse.setHeader("Content-length", Long.toString(fileToMerge.length()));
          PipeFileElement.copyFileToStream(fileToMerge, servletResponse.getOutputStream());
          e = null; // fallback succeeded; suppress the original error
        }
        catch (IOException e2) {
          e = e2;
        }
      }
    }

    // Process any resulting exception into a Saxon dynamic error.
    if (e != null) {
      String code;
      if (e instanceof IOException)
        code = "PIPE_FOP_001";
      else if (e instanceof TransformerException)
        code = "PIPE_FOP_002";
      else if (e instanceof FOPException)
        code = "PIPE_FOP_003";
      else if (e instanceof DocumentException)
        code = "PIPE_FOP_004";
      else if (e instanceof TimeoutException)
        code = "PIPE_FOP_005";
      else
        code = "PIPE_FOP_006";
      dynamicError(e, "Error while piping FOP: " + e.toString(), code, context);
    }
  }

  // All done.
  return null;
}
/**
 * Create a FOP factory and configure it, if we don't already have one.
 * Factories are cached per font-directory list because building one (which
 * involves scanning font files) is expensive. The shared cache itself is
 * not synchronized; in practice this is called while holding fopLock
 * (see processLeavingTail) -- NOTE(review): confirm no other callers.
 *
 * @param context  the current XPath dynamic context (for attribute evaluation)
 * @return a cached or freshly configured FopFactory
 */
private FopFactory createFopFactory(XPathContext context)
  throws ConfigurationException, SAXException, IOException, XPathException
{
  // See if any font directories were specified.
  String fontDirs = "";
  if (attribs.containsKey("fontDirs"))
    fontDirs = attribs.get("fontDirs").evaluateAsString(context);

  // If we've already created a factory with this set of font directories,
  // don't re-create (it's expensive.)
  //
  if (fopFactories.containsKey(fontDirs))
    return fopFactories.get(fontDirs);

  // Gotta make a new one.
  FopFactory factory = FopFactory.newInstance();
  if (fontDirs.length() > 0)
  {
    // The only way I've figured out to put font search directories into the
    // factory is to feed in an XML config file. So construct one.
    //
    StringBuilder buf = new StringBuilder();
    buf.append("<?xml version=\"1.0\"?>" +
               "<fop version=\"1.0\">" +
               "  <renderers>" +
               "    <renderer mime=\"application/pdf\">" +
               "      <fonts>");
    for (String dir : fontDirs.split(";"))
      buf.append("      <directory>" + dir + "</directory>");
    buf.append("    </fonts>" +
               "    </renderer>" +
               "  </renderers>" +
               "</fop>");

    // Encode the config XML as UTF-8 explicitly. The document has no
    // encoding declaration, so the parser will assume UTF-8; the previous
    // OutputStreamWriter used the platform default charset, which could
    // mangle non-ASCII directory names on some platforms.
    //
    ByteArrayInputStream bis = new ByteArrayInputStream(buf.toString().getBytes("UTF-8"));

    // Build the configuration and stick it into the factory.
    DefaultConfigurationBuilder cfgBuilder = new DefaultConfigurationBuilder();
    Configuration config = cfgBuilder.build(bis);
    factory.setUserConfig(config);
  }

  // Cache this factory so we don't have to create it again (they're expensive.)
  fopFactories.put(fontDirs, factory);
  return factory;
}
/**
 * Do the work of joining the FOP output and a PDF together. This involves
 * several steps:
 *
 * 1. Based on parameters specified in the PipeFOP command, determine how
 *    the pages will overlap.
 * 2. Merge bookmarks and metadata
 * 3. Output the pages
 *
 * @param context      current XPath context (used for metadata attributes)
 * @param origPdfData  the PDF bytes that FOP just generated
 * @param fileToAppend the external PDF file to merge in
 * @param mergeMode    sequential, overlay, or underlay
 * @param mergeAt      whether the external PDF goes at the start or end
 * @param outStream    destination for the combined PDF
 */
private void mergePdf(XPathContext context,
                      byte[] origPdfData,
                      File fileToAppend,
                      MergeMode mergeMode,
                      MergeAt mergeAt,
                      OutputStream outStream)
  throws IOException, DocumentException, BadPdfFormatException, XPathException
{
  PdfReader[] readers = new PdfReader[2];
  HashMap<String,String>[] infos = new HashMap[2];
  int[] nInPages = new int[2];
  int[] pageOffsets = new int[2];
  int nOutPages = 0;

  // Read in the PDF that FOP generated and the one we're merging.
  readers[0] = new PdfReader(origPdfData);

  // iText's PdfReader(InputStream) reads the stream to the end but does
  // NOT close it, so close it ourselves to avoid leaking a file handle.
  FileInputStream mergeIn = new FileInputStream(fileToAppend);
  try {
    readers[1] = new PdfReader(new BufferedInputStream(mergeIn));
  }
  finally {
    mergeIn.close();
  }

  // Perform processing that's identical for both
  for (int i=0; i<2; i++)
  {
    readers[i].consolidateNamedDestinations();
    infos[i] = readers[i].getInfo();
    nInPages[i] = readers[i].getNumberOfPages();
  }

  // Calculate page offsets depending on the merge mode. pageOffsets[j] is
  // how far reader j's pages are shifted in the output document.
  switch (mergeMode)
  {
    case SEQUENTIAL:
      nOutPages = nInPages[0] + nInPages[1];
      switch (mergeAt) {
        case START:
          pageOffsets[0] = nInPages[1];
          pageOffsets[1] = 0;
          break;
        case END:
          pageOffsets[0] = 0;
          pageOffsets[1] = nInPages[0];
          break;
      }
      break;
    case OVERLAY:
    case UNDERLAY:
      // Pages are stacked rather than interleaved; align the (possibly
      // shorter) merge file to the start or end of the FOP output.
      nOutPages = Math.max(nInPages[0], nInPages[1]);
      pageOffsets[0] = 0;
      if (mergeAt == MergeAt.END)
        pageOffsets[1] = Math.max(0, nInPages[0] - nInPages[1]);
      else
        pageOffsets[1] = 0;
      break;
  }

  // Construct the copying writer
  Document pdfDocument = new Document(readers[0].getPageSizeWithRotation(1));
  PdfCopy pdfWriter = new PdfCopy(pdfDocument, outStream);
  pdfDocument.open();

  // Merge the metadata
  mergeMetadata(infos, pdfWriter, context);

  // Copy bookmarks from both PDFs, shifting their page references to match
  // the pages' new positions in the combined document.
  ArrayList allBookmarks = new ArrayList();
  for (int i=0; i<2; i++) {
    List bookmarks = SimpleBookmark.getBookmark(readers[i]);
    if (bookmarks != null) {
      if (pageOffsets[i] != 0)
        SimpleBookmark.shiftPageNumbers(bookmarks, pageOffsets[i], null);
      allBookmarks.addAll(bookmarks);
    }
  }

  PageInfo[] basePages = new PageInfo[nOutPages];
  PageInfo[] mergePages = new PageInfo[nOutPages];

  // Gather all the info we'll need to merge the pages. For some reason,
  // iText needs us to make all the template images before using any
  // of them.
  //
  for (int i = 0; i < nOutPages; i++)
  {
    for (int j=0; j<2; j++)
    {
      int inPageNum = i - pageOffsets[j];
      if (inPageNum < 0 || inPageNum >= nInPages[j])
        continue;
      PageInfo info = new PageInfo();
      info.reader = readers[j];
      info.pageNum = inPageNum+1;
      if (basePages[i] == null)
        basePages[i] = info;
      else {
        // Second contributor to this output page: import it as an image
        // so it can be stamped over/under the base page below.
        info.impPage = pdfWriter.getImportedPage(info.reader, info.pageNum);
        info.image = Image.getInstance(info.impPage);
        mergePages[i] = info;
      }
    }
  }

  // Now emit each output page, stamping the merged page on top of (or
  // underneath) the base page when both PDFs contribute to it.
  for (int i = 0; i < nOutPages; i++)
  {
    PageInfo base = basePages[i];
    base.impPage = pdfWriter.getImportedPage(base.reader, base.pageNum);
    if (mergePages[i] != null)
    {
      PageStamp ps = pdfWriter.createPageStamp(base.impPage);
      PdfContentByte contentBuf = null;
      if (mergeMode == MergeMode.OVERLAY)
        contentBuf = ps.getOverContent();
      else if (mergeMode == MergeMode.UNDERLAY)
        contentBuf = ps.getUnderContent();
      else
        assert false : "page offset calculations were wrong"; // SEQUENTIAL pages never overlap
      Image img = Image.getInstance(mergePages[i].image); // this is the trick
      contentBuf.addImage(img, base.impPage.getWidth(), 0, 0, base.impPage.getHeight(), 0, 0);
      ps.alterContents();
    }
    pdfWriter.addPage(base.impPage);
  }

  // Set the combined bookmarks.
  if (!allBookmarks.isEmpty())
    pdfWriter.setOutlines(allBookmarks);

  // And we're done. Close the document first (flushes all pages), then
  // release the readers' resources.
  pdfDocument.close();
  readers[0].close();
  readers[1].close();
}
/**
 * Merge metadata from the FOP-generated PDF and a PDF we're merging into it.
 * Generally metadata in the merge file takes precedence over the FOP metadata,
 * but the "overrideMetadata" option reverses that behavior.
 *
 * @param infos      info dictionaries: [0] = FOP output, [1] = merge file
 * @param pdfWriter  the writer whose output info dictionary is updated
 * @param context    current XPath context (to read "overrideMetadata")
 */
private void mergeMetadata(HashMap<String, String>[] infos, PdfWriter pdfWriter, XPathContext context)
  throws XPathException
{
  boolean override = getAttribBool("overrideMetadata", context, false);

  // Later puts win, so the lower-precedence map goes in first.
  HashMap<String, String> toPut = new HashMap<String, String>();
  if (override) {
    toPut.putAll(infos[1]);
    toPut.putAll(infos[0]);
  }
  else {
    toPut.putAll(infos[0]);
    toPut.putAll(infos[1]);
  }

  PdfDictionary outInfo = pdfWriter.getInfo();
  for (Map.Entry<String, String> ent : toPut.entrySet())
  {
    // Keep iText as the producer
    if (ent.getKey().equals("Producer"))
      continue;

    // Filter out missing or empty values (a null here would previously
    // have caused a NullPointerException on trim()).
    String val = ent.getValue();
    if (val == null || (val = val.trim()).length() == 0)
      continue;

    // Add the new metadata
    outInfo.put(new PdfName(ent.getKey()), new PdfString(val, PdfObject.TEXT_UNICODE));
  }
}
/** Simple holder for the data needed to place one input page in the output. */
private static class PageInfo
{
PdfReader reader; // which input PDF this page comes from
int pageNum; // 1-based page number within that reader
PdfImportedPage impPage; // the page as imported into the output writer
Image image; // the page wrapped as an image, for overlay/underlay stamping
}
}
}