/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.pdmodel.graphics.xobject;

import java.awt.image.BufferedImage;
import java.awt.image.DataBufferByte;
import java.awt.image.WritableRaster;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.List;

import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.io.RandomAccess;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.common.PDStream;
import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray;

/**
 * An image class for CCITT Fax.
 *
 * @author <a href="ben@benlitchfield.com">Ben Litchfield</a>
 * @author paul king
 * @version $Revision: 1.6 $
 */
public class PDCcitt extends PDXObjectImage
{
    /** Names of the filters that are left encoded when the image is exported as TIFF. */
    private static final List<String> FAX_FILTERS = new ArrayList<String>();

    static
    {
        FAX_FILTERS.add( COSName.CCITTFAX_DECODE.getName() );
        FAX_FILTERS.add( COSName.CCITTFAX_DECODE_ABBREVIATION.getName() );
    }

    /**
     * Standard constructor.
     *
     * @param ccitt The PDStream that already contains all ccitt information.
     */
    public PDCcitt(PDStream ccitt)
    {
        super(ccitt, "tiff");
    }

    /**
     * Construct from a tiff file.
     *
     * @param doc The document to create the image as part of.
     * @param raf The random access TIFF file which contains a suitable CCITT compressed image
     * @throws IOException If there is an error reading the tiff data.
     */
    public PDCcitt( PDDocument doc, RandomAccess raf ) throws IOException
    {
        super( new PDStream(doc), "tiff" );

        COSDictionary decodeParms = new COSDictionary();
        COSStream stream = getCOSStream();

        // Copies the raw CCITT data into the stream and fills decodeParms from the TIFF tags.
        extractFromTiff(raf, stream.createFilteredStream(), decodeParms);

        stream.setItem( COSName.FILTER, COSName.CCITTFAX_DECODE );
        stream.setItem( COSName.SUBTYPE, COSName.IMAGE );
        stream.setItem( COSName.TYPE, COSName.XOBJECT );
        stream.setItem( COSName.DECODE_PARMS, decodeParms );

        setBitsPerComponent( 1 );
        setColorSpace( new PDDeviceGray() );
        setWidth( decodeParms.getInt(COSName.COLUMNS) );
        setHeight( decodeParms.getInt(COSName.ROWS) );
    }

    /**
     * Returns an image of the CCITT Fax, or null if TIFFs are not supported.
     * (Requires additional JAI Image filters )
     *
     * {@inheritDoc}
     */
    public BufferedImage getRGBImage() throws IOException
    {
        COSStream stream = getCOSStream();
        COSBase decodeP = stream.getDictionaryObject(COSName.DECODE_PARMS);

        COSDictionary decodeParms = null;
        if (decodeP instanceof COSDictionary)
        {
            decodeParms = (COSDictionary) decodeP;
        }
        else if (decodeP instanceof COSArray && ((COSArray) decodeP).size() > 0)
        {
            // getObject() resolves indirect references, get() would hand back the reference itself
            COSBase first = ((COSArray) decodeP).getObject(0);
            if (first instanceof COSDictionary)
            {
                decodeParms = (COSDictionary) first;
            }
        }
        if (decodeParms == null)
        {
            // DecodeParms is missing or unusable: fall back to the default values below
            decodeParms = new COSDictionary();
        }

        int cols = decodeParms.getInt(COSName.COLUMNS, 1728);
        int rows = decodeParms.getInt(COSName.ROWS, 0);
        int height = stream.getInt(COSName.HEIGHT, 0);
        if (rows > 0 && height > 0)
        {
            // ensure that rows doesn't contain implausible data, see PDFBOX-771
            rows = Math.min(rows, height);
        }
        else
        {
            // at least one of the values has to have a valid value
            rows = Math.max(rows, height);
        }
        boolean blackIsOne = decodeParms.getBoolean(COSName.BLACK_IS_1, false);

        BufferedImage image = new BufferedImage(cols, rows, BufferedImage.TYPE_BYTE_BINARY);
        WritableRaster raster = image.getRaster();
        DataBufferByte buffer = (DataBufferByte) raster.getDataBuffer();
        byte[] bufferData = buffer.getData();
        IOUtils.populateBuffer(stream.getUnfilteredStream(), bufferData);
        if (!blackIsOne)
        {
            // Inverting the bitmap.
            // Note the previous approach with starting from an IndexColorModel didn't work
            // reliably. In some cases the image wouldn't be painted for some reason.
            // So a safe but slower approach was taken.
            invertBitmap(bufferData);
        }
        return image;
    }

    /**
     * Flips every bit of the given buffer in place.
     *
     * @param bufferData the packed 1 bit per pixel image data
     */
    private void invertBitmap(byte[] bufferData)
    {
        for (int i = 0, c = bufferData.length; i < c; i++)
        {
            bufferData[i] = (byte) (~bufferData[i] & 0xFF);
        }
    }

    /**
     * This writes a tiff to out.
     *
     * {@inheritDoc}
     */
    public void write2OutputStream(OutputStream out) throws IOException
    {
        // We should use another format than TIFF to get rid of the TiffWrapper
        InputStream raw = getPDStream().getPartiallyFilteredStream( FAX_FILTERS );
        try
        {
            InputStream data = new TiffWrapper( raw, getCOSStream() );
            IOUtils.copy(data, out);
        }
        finally
        {
            // the source stream was opened here, so it is released here as well
            raw.close();
        }
    }

    /**
     * Extract the ccitt stream from the tiff file.
     *
     * @param raf - TIFF File
     * @param os - Stream to write raw ccitt data to; it is always closed by this method
     * @param parms - COSDictionary which the encoding parameters are added to
     * @throws IOException If there is an error reading/writing to/from the stream
     */
    private void extractFromTiff(RandomAccess raf, OutputStream os, COSDictionary parms) throws IOException
    {
        try
        {
            // First check the basic tiff header
            raf.seek(0);
            char endianess = (char) raf.read();
            if ((char) raf.read() != endianess)
            {
                throw new IOException("Not a valid tiff file");
            }
            // ensure that endianess is either M or I
            if (endianess != 'M' && endianess != 'I')
            {
                throw new IOException("Not a valid tiff file");
            }
            int magicNumber = readshort(endianess, raf);
            if (magicNumber != 42)
            {
                throw new IOException("Not a valid tiff file");
            }

            // Relocate to the first set of tags
            raf.seek(readlong(endianess, raf));

            int numtags = readshort(endianess, raf);

            // The number 50 is somewhat arbitrary, it just stops us loading up junk
            // from somewhere and tramping on
            if (numtags > 50)
            {
                throw new IOException("Not a valid tiff file");
            }

            // Loop through the tags, some will convert to items in the parms dictionary,
            // others point us to where to find the data stream.
            // The only parm which might change as a result of other options is K, so
            // we'll deal with that as a special.
            int k = -1000; // Default Non CCITT compression
            int dataoffset = 0;
            int datalength = 0;

            for (int i = 0; i < numtags; i++)
            {
                int tag = readshort(endianess, raf);
                int type = readshort(endianess, raf);
                int count = readlong(endianess, raf);
                int val = readlong(endianess, raf);

                // Note, we treated that value as a long. The value always occupies 4 bytes
                // but it might only use the first byte or two. For big endian files the
                // value then sits in the upper bytes and has to be shifted down.
                // The shift is unsigned so that values with the top bit set stay positive.
                // All other types are ignored, they are of little interest for PDFs/CCITT Fax.
                if (endianess == 'M')
                {
                    if (type == 1)
                    {
                        val = val >>> 24; // byte value
                    }
                    else if (type == 3)
                    {
                        val = val >>> 16; // short value
                    }
                    // type 4 (long) already uses all four bytes
                }

                switch (tag)
                {
                    case 256: // ImageWidth
                        parms.setInt(COSName.COLUMNS, val);
                        break;
                    case 257: // ImageLength
                        parms.setInt(COSName.ROWS, val);
                        break;
                    case 259: // Compression: T6/T4
                        if (val == 4)
                        {
                            k = -1;
                        }
                        if (val == 3)
                        {
                            k = 0;
                        }
                        break;
                    case 262: // PhotometricInterpretation
                        if (val == 1)
                        {
                            parms.setBoolean(COSName.BLACK_IS_1, true);
                        }
                        break;
                    case 273: // StripOffsets
                    case 324: // TileOffsets
                        if (count == 1)
                        {
                            dataoffset = val;
                        }
                        break;
                    case 279: // StripByteCounts
                    case 325: // TileByteCounts
                        if (count == 1)
                        {
                            datalength = val;
                        }
                        break;
                    case 292: // T4Options is a bit field, bit 0 flags 2D encoding
                        if ((val & 1) != 0)
                        {
                            k = 50; // T4 2D - arbitrary K value
                        }
                        break;
                    default:
                        // do nothing
                        break;
                }
            }

            if (k == -1000)
            {
                throw new IOException("First image in tiff is not CCITT T4 or T6 compressed");
            }
            if (dataoffset == 0)
            {
                throw new IOException("First image in tiff is not a single tile/strip");
            }

            parms.setInt(COSName.K, k);

            raf.seek(dataoffset);

            byte[] buf = new byte[8192];
            int amountRead = -1;
            while ((amountRead = raf.read(buf, 0, Math.min(8192, datalength))) > 0)
            {
                datalength -= amountRead;
                os.write(buf, 0, amountRead);
            }
        }
        finally
        {
            os.close();
        }
    }

    /**
     * Reads a 16 bit value from the current position of the file.
     *
     * @param endianess 'I' for little endian, anything else is read as big endian
     * @param raf the file to read from
     * @return the value read
     * @throws IOException If there is an error reading the file
     */
    private int readshort(char endianess, RandomAccess raf) throws IOException
    {
        if (endianess == 'I')
        {
            return raf.read() | (raf.read() << 8);
        }
        return (raf.read() << 8) | raf.read();
    }

    /**
     * Reads a 32 bit value from the current position of the file.
     *
     * @param endianess 'I' for little endian, anything else is read as big endian
     * @param raf the file to read from
     * @return the value read
     * @throws IOException If there is an error reading the file
     */
    private int readlong(char endianess, RandomAccess raf) throws IOException
    {
        if (endianess == 'I')
        {
            return raf.read() | (raf.read() << 8) | (raf.read() << 16) | (raf.read() << 24);
        }
        return (raf.read() << 24) | (raf.read() << 16) | (raf.read() << 8) | raf.read();
    }

    /**
     * Extends InputStream to wrap the data from the CCITT Fax with a suitable TIFF Header.
     * For details see www.tiff.org, which contains useful information including pointers to the
     * TIFF 6.0 Specification
     *
     */
    private class TiffWrapper extends InputStream
    {
        // Static data for the beginning of the TIFF header
        private final byte[] basicHeader =
        {
            'I', 'I', 42, 0, 8, 0, 0, 0, // File introducer and pointer to first IFD
            0, 0                         // Number of tags start with two
        };

        private int currentOffset;      // When reading, where in the tiffheader are we.
        private byte[] tiffheader;      // Byte array to store tiff header data
        private InputStream datastream; // Original InputStream
        private int additionalOffset;   // Offset in header to additional data

        /**
         * Creates a stream which delivers a TIFF header followed by the raw data.
         *
         * @param rawstream the CCITT encoded image data
         * @param options the dictionary of the image XObject
         */
        private TiffWrapper(InputStream rawstream, COSDictionary options)
        {
            buildHeader(options);
            currentOffset = 0;
            datastream = rawstream;
        }

        // Implement basic methods from InputStream
        /**
         * {@inheritDoc}
         */
        @Override
        public boolean markSupported()
        {
            return false;
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public void reset() throws IOException
        {
            throw new IOException("reset not supported");
        }

        /**
         * Closes the wrapped raw data stream.
         *
         * {@inheritDoc}
         */
        @Override
        public void close() throws IOException
        {
            datastream.close();
        }

        /**
         * For simple read, take a byte from the tiffheader array or pass through.
         *
         * {@inheritDoc}
         */
        @Override
        public int read() throws IOException
        {
            if (currentOffset < tiffheader.length)
            {
                // mask the byte: read() has to deliver 0..255, a negative value means end of stream
                return tiffheader[currentOffset++] & 0xFF;
            }
            return datastream.read();
        }

        /**
         * For read methods only return as many bytes as we have left in the header
         * if we've exhausted the header, pass through to the InputStream of the raw CCITT data.
         *
         * {@inheritDoc}
         */
        @Override
        public int read(byte[] data) throws IOException
        {
            return read(data, 0, data.length);
        }

        /**
         * For read methods only return as many bytes as we have left in the header
         * if we've exhausted the header, pass through to the InputStream of the raw CCITT data.
         *
         * {@inheritDoc}
         */
        @Override
        public int read(byte[] data, int off, int len) throws IOException
        {
            if (currentOffset >= tiffheader.length)
            {
                return datastream.read(data, off, len);
            }
            int length = Math.min(tiffheader.length - currentOffset, len);
            if (length > 0)
            {
                System.arraycopy(tiffheader, currentOffset, data, off, length);
            }
            currentOffset += length;
            return length;
        }

        /**
         * When skipping if any header data not yet read, only allow to skip what we've in the buffer
         * Otherwise just pass through.
         *
         * {@inheritDoc}
         */
        @Override
        public long skip(long n) throws IOException
        {
            if (currentOffset >= tiffheader.length)
            {
                return datastream.skip(n);
            }
            if (n <= 0)
            {
                // never move backwards within the header
                return 0;
            }
            int length = (int) Math.min(tiffheader.length - currentOffset, n);
            currentOffset += length;
            return length;
        }

        /**
         * Builds up the tiffheader based on the options passed through.
         *
         * @param options the dictionary of the image XObject
         */
        private void buildHeader(COSDictionary options)
        {
            final int numOfTags = 10;        // The maximum tags we'll fill
            final int maxAdditionalData = 24; // The maximum amount of additional data
                                              // outside the IFDs. (bytes)

            // The length of the header will be the length of the basic header (10)
            // plus 12 bytes for each IFD, 4 bytes as a pointer to the next IFD (will be 0)
            // plus the length of the additional data
            int ifdSize = 10 + (12 * numOfTags) + 4;
            tiffheader = new byte[ifdSize + maxAdditionalData]; // zero filled by the VM
            System.arraycopy(basicHeader, 0, tiffheader, 0, basicHeader.length);

            // Additional data outside the IFD starts after the IFD's and pointer to the next IFD (0)
            additionalOffset = ifdSize;

            // Now work out the variable values from TIFF defaults,
            // PDF Defaults and the Dictionary for this XObject
            short cols = 1728;
            short rows = 0;
            short blackis1 = 0;
            short comptype = 3; // T4 compression
            long t4options = 0; // Will set if 1d or 2d T4

            COSArray decode = getDecode();
            // we have to invert the b/w-values,
            // if the Decode array exists and consists of (1,0)
            if (decode != null && decode.size() > 0 && decode.getInt(0) == 1)
            {
                blackis1 = 1;
            }

            COSDictionary decodeParms = findDecodeParms(options);
            if (decodeParms != null)
            {
                cols = (short) decodeParms.getInt(COSName.COLUMNS, cols);
                rows = (short) decodeParms.getInt(COSName.ROWS, rows);
                if (decodeParms.getBoolean(COSName.BLACK_IS_1, false))
                {
                    blackis1 = 1;
                }
                int k = decodeParms.getInt(COSName.K, 0); // Mandatory parm
                if (k < 0)
                {
                    // T6
                    comptype = 4;
                }
                if (k > 0)
                {
                    // T4 2D
                    comptype = 3;
                    t4options = 1;
                }
                // else k = 0, leave as default T4 1D compression
            }

            // If we couldn't get the number of rows, use the main item from XObject
            if (rows == 0)
            {
                rows = (short) options.getInt(COSName.HEIGHT, rows);
            }

            // Now put the tags into the tiffheader
            // These mustn't exceed the maximum set above, and by TIFF spec should be sorted into
            // Numeric sequence.
            addTag(256, cols);     // Columns
            addTag(257, rows);     // Rows
            addTag(259, comptype); // T6
            addTag(262, blackis1); // Photometric Interpretation
            addTag(273, tiffheader.length); // Offset to start of image data, directly after the header
            addTag(279, options.getInt(COSName.LENGTH)); // Length of image data
            addTag(282, 300, 1);   // X Resolution 300 (default unit Inches) This is arbitrary
            addTag(283, 300, 1);   // Y Resolution 300 (default unit Inches) This is arbitrary
            if (comptype == 3)
            {
                addTag(292, t4options);
            }
            addTag(305, "PDFBOX"); // Software generating image
        }

        /**
         * Looks up the decode parameters to be used for the header.
         *
         * @param options the dictionary of the image XObject
         * @return the decode parameters, or null if there are none which can be used
         */
        private COSDictionary findDecodeParms(COSDictionary options)
        {
            COSBase dicOrArrayParms = options.getDictionaryObject(COSName.DECODE_PARMS);
            if (dicOrArrayParms instanceof COSDictionary)
            {
                return (COSDictionary) dicOrArrayParms;
            }
            if (!(dicOrArrayParms instanceof COSArray))
            {
                return null;
            }
            COSArray parmsArray = (COSArray) dicOrArrayParms;
            if (parmsArray.size() == 1)
            {
                COSBase single = parmsArray.getObject(0);
                return single instanceof COSDictionary ? (COSDictionary) single : null;
            }
            // else find the first dictionary with Row/Column info and use that.
            for (int i = 0; i < parmsArray.size(); i++)
            {
                COSBase entry = parmsArray.getObject(i);
                if (entry instanceof COSDictionary)
                {
                    COSDictionary dic = (COSDictionary) entry;
                    if (dic.getDictionaryObject(COSName.COLUMNS) != null
                            || dic.getDictionaryObject(COSName.ROWS) != null)
                    {
                        return dic;
                    }
                }
            }
            return null;
        }

        /* Tiff types 1 = byte, 2=ascii, 3=short, 4=ulong 5=rational */

        /**
         * Starts the next 12 byte IFD entry: increments the tag counter of the header
         * and writes the tag id, the type and the value count.
         *
         * @param tag the TIFF tag id
         * @param type the TIFF field type
         * @param count the number of values, only the lowest byte is stored
         * @return the offset of the new entry within the header
         */
        private int beginTag(int tag, int type, int count)
        {
            int index = tiffheader[8]++; // entries are numbered from 0, the counter from 1
            int offset = index * 12 + 10;
            tiffheader[offset] = (byte) (tag & 0xff);
            tiffheader[offset + 1] = (byte) ((tag >> 8) & 0xff);
            tiffheader[offset + 2] = (byte) type;
            tiffheader[offset + 4] = (byte) (count & 0xff);
            return offset;
        }

        /**
         * Writes the lower 32 bits of a value in little endian order into the header.
         *
         * @param offset the position of the first byte to be written
         * @param value the value to be written
         */
        private void putInt(int offset, long value)
        {
            tiffheader[offset] = (byte) (value & 0xff);
            tiffheader[offset + 1] = (byte) ((value >> 8) & 0xff);
            tiffheader[offset + 2] = (byte) ((value >> 16) & 0xff);
            tiffheader[offset + 3] = (byte) ((value >> 24) & 0xff);
        }

        /**
         * Adds a tag of type 4 (ulong).
         *
         * @param tag the TIFF tag id
         * @param value the value of the tag
         */
        private void addTag(int tag, long value)
        {
            int offset = beginTag(tag, 4, 1);
            putInt(offset + 8, value);
        }

        /**
         * Adds a tag of type 3 (short).
         *
         * @param tag the TIFF tag id
         * @param value the value of the tag
         */
        private void addTag(int tag, short value)
        {
            int offset = beginTag(tag, 3, 1);
            tiffheader[offset + 8] = (byte) (value & 0xff);
            tiffheader[offset + 9] = (byte) ((value >> 8) & 0xff);
        }

        /**
         * Adds a tag of type 2 (ascii). The text itself is stored in the additional
         * data area behind the IFD, followed by a terminating zero byte.
         *
         * @param tag the TIFF tag id
         * @param value the text of the tag
         */
        private void addTag(int tag, String value)
        {
            int len = value.length() + 1; // including the terminating zero byte
            int offset = beginTag(tag, 2, len);
            putInt(offset + 8, additionalOffset);
            try
            {
                System.arraycopy(value.getBytes("US-ASCII"), 0,
                        tiffheader, additionalOffset, value.length());
            }
            catch (UnsupportedEncodingException e)
            {
                throw new RuntimeException("Incompatible VM without US-ASCII encoding", e);
            }
            additionalOffset += len;
        }

        /**
         * Adds a tag of type 5 (rational). Numerator and denominator are stored in the
         * additional data area behind the IFD.
         *
         * @param tag the TIFF tag id
         * @param numerator the numerator of the value
         * @param denominator the denominator of the value
         */
        private void addTag(int tag, long numerator, long denominator)
        {
            int offset = beginTag(tag, 5, 1);
            putInt(offset + 8, additionalOffset);
            putInt(additionalOffset, numerator);
            putInt(additionalOffset + 4, denominator);
            additionalOffset += 8;
        }
    }
}