package com.tom_roush.pdfbox.pdmodel.graphics.image;
import com.tom_roush.pdfbox.cos.COSDictionary;
import com.tom_roush.pdfbox.cos.COSName;
import com.tom_roush.pdfbox.io.RandomAccess;
import com.tom_roush.pdfbox.io.RandomAccessFile;
import com.tom_roush.pdfbox.pdmodel.PDDocument;
import com.tom_roush.pdfbox.pdmodel.graphics.color.PDDeviceGray;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
/**
* Factory for creating a PDImageXObject containing a CCITT Fax compressed TIFF image.
*
* @author Ben Litchfield
* @author Paul King
*/
public final class CCITTFactory
{
    /** TIFF field type BYTE: the value occupies the first byte of the 4-byte value field. */
    private static final int TIFF_TYPE_BYTE = 1;

    /** TIFF field type SHORT: the value occupies the first two bytes of the 4-byte value field. */
    private static final int TIFF_TYPE_SHORT = 3;

    /**
     * Upper bound on the number of entries accepted in one image file directory. The number is
     * somewhat arbitrary; it just stops us from loading junk from somewhere and trampling on.
     */
    private static final int MAX_TAGS = 50;

    /** Sentinel for "no CCITT compression tag seen yet". */
    private static final int K_UNSET = -1000;

    private CCITTFactory()
    {
    }

    /**
     * Creates a new CCITT Fax compressed Image XObject from the first page of
     * a TIFF file. The reader is not closed by this method.
     *
     * @param document the document to create the image as part of.
     * @param reader the random access TIFF file which contains a suitable CCITT
     * compressed image
     * @return a new Image XObject
     * @throws IOException if there is an error reading the TIFF data.
     *
     * @deprecated Use {@link #createFromFile(PDDocument, File)} instead.
     */
    @Deprecated
    public static PDImageXObject createFromRandomAccess(PDDocument document, RandomAccess reader)
            throws IOException
    {
        return createFromRandomAccessImpl(document, reader, 0);
    }

    /**
     * Creates a new CCITT Fax compressed Image XObject from a TIFF file.
     * The reader is not closed by this method.
     *
     * @param document the document to create the image as part of.
     * @param reader the random access TIFF file which contains a suitable CCITT
     * compressed image
     * @param number TIFF image number, starting from 0
     * @return a new Image XObject, or null if no such page
     * @throws IOException if there is an error reading the TIFF data.
     *
     * @deprecated Use {@link #createFromFile(PDDocument, File, int)} instead.
     */
    @Deprecated
    public static PDImageXObject createFromRandomAccess(PDDocument document, RandomAccess reader,
            int number) throws IOException
    {
        return createFromRandomAccessImpl(document, reader, number);
    }

    /**
     * Creates a new CCITT Fax compressed Image XObject from the first page of
     * a TIFF file. The file is opened read-only and closed again before this
     * method returns.
     *
     * @param document the document to create the image as part of.
     * @param file the TIFF file which contains a suitable CCITT compressed image
     * @return a new Image XObject
     * @throws IOException if there is an error reading the TIFF data.
     */
    public static PDImageXObject createFromFile(PDDocument document, File file)
            throws IOException
    {
        return createFromFile(document, file, 0);
    }

    /**
     * Creates a new CCITT Fax compressed Image XObject from one page of a TIFF
     * file. The file is opened read-only and closed again before this method
     * returns.
     *
     * @param document the document to create the image as part of.
     * @param file the TIFF file which contains a suitable CCITT compressed image
     * @param number TIFF image number, starting from 0
     * @return a new Image XObject, or null if no such page
     * @throws IOException if there is an error reading the TIFF data.
     */
    public static PDImageXObject createFromFile(PDDocument document, File file, int number)
            throws IOException
    {
        RandomAccessFile raf = new RandomAccessFile(file, "r");
        try
        {
            // The implementation copies the compressed data into memory before
            // building the image, so the file is no longer needed afterwards.
            return createFromRandomAccessImpl(document, raf, number);
        }
        finally
        {
            raf.close();
        }
    }

    /**
     * Creates a new CCITT Fax compressed Image XObject from a TIFF file.
     *
     * @param document the document to create the image as part of.
     * @param reader the random access TIFF file which contains a suitable CCITT
     * compressed image
     * @param number TIFF image number, starting from 0
     * @return a new Image XObject, or null if no such page
     * @throws IOException if there is an error reading the TIFF data.
     */
    private static PDImageXObject createFromRandomAccessImpl(PDDocument document,
            RandomAccess reader,
            int number) throws IOException
    {
        COSDictionary decodeParms = new COSDictionary();
        ByteArrayOutputStream bos = new ByteArrayOutputStream();
        extractFromTiff(reader, bos, decodeParms, number);
        if (bos.size() == 0)
        {
            // nothing was extracted, e.g. the requested page does not exist
            return null;
        }
        ByteArrayInputStream encodedByteStream = new ByteArrayInputStream(bos.toByteArray());
        PDImageXObject pdImage = new PDImageXObject(document,
                encodedByteStream,
                COSName.CCITTFAX_DECODE,
                decodeParms.getInt(COSName.COLUMNS),
                decodeParms.getInt(COSName.ROWS),
                1, PDDeviceGray.INSTANCE);
        COSDictionary dict = pdImage.getCOSStream();
        dict.setItem(COSName.DECODE_PARMS, decodeParms);
        return pdImage;
    }

    /**
     * Extracts the CCITT stream of one image from the TIFF file.
     *
     * @param reader the TIFF data; it is repositioned but not closed.
     * @param os receives the raw compressed image data; it is always closed on exit. Nothing is
     * written if the file has fewer than {@code number + 1} images.
     * @param params receives the decode parameters (Columns, Rows, BlackIs1, K) found in the tags.
     * @param number TIFF image number, starting from 0
     * @throws IOException if the data is not a valid TIFF file, ends prematurely, or the image
     * uses an unsupported layout or compression.
     */
    private static void extractFromTiff(RandomAccess reader, OutputStream os,
            COSDictionary params, int number) throws IOException
    {
        try
        {
            // First check the basic tiff header
            reader.seek(0);
            char endianess = (char) reader.read();
            if ((char) reader.read() != endianess)
            {
                throw new IOException("Not a valid tiff file");
            }
            // ensure that endianess is either M or I
            if (endianess != 'M' && endianess != 'I')
            {
                throw new IOException("Not a valid tiff file");
            }
            int magicNumber = readshort(endianess, reader);
            if (magicNumber != 42)
            {
                throw new IOException("Not a valid tiff file");
            }

            // Relocate to the first set of tags
            int address = readlong(endianess, reader);
            reader.seek(address);

            // If some higher page number is required, skip this page's tags,
            // then read the next page's address
            for (int i = 0; i < number; i++)
            {
                int skippedTags = readshort(endianess, reader);
                if (skippedTags > MAX_TAGS)
                {
                    throw new IOException("Not a valid tiff file");
                }
                // 2 bytes for the tag count, 12 bytes per tag entry
                reader.seek(address + 2 + skippedTags * 12);
                address = readlong(endianess, reader);
                if (address == 0)
                {
                    // no further image in the file
                    return;
                }
                reader.seek(address);
            }

            int numtags = readshort(endianess, reader);
            if (numtags > MAX_TAGS)
            {
                throw new IOException("Not a valid tiff file");
            }

            // Loop through the tags, some will convert to items in the params dictionary
            // Other point us to where to find the data stream.
            // The only param which might change as a result of other TIFF tags is K, so
            // we'll deal with that differently.
            int k = K_UNSET;
            int dataoffset = 0;
            int datalength = 0;
            for (int i = 0; i < numtags; i++)
            {
                int tag = readshort(endianess, reader);
                int type = readshort(endianess, reader);
                int count = readlong(endianess, reader);
                // The value field always occupies 4 bytes but may only use the first
                // byte or two, depending on the type.
                int val = inlineValue(endianess, type, readlong(endianess, reader));

                switch (tag)
                {
                    case 256: // ImageWidth
                    {
                        params.setInt(COSName.COLUMNS, val);
                        break;
                    }
                    case 257: // ImageLength
                    {
                        params.setInt(COSName.ROWS, val);
                        break;
                    }
                    case 259: // Compression: T6/T4
                    {
                        if (val == 4)
                        {
                            k = -1;
                        }
                        if (val == 3)
                        {
                            k = 0;
                        }
                        break;
                    }
                    case 262: // PhotometricInterpretation
                    {
                        if (val == 1)
                        {
                            params.setBoolean(COSName.BLACK_IS_1, true);
                        }
                        break;
                    }
                    case 273: // StripOffsets
                    case 324: // TileOffsets
                    {
                        // only a single strip/tile can be copied verbatim
                        if (count == 1)
                        {
                            dataoffset = val;
                        }
                        break;
                    }
                    case 279: // StripByteCounts
                    case 325: // TileByteCounts
                    {
                        if (count == 1)
                        {
                            datalength = val;
                        }
                        break;
                    }
                    case 292: // T4Options
                    {
                        if ((val & 1) != 0)
                        {
                            k = 50; // T4 2D - arbitrary positive K value
                        }
                        // http://www.awaresystems.be/imaging/tiff/tifftags/t4options.html
                        if ((val & 4) != 0)
                        {
                            throw new IOException("CCITT Group 3 'uncompressed mode' is not supported");
                        }
                        if ((val & 2) != 0)
                        {
                            throw new IOException("CCITT Group 3 'fill bits before EOL' is not supported");
                        }
                        break;
                    }
                    default:
                    {
                        // all other tags are of little interest for PDFs/CCITT Fax
                    }
                }
            }

            if (k == K_UNSET)
            {
                throw new IOException("First image in tiff is not CCITT T4 or T6 compressed");
            }
            if (dataoffset == 0)
            {
                throw new IOException("First image in tiff is not a single tile/strip");
            }
            params.setInt(COSName.K, k);

            // Copy the compressed image data verbatim
            reader.seek(dataoffset);
            byte[] buf = new byte[8192];
            int amountRead;
            while ((amountRead = reader.read(buf, 0, Math.min(buf.length, datalength))) > 0)
            {
                datalength -= amountRead;
                os.write(buf, 0, amountRead);
            }
        }
        finally
        {
            os.close();
        }
    }

    /**
     * Reduces the raw 4-byte value field of a tag entry to the value it actually holds.
     * BYTE and SHORT values are left-justified within the field, so after the field has been
     * read as a 32-bit integer they sit in the high bits for big-endian files and in the low
     * bits for little-endian files. Values are treated as unsigned and padding is discarded.
     * All other types are returned unchanged.
     *
     * @param endianess 'M' for big-endian, 'I' for little-endian
     * @param type the TIFF field type of the entry
     * @param raw the value field read as a 32-bit integer in the file's byte order
     * @return the value held by the field
     */
    private static int inlineValue(char endianess, int type, int raw)
    {
        boolean bigEndian = endianess == 'M';
        switch (type)
        {
            case TIFF_TYPE_BYTE:
            {
                // unsigned shift: a signed shift would turn values >= 0x80 negative
                return bigEndian ? raw >>> 24 : raw & 0xFF;
            }
            case TIFF_TYPE_SHORT:
            {
                // unsigned shift: a signed shift would turn values >= 0x8000 negative
                return bigEndian ? raw >>> 16 : raw & 0xFFFF;
            }
            default:
            {
                return raw;
            }
        }
    }

    /**
     * Reads a single unsigned byte.
     *
     * @param raf the source to read from
     * @return the byte value in the range 0..255
     * @throws IOException if the end of the data has been reached or reading fails.
     */
    private static int readByte(RandomAccess raf) throws IOException
    {
        int b = raf.read();
        if (b < 0)
        {
            throw new IOException("Not a valid tiff file: unexpected end of data");
        }
        return b;
    }

    /**
     * Reads an unsigned 16-bit value in the given byte order.
     *
     * @param endianess 'I' for little-endian, anything else is read as big-endian
     * @param raf the source to read from
     * @return the value in the range 0..65535
     * @throws IOException if the end of the data has been reached or reading fails.
     */
    private static int readshort(char endianess, RandomAccess raf) throws IOException
    {
        int first = readByte(raf);
        int second = readByte(raf);
        if (endianess == 'I')
        {
            return first | (second << 8);
        }
        return (first << 8) | second;
    }

    /**
     * Reads a 32-bit value in the given byte order.
     *
     * @param endianess 'I' for little-endian, anything else is read as big-endian
     * @param raf the source to read from
     * @return the four bytes combined into an int
     * @throws IOException if the end of the data has been reached or reading fails.
     */
    private static int readlong(char endianess, RandomAccess raf) throws IOException
    {
        int b0 = readByte(raf);
        int b1 = readByte(raf);
        int b2 = readByte(raf);
        int b3 = readByte(raf);
        if (endianess == 'I')
        {
            return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
        }
        return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
    }
}