package com.tom_roush.pdfbox.pdfparser;
import com.tom_roush.pdfbox.cos.COSArray;
import com.tom_roush.pdfbox.cos.COSBase;
import com.tom_roush.pdfbox.cos.COSDocument;
import com.tom_roush.pdfbox.cos.COSInteger;
import com.tom_roush.pdfbox.cos.COSName;
import com.tom_roush.pdfbox.cos.COSObjectKey;
import com.tom_roush.pdfbox.cos.COSStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
 * Parses a PDF 1.5 (or later) cross-reference stream and forwards the
 * entries it contains to an {@link XrefTrailerResolver}.
 *
 * @author Justin LeFebvre
 */
public class PDFXrefStreamParser extends BaseParser
{
    private final COSStream stream;
    private final XrefTrailerResolver xrefTrailerResolver;

    /**
     * Constructor.
     *
     * @param stream The stream to parse.
     * @param document The document for the current parsing.
     * @param resolver resolver to read the xref/trailer information
     *
     * @throws IOException If there is an error initializing the stream.
     */
    public PDFXrefStreamParser(COSStream stream, COSDocument document, XrefTrailerResolver resolver)
            throws IOException
    {
        super(new InputStreamSource(stream.getUnfilteredStream()));
        this.document = document;
        this.stream = stream;
        this.xrefTrailerResolver = resolver;
    }

    /**
     * Parses through the unfiltered stream and registers every in-use entry
     * with the {@link XrefTrailerResolver} given to the constructor.
     *
     * <p>Type 1 entries are registered with their byte offset. Type 2 entries
     * (objects stored inside an object stream) are registered with the
     * <em>negated</em> object number of the containing object stream, so that
     * they can be told apart from file offsets. Type 0 (free) entries and
     * entries of unknown type are skipped.</p>
     *
     * @throws IOException If /W is missing or holds invalid widths, if /Size is
     * needed but missing, if /Index holds non-integer values, or if there is an
     * error while reading the stream.
     */
    public void parse() throws IOException
    {
        COSBase w = stream.getDictionaryObject(COSName.W);
        if (!(w instanceof COSArray))
        {
            throw new IOException("/W array is missing in Xref stream");
        }
        COSArray xrefFormat = (COSArray) w;

        /*
         * Widths (in bytes) of the three fields that make up one entry.
         */
        int w0 = xrefFormat.getInt(0);
        int w1 = xrefFormat.getInt(1);
        int w2 = xrefFormat.getInt(2);
        long totalWidth = (long) w0 + w1 + w2;
        // A negative width (presumably what getInt reports for a missing or
        // non-numeric element - not visible from here) or an overflowing sum
        // would otherwise surface as a NegativeArraySizeException below.
        if (w0 < 0 || w1 < 0 || w2 < 0 || totalWidth > Integer.MAX_VALUE)
        {
            throw new IOException(
                    "Incorrect /W array in Xref stream: [" + w0 + " " + w1 + " " + w2 + "]");
        }
        int lineSize = (int) totalWidth;

        Iterator<Long> objIter = readObjectNumbers().iterator();
        while (!seqSource.isEOF() && objIter.hasNext())
        {
            // A fresh, zero-filled buffer per entry: the number of bytes read is
            // not checked, so a short read leaves the remaining bytes at zero.
            byte[] currLine = new byte[lineSize];
            seqSource.read(currLine);

            // "If the first element is zero, the type field shall not be
            // present, and shall default to type 1" (PDF cross-reference streams).
            long type = w0 == 0 ? 1L : parseValue(currLine, 0, w0);

            //Need to remember the current objID
            Long objID = objIter.next();

            if (type == 1L)
            {
                /*
                 * Object in use and not compressed:
                 * 2nd field is the byte offset, 3rd field is the generation number.
                 * The offset is kept as a long so that offsets of 2 GiB and more
                 * (or fields wider than 4 bytes) are not truncated.
                 */
                long offset = parseValue(currLine, w0, w1);
                int genNum = (int) parseValue(currLine, w0 + w1, w2);
                COSObjectKey objKey = new COSObjectKey(objID, genNum);
                xrefTrailerResolver.setXRef(objKey, offset);
            }
            else if (type == 2L)
            {
                /*
                 * Object stored in an object stream:
                 * 2nd field is the object number of the object stream,
                 * 3rd field is the index of the object within that stream.
                 *
                 * XRef aware parsers have to know which objects contain object
                 * streams. This information is stored in the normal xref mapping
                 * table, but the object stream number is added with a minus sign
                 * in order to distinguish it from file offsets.
                 */
                long objstmObjNr = parseValue(currLine, w0, w1);
                COSObjectKey objKey = new COSObjectKey(objID, 0);
                xrefTrailerResolver.setXRef(objKey, -objstmObjNr);
            }
            // type 0 (free object) and unknown types: nothing to register
        }
    }

    /**
     * Builds the list of object numbers described by the /Index entry of the
     * stream dictionary, in the order in which their entries appear in the
     * stream data. If /Index is absent (or not an array) the default range
     * {@code [0 Size]} is used.
     *
     * @return the object numbers, one per expected entry.
     * @throws IOException If /Size is needed but is not an integer, or if
     * /Index holds a value that is not an integer.
     */
    private List<Long> readObjectNumbers() throws IOException
    {
        COSBase index = stream.getDictionaryObject(COSName.INDEX);
        COSArray indexArray;
        if (index instanceof COSArray)
        {
            indexArray = (COSArray) index;
        }
        else
        {
            /*
             * If Index doesn't exist, we will use the default values.
             */
            COSBase size = stream.getDictionaryObject(COSName.SIZE);
            if (!(size instanceof COSInteger))
            {
                throw new IOException("/Size is missing in Xref stream");
            }
            indexArray = new COSArray();
            indexArray.add(COSInteger.ZERO);
            indexArray.add(size);
        }

        List<Long> objNums = new ArrayList<Long>();
        Iterator<COSBase> indexIter = indexArray.iterator();
        while (indexIter.hasNext())
        {
            COSBase first = indexIter.next();
            if (!indexIter.hasNext())
            {
                // Trailing start value without a count: nothing to add for it.
                break;
            }
            COSBase count = indexIter.next();
            if (!(first instanceof COSInteger) || !(count instanceof COSInteger))
            {
                throw new IOException("Xref stream must have integer in /Index array");
            }
            long objID = ((COSInteger) first).longValue();
            int size = ((COSInteger) count).intValue();
            for (int i = 0; i < size; i++)
            {
                objNums.add(objID + i);
            }
        }
        return objNums;
    }

    /**
     * Decodes {@code length} bytes of {@code data}, starting at {@code start},
     * as an unsigned big-endian integer.
     *
     * @param data the buffer holding one entry.
     * @param start index of the most significant byte of the field.
     * @param length number of bytes in the field; 0 yields 0.
     * @return the decoded value. For fields wider than 8 bytes only the low
     * 64 bits are kept.
     */
    private static long parseValue(byte[] data, int start, int length)
    {
        long value = 0;
        for (int i = 0; i < length; i++)
        {
            value = (value << 8) | (data[start + i] & 0xffL);
        }
        return value;
    }
}