PDFXrefStreamParser.java example

Explorer
PdfBox-Android-master
- library
  - src
- sample
  - src
    - main
      - java
        com
        tom_roush
        pdfbox
        sample
        MainActivity.java
package com.tom_roush.pdfbox.pdfparser;

import com.tom_roush.pdfbox.cos.COSArray;
import com.tom_roush.pdfbox.cos.COSBase;
import com.tom_roush.pdfbox.cos.COSDocument;
import com.tom_roush.pdfbox.cos.COSInteger;
import com.tom_roush.pdfbox.cos.COSName;
import com.tom_roush.pdfbox.cos.COSObjectKey;
import com.tom_roush.pdfbox.cos.COSStream;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * This will parse a PDF 1.5 (or better) Xref stream and
 * extract the xref information from the stream.
 *
 * <p>Every entry of the decoded stream consists of three big-endian fields whose widths in
 * bytes are given by the /W array of the stream dictionary. The object numbers the entries
 * belong to are described by the /Index array (pairs of first object number and count), or by
 * {@code [0 Size]} if /Index is absent.
 *
 *  @author Justin LeFebvre
 */
public class PDFXrefStreamParser extends BaseParser
{
    private final COSStream stream;
    private final XrefTrailerResolver xrefTrailerResolver;

    /**
     * Constructor.
     *
     * @param stream The stream to parse.
     * @param document The document for the current parsing.
     * @param resolver resolver to read the xref/trailer information
     *
     * @throws IOException If there is an error initializing the stream.
     */
    public PDFXrefStreamParser(COSStream stream, COSDocument document, XrefTrailerResolver resolver)
        throws IOException
    {
        super(new InputStreamSource(stream.getUnfilteredStream()));
        this.document = document;
        this.stream = stream;
        this.xrefTrailerResolver = resolver;
    }

    /**
     * Parses through the unfiltered stream and registers every in-use entry with the
     * {@link XrefTrailerResolver} passed to the constructor.
     *
     * <p>Type 1 entries are registered with their byte offset. Type 2 entries (objects stored
     * in an object stream) are registered with the <em>negated</em> object number of the
     * containing object stream, so that they can be told apart from file offsets. Free
     * entries (type 0) and unknown types are skipped.
     *
     * @throws IOException If the /W or /Index information is missing or malformed, or if
     * there is an error while reading the stream.
     */
    public void parse() throws IOException
    {
        COSBase w = stream.getDictionaryObject(COSName.W);
        if (!(w instanceof COSArray))
        {
            throw new IOException("/W array is missing in Xref stream");
        }
        COSArray xrefFormat = (COSArray) w;
        if (xrefFormat.size() < 3)
        {
            throw new IOException("/W array of Xref stream needs three entries but has "
                    + xrefFormat.size());
        }

        /*
         * Calculating the size of the line in bytes
         */
        int w0 = xrefFormat.getInt(0);
        int w1 = xrefFormat.getInt(1);
        int w2 = xrefFormat.getInt(2);
        if (w0 < 0 || w1 < 0 || w2 < 0)
        {
            // A negative width would otherwise surface as a NegativeArraySizeException or an
            // ArrayIndexOutOfBoundsException further down.
            throw new IOException("Invalid field widths in /W array of Xref stream: ["
                    + w0 + " " + w1 + " " + w2 + "]");
        }
        int lineSize = w0 + w1 + w2;

        Iterator<Long> objIter = readObjectNumbers().iterator();

        while (!seqSource.isEOF() && objIter.hasNext())
        {
            byte[] currLine = new byte[lineSize];
            seqSource.read(currLine);

            /*
             * ISO 32000-1, 7.5.8.2: if the first element of /W is zero, the type field is
             * not present and defaults to type 1.
             */
            int type = (w0 == 0) ? 1 : (int) parseValue(currLine, 0, w0);

            //Need to remember the current objID
            long objID = objIter.next();
            /*
             * 3 different types of entries.
             */
            switch (type)
            {
                case 0:
                    /*
                     * Skipping free objects
                     */
                    break;
                case 1:
                    // The offset is kept as a long: an int would wrap to a negative value
                    // for offsets >= 2^31 and then be mistaken for an object stream
                    // reference (see case 2).
                    long offset = parseValue(currLine, w0, w1);
                    int genNum = (int) parseValue(currLine, w0 + w1, w2);
                    xrefTrailerResolver.setXRef(new COSObjectKey(objID, genNum), offset);
                    break;
                case 2:
                    /*
                     * object stored in object stream:
                     * 2nd argument is object number of object stream
                     * 3rd argument is index of object within object stream
                     *
                     * For sequential PDFParser we do not need this information
                     * because
                     * These objects are handled by the dereferenceObjects() method
                     * since they're only pointing to object numbers
                     *
                     * However for XRef aware parsers we have to know which objects contain
                     * object streams. We will store this information in normal xref mapping
                     * table but add object stream number with minus sign in order to
                     * distinguish from file offsets
                     */
                    long objstmObjNr = parseValue(currLine, w0, w1);
                    xrefTrailerResolver.setXRef(new COSObjectKey(objID, 0), -objstmObjNr);
                    break;
                default:
                    break;
            }
        }
    }

    /**
     * Expands the /Index array of the stream dictionary into the list of object numbers the
     * entries of the stream belong to, in stream order. If /Index is absent (or not an
     * array), the default {@code [0 Size]} is used.
     *
     * @return the object numbers, one per expected entry.
     * @throws IOException If an element of /Index (or the /Size fallback) is not an integer.
     */
    private List<Long> readObjectNumbers() throws IOException
    {
        COSBase index = stream.getDictionaryObject(COSName.INDEX);
        COSArray indexArray;
        if (index instanceof COSArray)
        {
            indexArray = (COSArray) index;
        }
        else
        {
            /*
             * If Index doesn't exist, we will use the default values.
             */
            indexArray = new COSArray();
            indexArray.add(COSInteger.ZERO);
            indexArray.add(stream.getDictionaryObject(COSName.SIZE));
        }

        List<Long> objNums = new ArrayList<Long>();
        Iterator<COSBase> indexIter = indexArray.iterator();
        while (indexIter.hasNext())
        {
            COSBase first = indexIter.next();
            if (!(first instanceof COSInteger))
            {
                throw new IOException("Xref stream must have integer in /Index array");
            }
            if (!indexIter.hasNext())
            {
                // Dangling first-object-number without a count: ignore it.
                break;
            }
            COSBase count = indexIter.next();
            if (!(count instanceof COSInteger))
            {
                throw new IOException("Xref stream must have integer in /Index array");
            }
            long objID = ((COSInteger) first).longValue();
            int size = ((COSInteger) count).intValue();
            for (int i = 0; i < size; i++)
            {
                objNums.add(objID + i);
            }
        }
        return objNums;
    }

    /**
     * Decodes an unsigned big-endian integer from a byte array.
     *
     * @param data the bytes of one xref stream entry.
     * @param start index of the most significant byte of the field.
     * @param length width of the field in bytes; 0 yields 0.
     * @return the decoded value.
     */
    private static long parseValue(byte[] data, int start, int length)
    {
        long value = 0;
        for (int i = 0; i < length; i++)
        {
            value = (value << 8) | (data[start + i] & 0xFF);
        }
        return value;
    }
}