PDFXrefStreamParser.java example

Explorer
with-aes-master
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.pdfbox.pdfparser;

import java.io.IOException;

import java.util.ArrayList;
import java.util.Iterator;

import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.persistence.util.COSObjectKey;

/**
 * This will parse a PDF 1.5 (or better) Xref stream and
 * extract the xref information from the stream.
 *
 * <p>Every row of the stream consists of three big-endian fields whose byte
 * widths are taken from the stream dictionary's <code>/W</code> array. The
 * object numbers the rows belong to are taken from <code>/Index</code>, or
 * from <code>[0 Size]</code> when <code>/Index</code> is absent. Rows of
 * type 1 are registered with the {@link XrefTrailerResolver} supplied to the
 * constructor; all other rows are skipped.</p>
 *
 *  @author <a href="mailto:justinl@basistech.com">Justin LeFebvre</a>
 *  @version $Revision: 1.0 $
 */
public class PDFXrefStreamParser extends BaseParser
{
    /** Row type 0: a free object; nothing is registered for it. */
    private static final int ENTRY_TYPE_FREE = 0;

    /** Row type 1: second field is the offset, third the generation number. */
    private static final int ENTRY_TYPE_IN_USE = 1;

    /** Row type 2: not registered by this class (see {@link #parse()}). */
    private static final int ENTRY_TYPE_IN_OBJECT_STREAM = 2;

    private final COSStream stream;
    private final XrefTrailerResolver xrefTrailerResolver;

    /**
     * Constructor.
     *
     * @since 1.3.0
     * @param strm The stream to parse.
     * @param doc The document for the current parsing.
     * @param forceParsing flag to skip malformed or otherwise unparseable
     *                     input where possible
     * @param xrefTrailerResolver resolver to read the xref/trailer information
     *
     * @throws IOException If there is an error initializing the stream.
     */
    public PDFXrefStreamParser(
            COSStream strm, COSDocument doc, boolean forceParsing,
            XrefTrailerResolver xrefTrailerResolver )
            throws IOException
    {
        super(strm.getUnfilteredStream(), forceParsing);
        setDocument(doc);
        stream = strm;
        this.xrefTrailerResolver = xrefTrailerResolver;
    }

    /**
     * Parses through the unfiltered stream and hands every in-use (type 1)
     * entry to the {@link XrefTrailerResolver}. The underlying source is
     * always closed when this method returns, whether normally or not.
     *
     * @throws IOException If there is an error while parsing the stream, or
     *                     if the <code>/W</code> or <code>/Index</code>
     *                     entries of the stream dictionary are malformed.
     */
    public void parse() throws IOException
    {
        try
        {
            COSBase widths = stream.getDictionaryObject(COSName.W);
            if(!(widths instanceof COSArray))
            {
                throw new IOException("Xref stream dictionary has no valid /W array");
            }
            COSArray xrefFormat = (COSArray)widths;

            COSArray indexArray = (COSArray)stream.getDictionaryObject(COSName.INDEX);
            /*
             * If Index doesn't exist, we will use the default values.
             */
            if(indexArray == null)
            {
                indexArray = new COSArray();
                indexArray.add(COSInteger.ZERO);
                indexArray.add(stream.getDictionaryObject(COSName.SIZE));
            }

            Iterator<Integer> objIter = collectObjectNumbers(indexArray).iterator();

            /*
             * Calculating the size of the line in bytes
             */
            int w0 = xrefFormat.getInt(0);
            int w1 = xrefFormat.getInt(1);
            int w2 = xrefFormat.getInt(2);
            if(w0 < 0 || w1 < 0 || w2 < 0)
            {
                throw new IOException("Xref stream /W array contains a negative width: ["
                        + w0 + " " + w1 + " " + w2 + "]");
            }
            int lineSize = w0 + w1 + w2;
            if(lineSize == 0)
            {
                // Rows carry no bytes at all, so there is nothing to decode.
                return;
            }

            // One buffer is enough: readFully() either overwrites it completely
            // or reports failure, so stale bytes are never decoded.
            byte[] currLine = new byte[lineSize];

            while(pdfSource.available() > 0 && objIter.hasNext())
            {
                if(!readFully(currLine))
                {
                    // Truncated final row: do not register an entry built
                    // from partially read data.
                    break;
                }

                /*
                 * A zero-width type field means the field is absent and the
                 * entry type defaults to 1 (ISO 32000-1, 7.5.8.2, Table 17).
                 */
                int type = (w0 == 0) ? ENTRY_TYPE_IN_USE : readField(currLine, 0, w0);

                //Need to remember the current objID
                Integer objID = objIter.next();
                /*
                 * 3 different types of entries.
                 */
                switch(type)
                {
                    case ENTRY_TYPE_FREE:
                        /*
                         * Skipping free objects
                         */
                        break;
                    case ENTRY_TYPE_IN_USE:
                        int offset = readField(currLine, w0, w1);
                        int genNum = readField(currLine, w0 + w1, w2);
                        COSObjectKey objKey = new COSObjectKey(objID.intValue(), genNum);
                        xrefTrailerResolver.setXRef(objKey, offset);
                        break;
                    case ENTRY_TYPE_IN_OBJECT_STREAM:
                        /*
                         * These objects are handled by the dereferenceObjects() method
                         * since they're only pointing to object numbers
                         */
                        break;
                    default:
                        break;
                }
            }
        }
        finally
        {
            pdfSource.close();
        }
    }

    /**
     * Expands the (first object number, count) pairs of an Index array into
     * the flat list of object numbers, in stream order.
     *
     * @param indexArray the Index array, or the synthesized default.
     * @return the object number for each successive row of the stream.
     * @throws IOException If the array holds an odd number of elements.
     */
    private ArrayList<Integer> collectObjectNumbers(COSArray indexArray) throws IOException
    {
        ArrayList<Integer> objNums = new ArrayList<Integer>();
        Iterator<COSBase> indexIter = indexArray.iterator();
        while(indexIter.hasNext())
        {
            int firstObjNum = ((COSInteger)indexIter.next()).intValue();
            if(!indexIter.hasNext())
            {
                throw new IOException(
                        "Xref stream /Index array has an odd number of elements");
            }
            int count = ((COSInteger)indexIter.next()).intValue();
            for(int i = 0; i < count; i++)
            {
                objNums.add(Integer.valueOf(firstObjNum + i));
            }
        }
        return objNums;
    }

    /**
     * Fills the whole buffer from the parser's source, looping because a
     * single read call is allowed to return fewer bytes than requested.
     *
     * @param buffer the buffer to fill completely.
     * @return <code>true</code> if the buffer was filled, <code>false</code>
     *         if the source ended first.
     * @throws IOException If reading from the source fails.
     */
    private boolean readFully(byte[] buffer) throws IOException
    {
        int filled = 0;
        while(filled < buffer.length)
        {
            int n = pdfSource.read(buffer, filled, buffer.length - filled);
            if(n <= 0)
            {
                // -1 is end of stream; 0 is not a legal result for a
                // non-empty request, so bail out rather than spin.
                return false;
            }
            filled += n;
        }
        return true;
    }

    /**
     * Decodes an unsigned big-endian integer field from a row. A width of
     * zero yields 0.
     *
     * NOTE: the value is accumulated in an int, so fields whose significant
     * part is wider than four bytes cannot be represented.
     *
     * @param line the row bytes.
     * @param start index of the field's first byte.
     * @param width number of bytes in the field.
     * @return the decoded value.
     */
    private static int readField(byte[] line, int start, int width)
    {
        int value = 0;
        for(int i = 0; i < width; i++)
        {
            value += (line[start + i] & 0x00ff) << ((width - i - 1) * 8);
        }
        return value;
    }
}