PreflightParser.java example

Explorer
pdfbox-master
- pdfbox-trunk
/*****************************************************************************
 * 
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 * 
 ****************************************************************************/

package org.apache.pdfbox.preflight.parser;

import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.activation.DataSource;
import javax.activation.FileDataSource;
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.cos.COSFloat;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSNull;
import org.apache.pdfbox.cos.COSNumber;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.cos.COSObjectKey;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream;
import org.apache.pdfbox.io.ScratchFile;
import org.apache.pdfbox.pdfparser.PDFObjectStreamParser;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.preflight.Format;
import org.apache.pdfbox.preflight.PreflightConfiguration;
import org.apache.pdfbox.preflight.PreflightConstants;
import org.apache.pdfbox.preflight.PreflightContext;
import org.apache.pdfbox.preflight.PreflightDocument;
import org.apache.pdfbox.preflight.ValidationResult;
import org.apache.pdfbox.preflight.ValidationResult.ValidationError;
import org.apache.pdfbox.preflight.exception.SyntaxValidationException;


import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_ARRAY_TOO_LONG;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_CROSS_REF;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_HEXA_STRING_EVEN_NUMBER;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_HEXA_STRING_INVALID;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_HEXA_STRING_TOO_LONG;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_INVALID_OFFSET;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_MISSING_OFFSET;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_NAME_TOO_LONG;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_NUMERIC_RANGE;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_DELIMITER;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_TOO_MANY_ENTRIES;
import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_TRAILER_EOF;
import static org.apache.pdfbox.preflight.PreflightConstants.MAX_ARRAY_ELEMENTS;
import static org.apache.pdfbox.preflight.PreflightConstants.MAX_DICT_ENTRIES;
import static org.apache.pdfbox.preflight.PreflightConstants.MAX_NAME_SIZE;
import static org.apache.pdfbox.preflight.PreflightConstants.MAX_NEGATIVE_FLOAT;
import static org.apache.pdfbox.preflight.PreflightConstants.MAX_POSITIVE_FLOAT;
import static org.apache.pdfbox.preflight.PreflightConstants.MAX_STRING_LENGTH;

public class PreflightParser extends PDFParser
{
    /**
     * Single-byte charset (ISO-8859-1) used to turn text read by the parser back into bytes, see
     * {@link #checkPdfHeader()}. The original note on this constant states that it avoids unexpected
     * errors when the encoding is Cp5816.
     */
    public static final Charset encoding = Charset.forName("ISO-8859-1");

    /** Source of the PDF being validated; handed to the {@link PreflightContext} in {@link #createContext()}. */
    protected DataSource dataSource;

    /** Errors collected while parsing; created lazily by {@link #addValidationError(ValidationError)}. */
    protected ValidationResult validationResult;

    /** Document created by {@link #createPdfADocument(Format, PreflightConfiguration)}. */
    protected PreflightDocument preflightDocument;

    /** Validation context created by {@link #createContext()}. */
    protected PreflightContext ctx;

    /**
     * Creates a parser that reads the PDF from the given file. Lenient parsing is switched off and the
     * file is additionally wrapped in a {@link FileDataSource} that is later handed to the validation
     * context.
     *
     * @param file the PDF file to parse.
     * @throws IOException if there is a reading error.
     */
    public PreflightParser(File file) throws IOException
    {
        // TODO move file handling outside of the parser
        super(new RandomAccessBufferedFileInputStream(file));
        this.setLenient(false);
        this.dataSource = new FileDataSource(file);
    }

    /**
     * Creates a parser that reads the PDF from the given file and passes the given scratch file on to
     * the parent parser. Lenient parsing is switched off and the file is additionally wrapped in a
     * {@link FileDataSource} that is later handed to the validation context.
     *
     * @param file the PDF file to parse.
     * @param scratch the scratch file forwarded to the {@link PDFParser} constructor.
     * @throws IOException if there is a reading error.
     */
    public PreflightParser(File file, ScratchFile scratch) throws IOException
    {
        // TODO move file handling outside of the parser
        super(new RandomAccessBufferedFileInputStream(file), scratch);
        this.setLenient(false);
        this.dataSource = new FileDataSource(file);
    }

    /**
     * Creates a parser for the file with the given name. Delegates to {@link #PreflightParser(File)}.
     *
     * @param filename the path of the PDF file to parse.
     * @throws IOException if there is a reading error.
     */
    public PreflightParser(String filename) throws IOException
    {
        // TODO move file handling outside of the parser
        this(new File(filename));
    }

    /**
     * Creates a parser for the file with the given name, using the given scratch file. Delegates to
     * {@link #PreflightParser(File, ScratchFile)}.
     *
     * @param filename the path of the PDF file to parse.
     * @param scratch the scratch file forwarded to the delegate constructor.
     * @throws IOException if there is a reading error.
     */
    public PreflightParser(String filename, ScratchFile scratch) throws IOException
    {
        // TODO move file handling outside of the parser
        this(new File(filename), scratch);
    }

    /**
     * Creates a parser that reads the PDF from the input stream of the given data source. This one is
     * slower than the file and the filename constructors, because a temporary file will be created.
     * Lenient parsing is switched off and the data source is kept for the validation context.
     *
     * @param dataSource the data source providing the PDF content.
     * @throws IOException if there is a reading error.
     */
    public PreflightParser(DataSource dataSource) throws IOException
    {
        // TODO move file handling outside of the parser
        super(new RandomAccessBufferedFileInputStream(dataSource.getInputStream()));
        this.setLenient(false);
        this.dataSource = dataSource;
    }

    /**
     * Creates a parser that reads the PDF from the input stream of the given data source and passes the
     * given scratch file on to the parent parser. This one is slower than the file and the filename
     * constructors, because a temporary file will be created. Lenient parsing is switched off and the
     * data source is kept for the validation context.
     *
     * @param dataSource the data source providing the PDF content.
     * @param scratch the scratch file forwarded to the {@link PDFParser} constructor.
     * @throws IOException if there is a reading error.
     */
    public PreflightParser(DataSource dataSource, ScratchFile scratch) throws IOException
    {
        // TODO move file handling outside of the parser
        super(new RandomAccessBufferedFileInputStream(dataSource.getInputStream()), scratch);
        this.setLenient(false);
        this.dataSource = dataSource;
    }

    /**
     * Build a ValidationResult that holds a single ValidationError created with the unknown error code.
     * 
     * @return the new ValidationResult instance.
     */
    protected static ValidationResult createUnknownErrorResult()
    {
        return new ValidationResult(new ValidationError(PreflightConstants.ERROR_UNKOWN_ERROR));
    }

    /**
     * Record an error in the ValidationResult of this parser. When no result exists yet, one is created
     * first; the isWarning flag of the given error is passed to its constructor to decide whether the
     * result is flagged as valid.
     * 
     * @param error the error to record.
     */
    protected void addValidationError(ValidationError error)
    {
        ValidationResult target = this.validationResult;
        if (target == null)
        {
            // first error seen by this parser: create the result lazily
            target = new ValidationResult(error.isWarning());
            this.validationResult = target;
        }
        target.addError(error);
    }

    /**
     * Add every error of the given list to the ValidationResult, one by one and in list order, using
     * {@link #addValidationError(ValidationError)}.
     * 
     * @param errors the errors to record.
     */
    protected void addValidationErrors(List<ValidationError> errors)
    {
        for (ValidationError error : errors)
        {
            addValidationError(error);
        }
    }

    /**
     * Parse the file using the default format {@link Format#PDF_A1B}.
     * 
     * @throws IOException as thrown by {@link #parse(Format)}.
     */
    @Override
    public void parse() throws IOException
    {
        parse(Format.PDF_A1B);
    }

    /**
     * Parse the given file and check if it is a conforming file according to the given format. No
     * configuration is supplied (null is passed on), so the format determines the default configuration.
     * 
     * @param format
     *            format that the document should follow (default {@link Format#PDF_A1B})
     * @throws IOException as thrown by {@link #parse(Format, PreflightConfiguration)}.
     */
    public void parse(Format format) throws IOException
    {
        parse(format, null);
    }

    /**
     * Parse the given file and check if it is a conforming file according to the given format.
     * <p>
     * The PDF header is checked first, then the parent parser runs. Once parsing is over (successfully
     * or not) the source is closed. On success the PreflightDocument and its validation context are
     * created.
     * 
     * @param format
     *            format that the document should follow (default {@link Format#PDF_A1B} when null)
     * @param config
     *            Configuration bean that will be used by the PreflightDocument. If null the format is used to determine
     *            the default configuration.
     * @throws SyntaxValidationException
     *             wrapping the IOException raised by the parent parser, together with the validation result
     *             collected so far.
     * @throws IOException
     *             if the PreflightDocument cannot be created.
     */
    public void parse(Format format, PreflightConfiguration config) throws IOException
    {
        checkPdfHeader();
        try
        {
            super.parse();
        }
        catch (IOException parseFailure)
        {
            ValidationError syntaxError = new ValidationError(PreflightConstants.ERROR_SYNTAX_COMMON,
                    parseFailure.getMessage());
            addValidationError(syntaxError);
            throw new SyntaxValidationException(parseFailure, this.validationResult);
        }
        finally
        {
            // TODO move file handling outside of the parser
            IOUtils.closeQuietly(source);
        }

        final Format effectiveFormat;
        if (format != null)
        {
            effectiveFormat = format;
        }
        else
        {
            effectiveFormat = Format.PDF_A1B;
        }
        createPdfADocument(effectiveFormat, config);
        createContext();
    }

    /**
     * Create the PreflightDocument from the parsed COSDocument and store it in this parser.
     * 
     * @param format
     *            format that the document should follow.
     * @param config
     *            configuration passed to the PreflightDocument constructor; may be null.
     * @throws IOException
     *             if the parsed document cannot be obtained.
     */
    protected void createPdfADocument(Format format, PreflightConfiguration config) throws IOException
    {
        COSDocument cosDocument = getDocument();
        this.preflightDocument = new PreflightDocument(cosDocument, format, config);
    }

    /**
     * Build the validation context from the data source and link it with the PreflightDocument in both
     * directions. The xref trailer resolver and the file length of this parser are copied into the
     * context as well.
     */
    protected void createContext()
    {
        final PreflightContext context = new PreflightContext(this.dataSource);
        this.ctx = context;

        // document <-> context
        context.setDocument(preflightDocument);
        preflightDocument.setContext(context);

        context.setXrefTrailerResolver(xrefTrailerResolver);
        context.setFileLen(this.fileLen);
    }

    /**
     * Attach the validation result collected so far to the PreflightDocument and return that document.
     * 
     * @return the PreflightDocument held by this parser.
     * @throws IOException as declared by the overridden method.
     */
    @Override
    public PDDocument getPDDocument() throws IOException
    {
        preflightDocument.setResult(validationResult);
        // NOTE(review): the original comment here read "Add XMP MetaData", but nothing is added in this method
        return preflightDocument;
    }

    /**
     * Same as {@link #getPDDocument()}, with the result cast to PreflightDocument.
     * 
     * @return the PreflightDocument held by this parser.
     * @throws IOException as thrown by {@link #getPDDocument()}.
     */
    public PreflightDocument getPreflightDocument() throws IOException
    {
        return (PreflightDocument) getPDDocument();
    }

    // --------------------------------------------------------
    // - All methods below add extra checks on the PDF syntax
    // --------------------------------------------------------

    /**
     * Run the regular initial parse, then fill the COSDocument with the objects that have not been set
     * by it: every key of the document's xref table is looked up in the object pool and objects that are
     * still empty are parsed on demand. This is useful for linearized PDFs.
     * 
     * @throws IOException if the initial parse or the parsing of a missing object fails.
     */
    @Override
    protected void initialParse() throws IOException
    {
        super.initialParse();
        for (COSObjectKey key : document.getXrefTable().keySet())
        {
            COSObject pooled = document.getObjectFromPool(key);
            if (pooled.getObject() != null)
            {
                // content already loaded, nothing to do
                continue;
            }
            // object isn't loaded - parse the object to load its content
            parseObjectDynamically(pooled, true);
        }
    }

    /**
     * Check that the PDF header matches the rules of the PDF/A specification. The first line (offset 0)
     * must be a comment with the PDF version 1.1 to 1.9 (version 1.0 doesn't conform to the PDF/A
     * specification). The second line, when present, must be a comment made of '%' followed by at least
     * 4 bytes with a value of 0x80 or above.
     * <p>
     * Violations and read failures are recorded as ERROR_SYNTAX_HEADER validation errors. On the normal
     * path the source is positioned at offset 0 again when the method returns.
     */
    protected void checkPdfHeader()
    {
        final String binaryCommentMessage =
                "Second line must begin with '%' followed by at least 4 bytes greater than 127";
        try
        {
            source.seek(0);

            String versionLine = readLine();
            boolean versionOk = versionLine != null && versionLine.matches("%PDF-1\\.[1-9]");
            if (!versionOk)
            {
                addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER,
                        "First line must match %PDF-1.\\d"));
            }

            String binaryLine = readLine();
            if (binaryLine != null)
            {
                byte[] raw = binaryLine.getBytes(encoding.name());
                // the line is valid only if it is long enough, starts with '%' and
                // the four bytes that follow all have their high bit set
                boolean binaryOk = raw.length >= 5 && raw[0] == '%';
                for (int i = 1; binaryOk && i < 5; ++i)
                {
                    binaryOk = (raw[i] & 0xFF) >= 0x80;
                }
                if (!binaryOk)
                {
                    addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER,
                            binaryCommentMessage));
                }
            }

            source.seek(0);
        }
        catch (IOException e)
        {
            addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER,
                    "Unable to read the PDF file : " + e.getMessage(), e));
        }
    }

    /**
     * Same job as {@link PDFParser#parseXrefTable(long)} with additional controls:
     * <ul>
     * <li>an EOL is mandatory after the 'xref' keyword,</li>
     * <li>a cross reference subsection header must use a single white space as separator,</li>
     * <li>every entry line must have at least three space separated parts and end with 'n' or carry 'f'
     * as its third part.</li>
     * </ul>
     * Violations are recorded as ERROR_SYNTAX_CROSS_REF validation errors; entries of type 'n' are
     * registered in the xref trailer resolver.
     *
     * @param startByteOffset the offset to start at
     * @return false if the table does not start with the 'xref' keyword, true otherwise
     * @throws IOException If an IO error occurs.
     */
    @Override
    protected boolean parseXrefTable(long startByteOffset) throws IOException
    {
        if (source.peek() != 'x')
        {
            return false;
        }
        String xref = readString();
        if (!xref.equals("xref"))
        {
            addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF,
                    "xref must be followed by a EOL character"));
            return false;
        }
        if (!nextIsEOL())
        {
            addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF,
                    "xref must be followed by EOL"));
        }

        // signal start of new XRef
        xrefTrailerResolver.nextXrefObj(startByteOffset,XRefType.TABLE);

        // subsection header: "<first object id> <count>"; compiled once instead of once per subsection
        final Pattern subsectionHeader = Pattern.compile("(\\d+)\\s(\\d+)(\\s*)");

        // Xref tables can have multiple sections. Each starts with a starting object id and a count.
        while (true)
        {
            // just after the xref<EOL> there are an integer
            // first obj id
            long currObjID;
            // the number of objects in the xref table
            int count; 

            long offset = source.getPosition();
            String line = readLine();
            Matcher matcher = subsectionHeader.matcher(line);
            if (matcher.matches())
            {
                currObjID = Long.parseLong(matcher.group(1));
                count = Integer.parseInt(matcher.group(2));
            }
            else
            {
                // strict header check failed: report it, then re-read the two numbers leniently
                addValidationError(new ValidationError(ERROR_SYNTAX_CROSS_REF,
                        "Cross reference subsection header is invalid: '" + line + "' at position "
                                + source.getPosition()));
                // reset source cursor to read xref information
                source.seek(offset);
                // first obj id
                currObjID = readObjectNumber();
                // the number of objects in the xref table
                count = readInt();
            }

            skipSpaces();
            for (int i = 0; i < count; i++)
            {
                if (source.isEOF() || isEndOfName((char) source.peek()))
                {
                    break;
                }
                if (source.peek() == 't')
                {
                    // fewer entries than announced
                    addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF,
                            "Expected xref line but 't' found"));
                    break;
                }
                // read one entry line: "<offset> <generation> <n|f>"
                String currentLine = readLine();
                String[] splitString = currentLine.split(" ");
                if (splitString.length < 3)
                {
                    addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF,
                            "invalid xref line: " + currentLine));
                    break;
                }
                // This supports the corrupt table as reported in PDFBOX-474 (XXXX XXX XX n)
                if (splitString[splitString.length - 1].equals("n"))
                {
                    try
                    {
                        long currOffset = Long.parseLong(splitString[0]);
                        int currGenID = Integer.parseInt(splitString[1]);
                        COSObjectKey objKey = new COSObjectKey(currObjID, currGenID);
                        xrefTrailerResolver.setXRef(objKey, currOffset);
                    }
                    catch (NumberFormatException e)
                    {
                        addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF,
                                "offset or genid can't be read as number " + e.getMessage(), e));
                    }
                }
                else if (!splitString[2].equals("f"))
                {
                    addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF,
                            "Corrupt XRefTable Entry - ObjID:" + currObjID));
                }
                currObjID++;
                skipSpaces();
            }
            skipSpaces();
            // another subsection follows only if the next character is a digit
            if (!isDigit())
            {
                break;
            }
        }
        return true;
    }

    /**
     * Wraps the {@link PDFParser#parseCOSStream} to check rules on 'stream' and 'endstream'
     * keywords: {@link #checkStreamKeyWord()} runs before the parent implementation and
     * {@link #checkEndstreamKeyWord()} runs after it.
     *
     * @param dic dictionary that goes with this stream.
     *
     * @return parsed pdf stream.
     *
     * @throws IOException if an error occurred reading the stream, like problems with reading
     * length attribute, stream does not end with 'endstream' after data read, stream too short etc.
     */
    @Override
    protected COSStream parseCOSStream(COSDictionary dic) throws IOException
    {
        checkStreamKeyWord();
        COSStream result = super.parseCOSStream(dic);
        checkEndstreamKeyWord();
        return result;
    }

    /**
     * Check the 'stream' keyword at the current position: it must be followed by
     * &lt;CR&gt;&lt;LF&gt; or only &lt;LF&gt;. Violations are recorded as
     * ERROR_SYNTAX_STREAM_DELIMITER validation errors. Afterwards the source is moved back by 7 bytes
     * so that the keyword can be read again.
     * 
     * @throws IOException if reading from or seeking in the source fails.
     */
    protected void checkStreamKeyWord() throws IOException
    {
        String streamV = readString();
        if (!streamV.equals("stream"))
        {
            addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
                    "Expected 'stream' keyword but found '" + streamV + "' at offset "+source.getPosition()));
        }
        // 13 is CR and 10 is LF: accept CR followed by LF, or a single LF (a lone CR is rejected)
        int nextChar = source.read();
        if (!((nextChar == 13 && source.peek() == 10) || nextChar == 10))
        {
            addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
                    "Expected 'EOL' after the stream keyword at offset "+source.getPosition()));
        }
        // set the offset before stream
        // presumably 7 = the 6 characters of 'stream' plus the single character read above;
        // NOTE(review): this assumes the keyword was read as expected - confirm for the error paths
        source.seek(source.getPosition() - 7);
    }

    /**
     * Check the 'endstream' keyword: it must be preceded by an EOL. The source is moved back by 10
     * bytes from the current position, an EOL is expected there and the keyword is read again.
     * Violations are recorded as ERROR_SYNTAX_STREAM_DELIMITER validation errors.
     * 
     * @throws IOException if reading from or seeking in the source fails.
     */
    protected void checkEndstreamKeyWord() throws IOException
    {
        // presumably 10 = the 9 characters of 'endstream' plus one EOL byte in front of it;
        // NOTE(review): relies on the source being positioned right after the keyword - confirm
        source.seek(source.getPosition() - 10);
        if (!nextIsEOL())
        {
            // NOTE(review): source.peek() is concatenated as returned by the source, so the message
            // may show a numeric character code rather than the character itself - confirm
            addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
                    "Expected 'EOL' before the endstream keyword at offset "+source.getPosition()+" but found '"+source.peek()+"'"));
        }
        String endstreamV = readString();
        if (!endstreamV.equals("endstream"))
        {
            addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER,
                    "Expected 'endstream' keyword at offset "+source.getPosition()+" but found '" + endstreamV + "'"));
        }
    }

    /**
     * Consume one character from the source and tell whether it starts an end-of-line marker. A CR
     * directly followed by a LF is consumed as a whole; a lone CR or a lone LF also counts as EOL.
     * Any other character is consumed as well and yields false.
     * 
     * @return true if an EOL marker has been read.
     * @throws IOException if reading from the source fails.
     */
    private boolean nextIsEOL() throws IOException
    {
        int current = source.read();
        if (current == ASCII_CR)
        {
            if (source.peek() == ASCII_LF)
            {
                // CR LF pair: swallow the LF too
                source.read();
            }
            return true;
        }
        return current == ASCII_LF;
    }

    /**
     * Parse an array with the parent implementation and check the number of elements: an array with
     * more than MAX_ARRAY_ELEMENTS elements is recorded as ERROR_SYNTAX_ARRAY_TOO_LONG.
     * 
     * @return the array returned by the parent implementation (may be null).
     * @throws IOException if the parent implementation fails.
     */
    @Override
    protected COSArray parseCOSArray() throws IOException
    {
        COSArray array = super.parseCOSArray();
        if (array == null)
        {
            return null;
        }
        int elementCount = array.size();
        if (elementCount > MAX_ARRAY_ELEMENTS)
        {
            addValidationError(new ValidationError(ERROR_SYNTAX_ARRAY_TOO_LONG, "Array too long : " + elementCount));
        }
        return array;
    }

    /**
     * Parse a name with the parent implementation and check its length: a name longer than
     * MAX_NAME_SIZE bytes is recorded as ERROR_SYNTAX_NAME_TOO_LONG.
     * <p>
     * The length is measured on the UTF-8 encoding of the name, so the outcome no longer depends on
     * the default charset of the platform the validation runs on.
     * 
     * @return the name returned by the parent implementation (may be null).
     * @throws IOException if the parent implementation fails.
     */
    @Override
    protected COSName parseCOSName() throws IOException
    {
        COSName result = super.parseCOSName();
        if (result != null)
        {
            // explicit charset: String.getBytes() without argument uses the platform default
            // NOTE(review): UTF-8 assumes the parent parser decodes names as UTF-8 - confirm
            int byteLength = result.getName().getBytes(Charset.forName("UTF-8")).length;
            if (byteLength > MAX_NAME_SIZE)
            {
                addValidationError(new ValidationError(ERROR_SYNTAX_NAME_TOO_LONG, "Name too long: " + result.getName()));
            }
        }
        return result;
    }

    /**
     * Pre-scan a string before handing it to the parent implementation. When the string starts with
     * '&lt;', it is read up to the closing '&gt;': white space is skipped, hexadecimal digits are
     * counted and any other character is recorded as ERROR_SYNTAX_HEXA_STRING_INVALID (which stops the
     * scan). An odd number of digits is recorded as ERROR_SYNTAX_HEXA_STRING_EVEN_NUMBER. The offset is
     * then reset to the beginning of the string and {@link PDFParser#parseCOSString()} is called; a
     * result longer than MAX_STRING_LENGTH is recorded as ERROR_SYNTAX_HEXA_STRING_TOO_LONG.
     *
     * @return The parsed PDF string.
     *
     * @throws IOException If there is an error reading from the stream.
     */
    @Override
    protected COSString parseCOSString() throws IOException
    {
        // remember where the string starts, the real parsing restarts from here
        final long stringStart = source.getPosition();
        final char opening = (char) source.read();
        int hexDigits = 0;

        if (opening == '<')
        {
            while (true)
            {
                char current = (char) source.read();
                if (current == '>')
                {
                    // end of the hexadecimal string
                    break;
                }
                if (isWhitespace(current))
                {
                    // ignore space characters
                    continue;
                }
                if (Character.digit(current, 16) < 0)
                {
                    addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_INVALID,
                            "Hexa String must have only Hexadecimal Characters (found '" + current + "') at offset " + source.getPosition()));
                    break;
                }
                hexDigits++;
            }
        }

        boolean oddDigitCount = hexDigits % 2 != 0;
        if (oddDigitCount)
        {
            addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_EVEN_NUMBER,
                    "Hexa string shall contain even number of non white space char at offset " + source.getPosition()));
        }

        // reset the offset to parse the COSString
        source.seek(stringStart);
        COSString parsed = super.parseCOSString();

        if (parsed.getString().length() > MAX_STRING_LENGTH)
        {
            addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_TOO_LONG, "Hexa string is too long at offset "+source.getPosition()));
        }
        return parsed;
    }

    /**
     * Call {@link PDFParser#parseDirObject()} and check the limits of the parsed object:
     * a COSFloat must lie between MAX_NEGATIVE_FLOAT and MAX_POSITIVE_FLOAT, any other COSNumber must
     * fit in the int range (both reported as ERROR_SYNTAX_NUMERIC_RANGE), and a COSDictionary must not
     * hold more than MAX_DICT_ENTRIES entries (reported as ERROR_SYNTAX_TOO_MANY_ENTRIES).
     *
     * @return The parsed object.
     * @throws java.io.IOException if there is an error during parsing.
     */
    @Override
    protected COSBase parseDirObject() throws IOException
    {
        final COSBase parsed = super.parseDirObject();

        if (parsed instanceof COSFloat)
        {
            double realValue = ((COSNumber) parsed).doubleValue();
            boolean outOfRange = realValue > MAX_POSITIVE_FLOAT || realValue < MAX_NEGATIVE_FLOAT;
            if (outOfRange)
            {
                addValidationError(new ValidationError(ERROR_SYNTAX_NUMERIC_RANGE,
                        "Float is too long or too small: " + realValue+"  at offset "+source.getPosition()));
            }
        }
        else if (parsed instanceof COSNumber)
        {
            long integerValue = ((COSNumber) parsed).longValue();
            boolean outOfRange = integerValue > Integer.MAX_VALUE || integerValue < Integer.MIN_VALUE;
            if (outOfRange)
            {
                addValidationError(new ValidationError(ERROR_SYNTAX_NUMERIC_RANGE,
                        "Numeric is too long or too small: " + integerValue+"  at offset "+source.getPosition()));
            }
        }
        else if (parsed instanceof COSDictionary)
        {
            int entryCount = ((COSDictionary) parsed).size();
            if (entryCount > MAX_DICT_ENTRIES)
            {
                addValidationError(new ValidationError(ERROR_SYNTAX_TOO_MANY_ENTRIES, "Too Many Entries In Dictionary at offset "+source.getPosition()));
            }
        }
        return parsed;
    }

    @Override
    protected COSBase parseObjectDynamically(long objNr, int objGenNr, boolean requireExistingNotCompressedObj)
            throws IOException
    {
        // ---- create object key and get object (container) from pool
        final COSObjectKey objKey = new COSObjectKey(objNr, objGenNr);
        final COSObject pdfObject = document.getObjectFromPool(objKey);

        if (pdfObject.getObject() == null)
        {
            // not previously parsed
            // ---- read offset or object stream object number from xref table
            Long offsetOrObjstmObNr = xrefTrailerResolver.getXrefTable().get(objKey);

            // sanity test to circumvent loops with broken documents
            if (requireExistingNotCompressedObj && ((offsetOrObjstmObNr == null)))
            {
                addValidationError(new ValidationError(ERROR_SYNTAX_MISSING_OFFSET,
                        "Object must be defined and must not be compressed object: " + objKey.getNumber() + ":"
                                + objKey.getGeneration()));
                throw new SyntaxValidationException("Object must be defined and must not be compressed object: "
                        + objKey.getNumber() + ":" + objKey.getGeneration(), validationResult);
            }

            if (offsetOrObjstmObNr == null)
            {
                // not defined object -> NULL object (Spec. 1.7, chap. 3.2.9)
                pdfObject.setObject(COSNull.NULL);
            }
            else if (offsetOrObjstmObNr == 0)
            {
                addValidationError(new ValidationError(ERROR_SYNTAX_INVALID_OFFSET, "Object {" + objKey.getNumber()
                        + ":" + objKey.getGeneration() + "} has an offset of 0"));
            }
            else if (offsetOrObjstmObNr > 0)
            {
                // offset of indirect object in file
                // ---- go to object start
                source.seek(offsetOrObjstmObNr);
                // ---- we must have an indirect object
                long readObjNr;
                int readObjGen;

                long offset = source.getPosition();
                String line = readLine();
                Pattern pattern = Pattern.compile("(\\d+)\\s(\\d+)\\sobj");
                Matcher matcher = pattern.matcher(line);
                if (matcher.matches())
                {
                    readObjNr = Long.parseLong(matcher.group(1));
                    readObjGen = Integer.parseInt(matcher.group(2));
                }
                else
                {

                    addValidationError(new ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, "Single space expected [offset="+offset+"; key="+offsetOrObjstmObNr.toString()+"; line="+line+"; object="+pdfObject.toString()+"]"));

                    // reset source cursor to read object information
                    source.seek(offset);
                    readObjNr = readObjectNumber();
                    readObjGen = readGenerationNumber();
                    skipSpaces(); // skip spaces between Object Generation number and the 'obj' keyword 
                    for (char c : OBJ_MARKER)
                    {
                        if (source.read() != c)
                        {
                            addValidationError(new ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, "Expected pattern '"
                                    + new String(OBJ_MARKER) + " but missed at character '" + c + "'"));
                            throw new SyntaxValidationException("Expected pattern '" + new String(OBJ_MARKER)
                                    + " but missed at character '" + c + "'", validationResult);
                        }
                    }
                }

                // ---- consistency check
                if ((readObjNr != objKey.getNumber()) || (readObjGen != objKey.getGeneration()))
                {
                    throw new IOException("XREF for " + objKey.getNumber() + ":" + objKey.getGeneration()
                            + " points to wrong object: " + readObjNr + ":" + readObjGen);
                }

                skipSpaces();
                COSBase pb = parseDirObject();
                skipSpaces();
                long endObjectOffset = source.getPosition();
                String endObjectKey = readString();

                if (endObjectKey.equals("stream"))
                {
                    source.seek(endObjectOffset);
                    if (pb instanceof COSDictionary)
                    {
                        COSStream stream = parseCOSStream((COSDictionary) pb);
                        if (securityHandler != null)
                        {
                            securityHandler.decryptStream(stream, objNr, objGenNr);
                        }
                        pb = stream;
                    }
                    else
                    {
                        // this is not legal
                        // the combination of a dict and the stream/endstream forms a complete stream object
                        throw new IOException("Stream not preceded by dictionary (offset: " + offsetOrObjstmObNr + ").");
                    }
                    skipSpaces();
                    endObjectOffset = source.getPosition();
                    endObjectKey = readString();

                    // we have case with a second 'endstream' before endobj
                    if (!endObjectKey.startsWith("endobj"))
                    {
                        if (endObjectKey.startsWith("endstream"))
                        {
                            endObjectKey = endObjectKey.substring(9).trim();
                            if (endObjectKey.length() == 0)
                            {
                                // no other characters in extra endstream line
                                endObjectKey = readString(); // read next line
                            }
                        }
                    }
                }
                else if (securityHandler != null)
                {
                    securityHandler.decrypt(pb, objNr, objGenNr);
                }

                pdfObject.setObject(pb);

                if (!endObjectKey.startsWith("endobj"))
                {
                    throw new IOException("Object (" + readObjNr + ":" + readObjGen + ") at offset "
                            + offsetOrObjstmObNr + " does not end with 'endobj'.");
                }
                else
                {
                    offset = source.getPosition();
                    source.seek(endObjectOffset - 1);
                    if (!nextIsEOL())
                    {
                        addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER,
                                "EOL expected before the 'endobj' keyword at offset "+source.getPosition()));
                    }
                    source.seek(offset);
                }

                if (!nextIsEOL())
                {
                    addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER,
                            "EOL expected after the 'endobj' keyword at offset "+source.getPosition()));
                }
            }
            else
            {
                // xref value is object nr of object stream containing object to be parsed;
                // since our object was not found it means object stream was not parsed so far
                final int objstmObjNr = (int) (-offsetOrObjstmObNr);
                final COSBase objstmBaseObj = parseObjectDynamically(objstmObjNr, 0, true);
                if (objstmBaseObj instanceof COSStream)
                {
                    // parse object stream
                    PDFObjectStreamParser parser = new PDFObjectStreamParser((COSStream) objstmBaseObj, document);
                    parser.parse();

                    // register all objects which are referenced to be contained in object stream
                    for (COSObject next : parser.getObjects())
                    {
                        COSObjectKey stmObjKey = new COSObjectKey(next);
                        Long offset = xrefTrailerResolver.getXrefTable().get(stmObjKey); 
                        if (offset != null && offset == -objstmObjNr)
                        {
                            COSObject stmObj = document.getObjectFromPool(stmObjKey);
                            stmObj.setObject(next.getObject());
                        }
                    }
                }
            }
        }
        return pdfObject.getObject();
    }

    /**
     * Delegates the pattern search to the superclass and, when the pattern is the
     * {@code EOF_MARKER} and a match is reported at a positive index, checks the bytes
     * that follow the marker in {@code buf}.
     * <p>
     * The only trailing content accepted without complaint is nothing at all, a single
     * CR (13) or LF (10) byte, or exactly the two bytes CR LF. Anything else registers an
     * {@code ERROR_SYNTAX_TRAILER_EOF} validation error. The search result itself is never
     * altered by this check.
     *
     * @param pattern the character sequence to look for
     * @param buf the buffer to search in
     * @param endOff passed through unchanged to the superclass search
     * @return the index returned by the superclass implementation
     */
    @Override
    protected int lastIndexOf(final char[] pattern, final byte[] buf, final int endOff)
    {
        final int matchIndex = super.lastIndexOf(pattern, buf, endOff);

        // Only a match of the EOF marker at a positive index triggers the trailing-data check.
        if (matchIndex <= 0 || !Arrays.equals(pattern, EOF_MARKER))
        {
            return matchIndex;
        }

        // Index of the first byte following the marker, and how many bytes remain after it.
        final int afterMarker = matchIndex + pattern.length;
        final int trailingCount = buf.length - afterMarker;

        final boolean unexpectedTrailingData;
        switch (trailingCount)
        {
        case 0:
            // the marker is the very last thing in the buffer
            unexpectedTrailingData = false;
            break;
        case 1:
            // a lone CR or LF is tolerated
            unexpectedTrailingData = buf[afterMarker] != 13 && buf[afterMarker] != 10;
            break;
        case 2:
            // two trailing bytes are tolerated only when they are exactly CR LF
            unexpectedTrailingData = buf[afterMarker] != 13 || buf[afterMarker + 1] != 10;
            break;
        default:
            // more than two trailing bytes can never be a single EOL
            unexpectedTrailingData = trailingCount > 2;
            break;
        }

        if (unexpectedTrailingData)
        {
            // The reported offset is the current position of the source;
            // Long.MIN_VALUE stands in when that position cannot be read.
            long reportedPosition;
            try
            {
                reportedPosition = source.getPosition();
            }
            catch (IOException ignored)
            {
                reportedPosition = Long.MIN_VALUE;
            }
            addValidationError(new ValidationError(ERROR_SYNTAX_TRAILER_EOF,
                    "File contains data after the last %%EOF sequence at offset " + reportedPosition));
        }
        return matchIndex;
    }
}