/***************************************************************************** * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. * ****************************************************************************/ package org.apache.pdfbox.preflight.parser; import java.io.File; import java.io.IOException; import java.nio.charset.Charset; import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.activation.DataSource; import javax.activation.FileDataSource; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; import org.apache.pdfbox.cos.COSDictionary; import org.apache.pdfbox.cos.COSDocument; import org.apache.pdfbox.cos.COSFloat; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.cos.COSNull; import org.apache.pdfbox.cos.COSNumber; import org.apache.pdfbox.cos.COSObject; import org.apache.pdfbox.cos.COSObjectKey; import org.apache.pdfbox.cos.COSStream; import org.apache.pdfbox.cos.COSString; import org.apache.pdfbox.io.IOUtils; import org.apache.pdfbox.io.RandomAccessBufferedFileInputStream; import org.apache.pdfbox.io.ScratchFile; import org.apache.pdfbox.pdfparser.PDFObjectStreamParser; import org.apache.pdfbox.pdfparser.PDFParser; import org.apache.pdfbox.pdfparser.XrefTrailerResolver.XRefType; import org.apache.pdfbox.pdmodel.PDDocument; import org.apache.pdfbox.preflight.Format; import org.apache.pdfbox.preflight.PreflightConfiguration; import org.apache.pdfbox.preflight.PreflightConstants; import org.apache.pdfbox.preflight.PreflightContext; import org.apache.pdfbox.preflight.PreflightDocument; import org.apache.pdfbox.preflight.ValidationResult; import org.apache.pdfbox.preflight.ValidationResult.ValidationError; import org.apache.pdfbox.preflight.exception.SyntaxValidationException; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_ARRAY_TOO_LONG; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_CROSS_REF; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_HEXA_STRING_EVEN_NUMBER; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_HEXA_STRING_INVALID; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_HEXA_STRING_TOO_LONG; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_INVALID_OFFSET; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_MISSING_OFFSET; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_NAME_TOO_LONG; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_NUMERIC_RANGE; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_STREAM_DELIMITER; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_TOO_MANY_ENTRIES; import static org.apache.pdfbox.preflight.PreflightConstants.ERROR_SYNTAX_TRAILER_EOF; import static org.apache.pdfbox.preflight.PreflightConstants.MAX_ARRAY_ELEMENTS; import static org.apache.pdfbox.preflight.PreflightConstants.MAX_DICT_ENTRIES; import static org.apache.pdfbox.preflight.PreflightConstants.MAX_NAME_SIZE; import static org.apache.pdfbox.preflight.PreflightConstants.MAX_NEGATIVE_FLOAT; import static org.apache.pdfbox.preflight.PreflightConstants.MAX_POSITIVE_FLOAT; import static org.apache.pdfbox.preflight.PreflightConstants.MAX_STRING_LENGTH; public class PreflightParser extends PDFParser { /** * Define a one byte encoding that hasn't specific encoding in UTF-8 charset. Avoid unexpected error when the * encoding is Cp5816 */ public static final Charset encoding = Charset.forName("ISO-8859-1"); protected DataSource dataSource; protected ValidationResult validationResult; protected PreflightDocument preflightDocument; protected PreflightContext ctx; /** * Constructor. * * @param file * @throws IOException if there is a reading error. */ public PreflightParser(File file) throws IOException { // TODO move file handling outside of the parser super(new RandomAccessBufferedFileInputStream(file)); this.setLenient(false); this.dataSource = new FileDataSource(file); } /** * Constructor. * * @param file * @param scratch * @throws IOException if there is a reading error. */ public PreflightParser(File file, ScratchFile scratch) throws IOException { // TODO move file handling outside of the parser super(new RandomAccessBufferedFileInputStream(file), scratch); this.setLenient(false); this.dataSource = new FileDataSource(file); } /** * Constructor. * * @param filename * @throws IOException if there is a reading error. */ public PreflightParser(String filename) throws IOException { // TODO move file handling outside of the parser this(new File(filename)); } /** * Constructor. * * @param filename * @param scratch * @throws IOException if there is a reading error. */ public PreflightParser(String filename, ScratchFile scratch) throws IOException { // TODO move file handling outside of the parser this(new File(filename), scratch); } /** * Constructor. This one is slower than the file and the filename constructors, because * a temporary file will be created. * * @param dataSource the datasource * @throws IOException if there is a reading error. */ public PreflightParser(DataSource dataSource) throws IOException { // TODO move file handling outside of the parser super(new RandomAccessBufferedFileInputStream(dataSource.getInputStream())); this.setLenient(false); this.dataSource = dataSource; } /** * Constructor. This one is slower than the file and the filename constructors, because * a temporary file will be created. * * @param dataSource the datasource * @param scratch * @throws IOException if there is a reading error. */ public PreflightParser(DataSource dataSource, ScratchFile scratch) throws IOException { // TODO move file handling outside of the parser super(new RandomAccessBufferedFileInputStream(dataSource.getInputStream()), scratch); this.setLenient(false); this.dataSource = dataSource; } /** * Create an instance of ValidationResult with a ValidationError(UNKNOWN_ERROR) * * @return the ValidationError instance. */ protected static ValidationResult createUnknownErrorResult() { ValidationError error = new ValidationError(PreflightConstants.ERROR_UNKOWN_ERROR); return new ValidationResult(error); } /** * Add the error to the ValidationResult. If the validationResult is null, an instance is created using the * isWarning boolean of the ValidationError to know if the ValidationResult must be flagged as Valid. * * @param error */ protected void addValidationError(ValidationError error) { if (this.validationResult == null) { this.validationResult = new ValidationResult(error.isWarning()); } this.validationResult.addError(error); } protected void addValidationErrors(List<ValidationError> errors) { for (ValidationError error : errors) { addValidationError(error); } } @Override public void parse() throws IOException { parse(Format.PDF_A1B); } /** * Parse the given file and check if it is a confirming file according to the given format. * * @param format * format that the document should follow (default {@link Format#PDF_A1B}) * @throws IOException */ public void parse(Format format) throws IOException { parse(format, null); } /** * Parse the given file and check if it is a confirming file according to the given format. * * @param format * format that the document should follow (default {@link Format#PDF_A1B}) * @param config * Configuration bean that will be used by the PreflightDocument. If null the format is used to determine * the default configuration. * @throws IOException */ public void parse(Format format, PreflightConfiguration config) throws IOException { checkPdfHeader(); try { super.parse(); } catch (IOException e) { addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_COMMON, e.getMessage())); throw new SyntaxValidationException(e, this.validationResult); } finally { // TODO move file handling outside of the parser IOUtils.closeQuietly(source); } Format formatToUse = (format == null ? Format.PDF_A1B : format); createPdfADocument(formatToUse, config); createContext(); } protected void createPdfADocument(Format format, PreflightConfiguration config) throws IOException { COSDocument cosDocument = getDocument(); this.preflightDocument = new PreflightDocument(cosDocument, format, config); } /** * Create a validation context. This context is set to the PreflightDocument. */ protected void createContext() { this.ctx = new PreflightContext(this.dataSource); ctx.setDocument(preflightDocument); preflightDocument.setContext(ctx); ctx.setXrefTrailerResolver(xrefTrailerResolver); ctx.setFileLen(this.fileLen); } @Override public PDDocument getPDDocument() throws IOException { preflightDocument.setResult(validationResult); // Add XMP MetaData return preflightDocument; } public PreflightDocument getPreflightDocument() throws IOException { return (PreflightDocument) getPDDocument(); } // -------------------------------------------------------- // - Below All methods that adds controls on the PDF syntax // -------------------------------------------------------- @Override /** * Fill the CosDocument with some object that isn't set by the NonSequentialParser */ protected void initialParse() throws IOException { super.initialParse(); // For each ObjectKey, we check if the object has been loaded // useful for linearized PDFs Map<COSObjectKey, Long> xrefTable = document.getXrefTable(); for (Entry<COSObjectKey, Long> entry : xrefTable.entrySet()) { COSObject co = document.getObjectFromPool(entry.getKey()); if (co.getObject() == null) { // object isn't loaded - parse the object to load its content parseObjectDynamically(co, true); } } } /** * Check that the PDF header match rules of the PDF/A specification. First line (offset 0) must * be a comment with the PDF version (version 1.0 isn't conform to the PDF/A specification) * Second line is a comment with at least 4 bytes greater than 0x80 */ protected void checkPdfHeader() { try { source.seek(0); String firstLine = readLine(); if (firstLine == null || !firstLine.matches("%PDF-1\\.[1-9]")) { addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER, "First line must match %PDF-1.\\d")); } String secondLine = readLine(); if (secondLine != null) { byte[] secondLineAsBytes = secondLine.getBytes(encoding.name()); if (secondLineAsBytes.length >= 5) { if (secondLineAsBytes[0] != '%') { addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER, "Second line must begin with '%' followed by at least 4 bytes greater than 127")); } else { for (int i = 1; i < 5; ++i) { byte b = secondLineAsBytes[i]; if ((b & 0xFF) < 0x80) { addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER, "Second line must begin with '%' followed by at least 4 bytes greater than 127")); break; } } } } else { addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER, "Second line must begin with '%' followed by at least 4 bytes greater than 127")); } } source.seek(0); } catch (IOException e) { addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_HEADER, "Unable to read the PDF file : " + e.getMessage(), e)); } } /** * Same method than the {@linkplain PDFParser#parseXrefTable(long)} with additional controls : - * EOL mandatory after the 'xref' keyword - Cross reference subsection header uses single white * space as separator - and so on * * @param startByteOffset the offset to start at * @return false on parsing error * @throws IOException If an IO error occurs. */ @Override protected boolean parseXrefTable(long startByteOffset) throws IOException { if (source.peek() != 'x') { return false; } String xref = readString(); if (!xref.equals("xref")) { addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "xref must be followed by a EOL character")); return false; } if (!nextIsEOL()) { addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "xref must be followed by EOL")); } // signal start of new XRef xrefTrailerResolver.nextXrefObj(startByteOffset,XRefType.TABLE); // Xref tables can have multiple sections. Each starts with a starting object id and a count. while (true) { // just after the xref<EOL> there are an integer // first obj id long currObjID; // the number of objects in the xref table int count; long offset = source.getPosition(); String line = readLine(); Pattern pattern = Pattern.compile("(\\d+)\\s(\\d+)(\\s*)"); Matcher matcher = pattern.matcher(line); if (matcher.matches()) { currObjID = Long.parseLong(matcher.group(1)); count = Integer.parseInt(matcher.group(2)); } else { addValidationError(new ValidationError(ERROR_SYNTAX_CROSS_REF, "Cross reference subsection header is invalid: '" + line + "' at position " + source.getPosition())); // reset source cursor to read xref information source.seek(offset); // first obj id currObjID = readObjectNumber(); // the number of objects in the xref table count = readInt(); } skipSpaces(); for (int i = 0; i < count; i++) { if (source.isEOF() || isEndOfName((char) source.peek())) { break; } if (source.peek() == 't') { addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "Expected xref line but 't' found")); break; } // Ignore table contents String currentLine = readLine(); String[] splitString = currentLine.split(" "); if (splitString.length < 3) { addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "invalid xref line: " + currentLine)); break; } // This supports the corrupt table as reported in PDFBOX-474 (XXXX XXX XX n) if (splitString[splitString.length - 1].equals("n")) { try { long currOffset = Long.parseLong(splitString[0]); int currGenID = Integer.parseInt(splitString[1]); COSObjectKey objKey = new COSObjectKey(currObjID, currGenID); xrefTrailerResolver.setXRef(objKey, currOffset); } catch (NumberFormatException e) { addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "offset or genid can't be read as number " + e.getMessage(), e)); } } else if (!splitString[2].equals("f")) { addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_CROSS_REF, "Corrupt XRefTable Entry - ObjID:" + currObjID)); } currObjID++; skipSpaces(); } skipSpaces(); if (!isDigit()) { break; } } return true; } /** * Wraps the {@link PDFParser#parseCOSStream} to check rules on 'stream' and 'endstream' * keywords. {@link #checkStreamKeyWord()} and {@link #checkEndstreamKeyWord()} * * @param dic dictionary that goes with this stream. * * @return parsed pdf stream. * * @throws IOException if an error occurred reading the stream, like problems with reading * length attribute, stream does not end with 'endstream' after data read, stream too short etc. */ @Override protected COSStream parseCOSStream(COSDictionary dic) throws IOException { checkStreamKeyWord(); COSStream result = super.parseCOSStream(dic); checkEndstreamKeyWord(); return result; } /** * 'stream' must be followed by <CR><LF> or only <LF> * * @throws IOException */ protected void checkStreamKeyWord() throws IOException { String streamV = readString(); if (!streamV.equals("stream")) { addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'stream' keyword but found '" + streamV + "' at offset "+source.getPosition())); } int nextChar = source.read(); if (!((nextChar == 13 && source.peek() == 10) || nextChar == 10)) { addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'EOL' after the stream keyword at offset "+source.getPosition())); } // set the offset before stream source.seek(source.getPosition() - 7); } /** * 'endstream' must be preceded by an EOL * * @throws IOException */ protected void checkEndstreamKeyWord() throws IOException { source.seek(source.getPosition() - 10); if (!nextIsEOL()) { addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'EOL' before the endstream keyword at offset "+source.getPosition()+" but found '"+source.peek()+"'")); } String endstreamV = readString(); if (!endstreamV.equals("endstream")) { addValidationError(new ValidationError(ERROR_SYNTAX_STREAM_DELIMITER, "Expected 'endstream' keyword at offset "+source.getPosition()+" but found '" + endstreamV + "'")); } } private boolean nextIsEOL() throws IOException { boolean succeed = false; int nextChar = source.read(); if (ASCII_CR == nextChar && ASCII_LF == source.peek()) { source.read(); succeed = true; } else if (ASCII_CR == nextChar || ASCII_LF == nextChar) { succeed = true; } return succeed; } @Override /** * Call {@link BaseParser#parseCOSArray()} and check the number of element in the array */ protected COSArray parseCOSArray() throws IOException { COSArray result = super.parseCOSArray(); if (result != null && result.size() > MAX_ARRAY_ELEMENTS) { addValidationError(new ValidationError(ERROR_SYNTAX_ARRAY_TOO_LONG, "Array too long : " + result.size())); } return result; } @Override /** * Call {@link BaseParser#parseCOSName()} and check the length of the name */ protected COSName parseCOSName() throws IOException { COSName result = super.parseCOSName(); if (result != null && result.getName().getBytes().length > MAX_NAME_SIZE) { addValidationError(new ValidationError(ERROR_SYNTAX_NAME_TOO_LONG, "Name too long: " + result.getName())); } return result; } /** * Check that the hexa string contains only an even number of * Hexadecimal characters. Once it is done, reset the offset at the beginning of the string and * call {@link PDFParser#parseCOSString()} * * @return The parsed PDF string. * * @throws IOException If there is an error reading from the stream. */ @Override protected COSString parseCOSString() throws IOException { // offset reminder long offset = source.getPosition(); char nextChar = (char) source.read(); int count = 0; if (nextChar == '<') { do { nextChar = (char) source.read(); if (nextChar != '>') { if (isWhitespace(nextChar)) { // ignore space characters continue; } if (Character.digit(nextChar, 16) >= 0) { count++; } else { addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_INVALID, "Hexa String must have only Hexadecimal Characters (found '" + nextChar + "') at offset " + source.getPosition())); break; } } } while (nextChar != '>'); } if (count % 2 != 0) { addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_EVEN_NUMBER, "Hexa string shall contain even number of non white space char at offset " + source.getPosition())); } // reset the offset to parse the COSString source.seek(offset); COSString result = super.parseCOSString(); if (result.getString().length() > MAX_STRING_LENGTH) { addValidationError(new ValidationError(ERROR_SYNTAX_HEXA_STRING_TOO_LONG, "Hexa string is too long at offset "+source.getPosition())); } return result; } /** * Call {@link PDFParser#parseDirObject()} check limit range for Float, Integer and number of * Dictionary entries. * * @return The parsed object. * @throws java.io.IOException if there is an error during parsing. */ @Override protected COSBase parseDirObject() throws IOException { COSBase result = super.parseDirObject(); if (result instanceof COSNumber) { COSNumber number = (COSNumber) result; if (number instanceof COSFloat) { Double real = number.doubleValue(); if (real > MAX_POSITIVE_FLOAT || real < MAX_NEGATIVE_FLOAT) { addValidationError(new ValidationError(ERROR_SYNTAX_NUMERIC_RANGE, "Float is too long or too small: " + real+" at offset "+source.getPosition())); } } else { long numAsLong = number.longValue(); if (numAsLong > Integer.MAX_VALUE || numAsLong < Integer.MIN_VALUE) { addValidationError(new ValidationError(ERROR_SYNTAX_NUMERIC_RANGE, "Numeric is too long or too small: " + numAsLong+" at offset "+source.getPosition())); } } } if (result instanceof COSDictionary) { COSDictionary dic = (COSDictionary) result; if (dic.size() > MAX_DICT_ENTRIES) { addValidationError(new ValidationError(ERROR_SYNTAX_TOO_MANY_ENTRIES, "Too Many Entries In Dictionary at offset "+source.getPosition())); } } return result; } @Override protected COSBase parseObjectDynamically(long objNr, int objGenNr, boolean requireExistingNotCompressedObj) throws IOException { // ---- create object key and get object (container) from pool final COSObjectKey objKey = new COSObjectKey(objNr, objGenNr); final COSObject pdfObject = document.getObjectFromPool(objKey); if (pdfObject.getObject() == null) { // not previously parsed // ---- read offset or object stream object number from xref table Long offsetOrObjstmObNr = xrefTrailerResolver.getXrefTable().get(objKey); // sanity test to circumvent loops with broken documents if (requireExistingNotCompressedObj && ((offsetOrObjstmObNr == null))) { addValidationError(new ValidationError(ERROR_SYNTAX_MISSING_OFFSET, "Object must be defined and must not be compressed object: " + objKey.getNumber() + ":" + objKey.getGeneration())); throw new SyntaxValidationException("Object must be defined and must not be compressed object: " + objKey.getNumber() + ":" + objKey.getGeneration(), validationResult); } if (offsetOrObjstmObNr == null) { // not defined object -> NULL object (Spec. 1.7, chap. 3.2.9) pdfObject.setObject(COSNull.NULL); } else if (offsetOrObjstmObNr == 0) { addValidationError(new ValidationError(ERROR_SYNTAX_INVALID_OFFSET, "Object {" + objKey.getNumber() + ":" + objKey.getGeneration() + "} has an offset of 0")); } else if (offsetOrObjstmObNr > 0) { // offset of indirect object in file // ---- go to object start source.seek(offsetOrObjstmObNr); // ---- we must have an indirect object long readObjNr; int readObjGen; long offset = source.getPosition(); String line = readLine(); Pattern pattern = Pattern.compile("(\\d+)\\s(\\d+)\\sobj"); Matcher matcher = pattern.matcher(line); if (matcher.matches()) { readObjNr = Long.parseLong(matcher.group(1)); readObjGen = Integer.parseInt(matcher.group(2)); } else { addValidationError(new ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, "Single space expected [offset="+offset+"; key="+offsetOrObjstmObNr.toString()+"; line="+line+"; object="+pdfObject.toString()+"]")); // reset source cursor to read object information source.seek(offset); readObjNr = readObjectNumber(); readObjGen = readGenerationNumber(); skipSpaces(); // skip spaces between Object Generation number and the 'obj' keyword for (char c : OBJ_MARKER) { if (source.read() != c) { addValidationError(new ValidationError(ERROR_SYNTAX_OBJ_DELIMITER, "Expected pattern '" + new String(OBJ_MARKER) + " but missed at character '" + c + "'")); throw new SyntaxValidationException("Expected pattern '" + new String(OBJ_MARKER) + " but missed at character '" + c + "'", validationResult); } } } // ---- consistency check if ((readObjNr != objKey.getNumber()) || (readObjGen != objKey.getGeneration())) { throw new IOException("XREF for " + objKey.getNumber() + ":" + objKey.getGeneration() + " points to wrong object: " + readObjNr + ":" + readObjGen); } skipSpaces(); COSBase pb = parseDirObject(); skipSpaces(); long endObjectOffset = source.getPosition(); String endObjectKey = readString(); if (endObjectKey.equals("stream")) { source.seek(endObjectOffset); if (pb instanceof COSDictionary) { COSStream stream = parseCOSStream((COSDictionary) pb); if (securityHandler != null) { securityHandler.decryptStream(stream, objNr, objGenNr); } pb = stream; } else { // this is not legal // the combination of a dict and the stream/endstream forms a complete stream object throw new IOException("Stream not preceded by dictionary (offset: " + offsetOrObjstmObNr + ")."); } skipSpaces(); endObjectOffset = source.getPosition(); endObjectKey = readString(); // we have case with a second 'endstream' before endobj if (!endObjectKey.startsWith("endobj")) { if (endObjectKey.startsWith("endstream")) { endObjectKey = endObjectKey.substring(9).trim(); if (endObjectKey.length() == 0) { // no other characters in extra endstream line endObjectKey = readString(); // read next line } } } } else if (securityHandler != null) { securityHandler.decrypt(pb, objNr, objGenNr); } pdfObject.setObject(pb); if (!endObjectKey.startsWith("endobj")) { throw new IOException("Object (" + readObjNr + ":" + readObjGen + ") at offset " + offsetOrObjstmObNr + " does not end with 'endobj'."); } else { offset = source.getPosition(); source.seek(endObjectOffset - 1); if (!nextIsEOL()) { addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER, "EOL expected before the 'endobj' keyword at offset "+source.getPosition())); } source.seek(offset); } if (!nextIsEOL()) { addValidationError(new ValidationError(PreflightConstants.ERROR_SYNTAX_OBJ_DELIMITER, "EOL expected after the 'endobj' keyword at offset "+source.getPosition())); } } else { // xref value is object nr of object stream containing object to be parsed; // since our object was not found it means object stream was not parsed so far final int objstmObjNr = (int) (-offsetOrObjstmObNr); final COSBase objstmBaseObj = parseObjectDynamically(objstmObjNr, 0, true); if (objstmBaseObj instanceof COSStream) { // parse object stream PDFObjectStreamParser parser = new PDFObjectStreamParser((COSStream) objstmBaseObj, document); parser.parse(); // register all objects which are referenced to be contained in object stream for (COSObject next : parser.getObjects()) { COSObjectKey stmObjKey = new COSObjectKey(next); Long offset = xrefTrailerResolver.getXrefTable().get(stmObjKey); if (offset != null && offset == -objstmObjNr) { COSObject stmObj = document.getObjectFromPool(stmObjKey); stmObj.setObject(next.getObject()); } } } } } return pdfObject.getObject(); } @Override protected int lastIndexOf(final char[] pattern, final byte[] buf, final int endOff) { int offset = super.lastIndexOf(pattern, buf, endOff); if (offset > 0 && Arrays.equals(pattern, EOF_MARKER)) { // this is the offset of the last %%EOF sequence. // nothing should be present after this sequence. int tmpOffset = offset + pattern.length; if (tmpOffset != buf.length) { // EOL is authorized if ((buf.length - tmpOffset) > 2 || (buf.length - tmpOffset == 2 && (buf[tmpOffset] != 13 || buf[tmpOffset + 1] != 10)) || (buf.length - tmpOffset == 1 && (buf[tmpOffset] != 13 && buf[tmpOffset] != 10))) { long position; try { position = source.getPosition(); } catch(IOException excpetion) { position = Long.MIN_VALUE; } addValidationError(new ValidationError(ERROR_SYNTAX_TRAILER_EOF, "File contains data after the last %%EOF sequence at offset " + position)); } } } return offset; } }