package com.tom_roush.pdfbox.pdfparser;

import android.util.Log;

import com.tom_roush.pdfbox.cos.COSArray;
import com.tom_roush.pdfbox.cos.COSBase;
import com.tom_roush.pdfbox.cos.COSBoolean;
import com.tom_roush.pdfbox.cos.COSDictionary;
import com.tom_roush.pdfbox.cos.COSDocument;
import com.tom_roush.pdfbox.cos.COSInteger;
import com.tom_roush.pdfbox.cos.COSName;
import com.tom_roush.pdfbox.cos.COSNull;
import com.tom_roush.pdfbox.cos.COSNumber;
import com.tom_roush.pdfbox.cos.COSObject;
import com.tom_roush.pdfbox.cos.COSObjectKey;
import com.tom_roush.pdfbox.cos.COSString;
import com.tom_roush.pdfbox.util.Charsets;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Arrays;

import static com.tom_roush.pdfbox.util.Charsets.ISO_8859_1;

/**
 * This class is used to contain parsing logic that will be used by both the
 * PDFParser and the COSStreamParser.
 *
 * <p>All parsing methods read from {@link #seqSource}. Most of them are deliberately
 * lenient: where a malformed construct can be skipped or repaired they log a warning
 * and continue instead of failing.
 *
 * @author Ben Litchfield
 */
public abstract class BaseParser
{
    private static final long OBJECT_NUMBER_THRESHOLD = 10000000000L;

    private static final long GENERATION_NUMBER_THRESHOLD = 65535;

    // Lower-case letters used when scanning for the "endstream" / "endobj" keywords.
    protected static final int E = 'e';
    protected static final int N = 'n';
    protected static final int D = 'd';

    protected static final int S = 's';
    protected static final int T = 't';
    protected static final int R = 'r';
    protected static final int A = 'a';
    protected static final int M = 'm';

    protected static final int O = 'o';
    protected static final int B = 'b';
    protected static final int J = 'j';

    /**
     * This is a string constant that will be used for comparisons.
     */
    public static final String DEF = "def";

    /**
     * This is a string constant that will be used for comparisons.
     */
    protected static final String ENDOBJ_STRING = "endobj";

    /**
     * This is a string constant that will be used for comparisons.
     */
    protected static final String ENDSTREAM_STRING = "endstream";

    /**
     * This is a string constant that will be used for comparisons.
     */
    protected static final String STREAM_STRING = "stream";

    /**
     * This is a string constant that will be used for comparisons.
     */
    private static final String TRUE = "true";

    /**
     * This is a string constant that will be used for comparisons.
     */
    private static final String FALSE = "false";

    /**
     * This is a string constant that will be used for comparisons.
     */
    private static final String NULL = "null";

    /**
     * ASCII code for line feed.
     */
    protected static final byte ASCII_LF = 10;

    /**
     * ASCII code for carriage return.
     */
    protected static final byte ASCII_CR = 13;

    private static final byte ASCII_ZERO = 48;
    private static final byte ASCII_NINE = 57;
    private static final byte ASCII_SPACE = 32;

    /**
     * This is the stream that will be read from.
     */
    protected final SequentialSource seqSource;

    /**
     * This is the document that will be parsed.
     */
    protected COSDocument document;

    /**
     * Creates a parser that reads from the given source.
     *
     * @param pdfSource the source all parsing methods will read from.
     */
    public BaseParser(SequentialSource pdfSource)
    {
        this.seqSource = pdfSource;
    }

    /**
     * Tells whether the given character is a hexadecimal digit (0-9, a-f, A-F).
     */
    private static boolean isHexDigit(char ch)
    {
        return isDigit(ch)
                || (ch >= 'a' && ch <= 'f')
                || (ch >= 'A' && ch <= 'F');
    }

    /**
     * This will parse a PDF dictionary value. The value is either a direct object or an
     * indirect reference of the form {@code <number> <generation> R}.
     *
     * @return The parsed direct object, or the pooled object for an indirect reference.
     *
     * @throws IOException If there is an error parsing the dictionary object.
     */
    private COSBase parseCOSDictionaryValue() throws IOException
    {
        long numOffset = seqSource.getPosition();
        COSBase number = parseDirObject();
        skipSpaces();
        if (!isDigit())
        {
            // no second number follows, so this is a plain direct value
            return number;
        }
        long genOffset = seqSource.getPosition();
        COSBase generationNumber = parseDirObject();
        skipSpaces();
        readExpectedChar('R');
        if (!(number instanceof COSInteger))
        {
            throw new IOException("expected number, actual=" + number + " at offset " + numOffset);
        }
        if (!(generationNumber instanceof COSInteger))
        {
            // report the offending generation number, not the object number
            throw new IOException("expected number, actual=" + generationNumber
                    + " at offset " + genOffset);
        }
        COSObjectKey key = new COSObjectKey(((COSInteger) number).longValue(),
                ((COSInteger) generationNumber).intValue());
        return getObjectFromPool(key);
    }

    /**
     * Looks up the object for the given key in the document's object pool.
     *
     * @param key the key of the referenced object.
     * @return the pooled object.
     * @throws IOException if no document is set, i.e. the reference cannot be resolved.
     */
    private COSBase getObjectFromPool(COSObjectKey key) throws IOException
    {
        if (document == null)
        {
            throw new IOException(
                    "object reference " + key + " at offset " + seqSource.getPosition()
                            + " in content stream");
        }
        return document.getObjectFromPool(key);
    }

    /**
     * This will parse a PDF dictionary.
     *
     * @return The parsed dictionary.
     *
     * @throws IOException If there is an error reading the stream.
     */
    protected COSDictionary parseCOSDictionary() throws IOException
    {
        readExpectedChar('<');
        readExpectedChar('<');
        skipSpaces();
        COSDictionary obj = new COSDictionary();
        boolean done = false;
        while (!done)
        {
            skipSpaces();
            char c = (char) seqSource.peek();
            if (c == '>')
            {
                done = true;
            }
            else if (c == '/')
            {
                parseCOSDictionaryNameValuePair(obj);
            }
            else
            {
                // invalid dictionary, we were expecting a /Name, read until the end or until we can recover
                Log.w("PdfBox-Android", "Invalid dictionary, found: '" + c + "' but expected: '/'");
                if (readUntilEndOfCOSDictionary())
                {
                    // we couldn't recover
                    return obj;
                }
            }
        }
        readExpectedChar('>');
        readExpectedChar('>');
        return obj;
    }

    /**
     * Keep reading until the end of the dictionary object or the file has been hit, or until a '/'
     * has been found.
     *
     * @return true if the end of the object or the file has been found, false if not, i.e. that the
     * caller can continue to parse the dictionary at the current position.
     * @throws IOException if there is a reading error.
     */
    private boolean readUntilEndOfCOSDictionary() throws IOException
    {
        int c = seqSource.read();
        while (c != -1 && c != '/' && c != '>')
        {
            // in addition to stopping when we find / or >, we also want
            // to stop when we find endstream or endobj.
            if (c == E)
            {
                c = seqSource.read();
                if (c == N)
                {
                    c = seqSource.read();
                    if (c == D)
                    {
                        c = seqSource.read();
                        boolean isStream = c == S && seqSource.read() == T && seqSource.read() == R
                                && seqSource.read() == E && seqSource.read() == A
                                && seqSource.read() == M;
                        boolean isObj = !isStream && c == O && seqSource.read() == B
                                && seqSource.read() == J;
                        if (isStream || isObj)
                        {
                            // we're done reading this object!
                            return true;
                        }
                    }
                }
            }
            c = seqSource.read();
        }
        if (c == -1)
        {
            return true;
        }
        seqSource.unread(c);
        return false;
    }

    /**
     * Parses one {@code /Name value} pair and stores it in the given dictionary. A trailing
     * {@code def} keyword (as seen in cmap streams) is consumed and ignored.
     *
     * @param obj the dictionary receiving the pair.
     * @throws IOException if there is a reading error.
     */
    private void parseCOSDictionaryNameValuePair(COSDictionary obj) throws IOException
    {
        COSName key = parseCOSName();
        COSBase value = parseCOSDictionaryValue();
        skipSpaces();
        if (((char) seqSource.peek()) == 'd')
        {
            // if the next string is 'def' then we are parsing a cmap stream
            // and want to ignore it, otherwise put the token back.
            String potentialDEF = readString();
            if (!potentialDEF.equals(DEF))
            {
                seqSource.unread(potentialDEF.getBytes(ISO_8859_1));
            }
            else
            {
                skipSpaces();
            }
        }

        if (value == null)
        {
            Log.w("PdfBox-Android", "Bad Dictionary Declaration " + seqSource);
        }
        else
        {
            value.setDirect(true);
            obj.setItem(key, value);
        }
    }

    /**
     * Skips the end-of-line marker that follows the {@code stream} keyword, tolerating
     * leading spaces and a lone CR.
     *
     * @throws IOException if there is a reading error.
     */
    protected void skipWhiteSpace() throws IOException
    {
        //PDF Ref 3.2.7 A stream must be followed by either
        //a CRLF or LF but nothing else.

        int whitespace = seqSource.read();

        //see brother_scan_cover.pdf, it adds whitespaces
        //after the stream but before the start of the
        //data, so just read those first
        while (ASCII_SPACE == whitespace)
        {
            whitespace = seqSource.read();
        }

        if (ASCII_CR == whitespace)
        {
            whitespace = seqSource.read();
            // -1 marks end of input and is not a byte that was read, so never push it back
            if (ASCII_LF != whitespace && whitespace != -1)
            {
                seqSource.unread(whitespace);
                //The spec says this is invalid but it happens in the real
                //world so we must support it.
            }
        }
        else if (ASCII_LF == whitespace)
        {
            //that is fine
        }
        else if (whitespace != -1)
        {
            //we are in an error.
            //but again we will do a lenient parsing and just assume that everything
            //is fine
            seqSource.unread(whitespace);
        }
    }

    /**
     * This is really a bug in the Document creators code, but it caused a crash
     * in PDFBox, the first bug was in this format:
     * /Title ( (5)
     * /Creator which was patched in 1 place.
     * However it missed the case where the Close Paren was escaped
     *
     * The second bug was in this format
     * /Title (c:\)
     * /Producer
     *
     * This patch moves this code out of the parseCOSString method, so it can be used twice.
     *
     * @param bracesParameter the number of braces currently open.
     *
     * @return the corrected value of the brace counter
     * @throws IOException if there is a reading error.
     */
    private int checkForMissingCloseParen(final int bracesParameter) throws IOException
    {
        int braces = bracesParameter;
        byte[] nextThreeBytes = new byte[3];
        int amountRead = seqSource.read(nextThreeBytes);

        //lets handle the special case seen in Bull River Rules and Regulations.pdf
        //The dictionary looks like this
        //    2 0 obj
        //    <<
        //        /Type /Info
        //        /Creator (PaperPort http://www.scansoft.com)
        //        /Producer (sspdflib 1.0 http://www.scansoft.com)
        //        /Title ( (5)
        //        /Author ()
        //        /Subject ()
        //
        // Notice the /Title, the braces are not even but they should
        // be.  So lets assume that if we encounter an this scenario
        //   <end_brace><new_line><opening_slash> then that
        // means that there is an error in the pdf and assume that
        // was the end of the document.
        //
        // The guards on amountRead are spelled out per case; the outcome is the same as the
        // former precedence-dependent expression because unread slots of the buffer are 0.
        boolean crLfSlash = amountRead == 3
                && nextThreeBytes[0] == ASCII_CR    // Look for a carriage return
                && nextThreeBytes[1] == ASCII_LF    // Look for a new line
                && nextThreeBytes[2] == 0x2f;       // Look for a slash /
        // second case without a new line
        boolean crSlash = amountRead >= 2
                && nextThreeBytes[0] == ASCII_CR    // Look for a carriage return
                && nextThreeBytes[1] == 0x2f;       // Look for a slash /
        if (crLfSlash || crSlash)
        {
            braces = 0;
        }
        if (amountRead > 0)
        {
            // only peeking: put back exactly what was consumed
            seqSource.unread(Arrays.copyOfRange(nextThreeBytes, 0, amountRead));
        }
        return braces;
    }

    /**
     * This will parse a PDF string. A leading '(' starts a literal string, a leading '&lt;'
     * delegates to the hex string parser.
     *
     * @return The parsed PDF string.
     *
     * @throws IOException If there is an error reading from the stream.
     */
    protected COSString parseCOSString() throws IOException
    {
        char nextChar = (char) seqSource.read();
        char openBrace;
        char closeBrace;
        if (nextChar == '(')
        {
            openBrace = '(';
            closeBrace = ')';
        }
        else if (nextChar == '<')
        {
            return parseCOSHexString();
        }
        else
        {
            throw new IOException("parseCOSString string should start with '(' or '<' and not '"
                    + nextChar + "' " + seqSource);
        }

        ByteArrayOutputStream out = new ByteArrayOutputStream();

        //This is the number of braces read
        //
        int braces = 1;
        int c = seqSource.read();
        while (braces > 0 && c != -1)
        {
            char ch = (char) c;
            int nextc = -2; // not yet read

            if (ch == closeBrace)
            {
                braces--;
                braces = checkForMissingCloseParen(braces);
                if (braces != 0)
                {
                    out.write(ch);
                }
            }
            else if (ch == openBrace)
            {
                braces++;
                out.write(ch);
            }
            else if (ch == '\\')
            {
                //patched by ram
                char next = (char) seqSource.read();
                switch (next)
                {
                    case 'n':
                        out.write('\n');
                        break;
                    case 'r':
                        out.write('\r');
                        break;
                    case 't':
                        out.write('\t');
                        break;
                    case 'b':
                        out.write('\b');
                        break;
                    case 'f':
                        out.write('\f');
                        break;
                    case ')':
                        // PDFBox 276 /Title (c:\)
                        braces = checkForMissingCloseParen(braces);
                        if (braces != 0)
                        {
                            out.write(next);
                        }
                        else
                        {
                            out.write('\\');
                        }
                        break;
                    case '(':
                    case '\\':
                        out.write(next);
                        break;
                    case ASCII_LF:
                    case ASCII_CR:
                        //this is a break in the line so ignore it and the newline and continue
                        c = seqSource.read();
                        while (isEOL(c) && c != -1)
                        {
                            c = seqSource.read();
                        }
                        nextc = c;
                        break;
                    case '0':
                    case '1':
                    case '2':
                    case '3':
                    case '4':
                    case '5':
                    case '6':
                    case '7':
                    {
                        // octal escape of one to three digits
                        StringBuilder octal = new StringBuilder();
                        octal.append(next);
                        c = seqSource.read();
                        char digit = (char) c;
                        if (digit >= '0' && digit <= '7')
                        {
                            octal.append(digit);
                            c = seqSource.read();
                            digit = (char) c;
                            if (digit >= '0' && digit <= '7')
                            {
                                octal.append(digit);
                            }
                            else
                            {
                                // not part of the escape: process it in the next iteration
                                nextc = c;
                            }
                        }
                        else
                        {
                            nextc = c;
                        }

                        int character = 0;
                        try
                        {
                            character = Integer.parseInt(octal.toString(), 8);
                        }
                        catch (NumberFormatException e)
                        {
                            throw new IOException(
                                    "Error: Expected octal character, actual='" + octal + "'", e);
                        }
                        out.write(character);
                        break;
                    }
                    default:
                    {
                        // dropping the backslash
                        // see 7.3.4.2 Literal Strings for further information
                        out.write(next);
                    }
                }
            }
            else
            {
                out.write(ch);
            }
            if (nextc != -2)
            {
                c = nextc;
            }
            else
            {
                c = seqSource.read();
            }
        }
        if (c != -1)
        {
            seqSource.unread(c);
        }
        return new COSString(out.toByteArray());
    }

    /**
     * This will parse a PDF HEX string with fail fast semantic
     * meaning that we stop if a not allowed character is found.
     * This is necessary in order to detect malformed input and
     * be able to skip to next object start.
     *
     * We assume starting '&lt;' was already read.
     *
     * @return The parsed PDF string.
     *
     * @throws IOException If there is an error reading from the stream.
     */
    private COSString parseCOSHexString() throws IOException
    {
        final StringBuilder sBuf = new StringBuilder();
        while (true)
        {
            int c = seqSource.read();
            if (isHexDigit((char) c))
            {
                sBuf.append((char) c);
            }
            else if (c == '>')
            {
                break;
            }
            else if (c < 0)
            {
                throw new IOException("Missing closing bracket for hex string. Reached EOS.");
            }
            else if ((c == ' ') || (c == '\n')
                    || (c == '\t') || (c == '\r')
                    || (c == '\b') || (c == '\f'))
            {
                continue;
            }
            else
            {
                // if invalid chars was found: discard last
                // hex character if it is not part of a pair
                if (sBuf.length() % 2 != 0)
                {
                    sBuf.deleteCharAt(sBuf.length() - 1);
                }

                // read till the closing bracket was found
                do
                {
                    c = seqSource.read();
                }
                while (c != '>' && c >= 0);

                // might have reached EOF while looking for the closing bracket
                // this can happen for malformed PDFs only. Make sure that there is
                // no endless loop.
                if (c < 0)
                {
                    throw new IOException("Missing closing bracket for hex string. Reached EOS.");
                }

                // exit loop
                break;
            }
        }
        return COSString.parseHex(sBuf.toString());
    }

    /**
     * This will parse a PDF array object.
     *
     * @return The parsed PDF array.
     *
     * @throws IOException If there is an error parsing the stream.
     */
    protected COSArray parseCOSArray() throws IOException
    {
        readExpectedChar('[');
        COSArray po = new COSArray();
        COSBase pbo;
        skipSpaces();
        int i;
        while (((i = seqSource.peek()) > 0) && ((char) i != ']'))
        {
            pbo = parseDirObject();
            if (pbo instanceof COSObject)
            {
                // An 'R' was read: the two preceding entries must be the object and generation
                // numbers. We have to check if the expected values are there or not PDFBOX-385.
                // The size checks keep a stray 'R' at the start of the array (or after a single
                // integer) from indexing position -1.
                if (po.size() > 0 && po.get(po.size() - 1) instanceof COSInteger)
                {
                    COSInteger genNumber = (COSInteger) po.remove(po.size() - 1);
                    if (po.size() > 0 && po.get(po.size() - 1) instanceof COSInteger)
                    {
                        COSInteger number = (COSInteger) po.remove(po.size() - 1);
                        COSObjectKey key = new COSObjectKey(number.longValue(), genNumber.intValue());
                        pbo = getObjectFromPool(key);
                    }
                    else
                    {
                        // the object reference is somehow wrong
                        pbo = null;
                    }
                }
                else
                {
                    pbo = null;
                }
            }
            if (pbo != null)
            {
                po.add(pbo);
            }
            else
            {
                //it could be a bad object in the array which is just skipped
                Log.w("PdfBox-Android", "Corrupt object reference at offset " + seqSource.getPosition());

                // This could also be an "endobj" or "endstream" which means we can assume that
                // the array has ended.
                String isThisTheEnd = readString();
                seqSource.unread(isThisTheEnd.getBytes(ISO_8859_1));
                if (ENDOBJ_STRING.equals(isThisTheEnd) || ENDSTREAM_STRING.equals(isThisTheEnd))
                {
                    return po;
                }
            }
            skipSpaces();
        }
        // read ']'
        seqSource.read();
        skipSpaces();
        return po;
    }

    /**
     * Determine if a character terminates a PDF name.
     *
     * @param ch The character
     * @return true if the character terminates a PDF name, otherwise false.
     */
    protected boolean isEndOfName(int ch)
    {
        return ch == ASCII_SPACE || ch == ASCII_CR || ch == ASCII_LF || ch == 9 || ch == '>'
                || ch == '<' || ch == '[' || ch == '/' || ch == ']' || ch == ')' || ch == '(';
    }

    /**
     * This will parse a PDF name from the stream.
     *
     * @return The parsed PDF name.
     * @throws IOException If there is an error reading from the stream.
     */
    protected COSName parseCOSName() throws IOException
    {
        readExpectedChar('/');
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        int c = seqSource.read();
        while (c != -1)
        {
            int ch = c;
            if (ch == '#')
            {
                // keep these as int so that end of input (-1) stays distinguishable
                int ch1 = seqSource.read();
                int ch2 = seqSource.read();
                // Prior to PDF v1.2, the # was not a special character.  Also,
                // it has been observed that various PDF tools do not follow the
                // spec with respect to the # escape, even though they report
                // PDF versions of 1.2 or later.  The solution here is that we
                // interpret the # as an escape only when it is followed by two
                // valid hex digits.
                if (isHexDigit((char) ch1) && isHexDigit((char) ch2))
                {
                    String hex = Character.toString((char) ch1) + (char) ch2;
                    try
                    {
                        buffer.write(Integer.parseInt(hex, 16));
                    }
                    catch (NumberFormatException e)
                    {
                        throw new IOException("Error: expected hex digit, actual='" + hex + "'", e);
                    }
                    c = seqSource.read();
                }
                else
                {
                    // Not an escape: keep the '#' literally and re-examine ch1 in the next
                    // iteration. The EOF marker must never be pushed back or written.
                    if (ch2 != -1)
                    {
                        seqSource.unread(ch2);
                    }
                    c = ch1; // -1 here simply ends the loop
                    buffer.write(ch);
                }
            }
            else if (isEndOfName(ch))
            {
                break;
            }
            else
            {
                buffer.write(ch);
                c = seqSource.read();
            }
        }
        if (c != -1)
        {
            seqSource.unread(c);
        }
        String string = new String(buffer.toByteArray(), Charsets.UTF_8);
        return COSName.getPDFName(string);
    }

    /**
     * This will parse a boolean object from the stream.
     *
     * @return The parsed boolean object.
     *
     * @throws IOException If an IO error occurs during parsing.
     */
    protected COSBoolean parseBoolean() throws IOException
    {
        COSBoolean retval = null;
        char c = (char) seqSource.peek();
        if (c == 't')
        {
            String trueString = new String(seqSource.readFully(4), ISO_8859_1);
            if (!trueString.equals(TRUE))
            {
                throw new IOException("Error parsing boolean: expected='true' actual='" + trueString
                        + "' at offset " + seqSource.getPosition());
            }
            else
            {
                retval = COSBoolean.TRUE;
            }
        }
        else if (c == 'f')
        {
            String falseString = new String(seqSource.readFully(5), ISO_8859_1);
            if (!falseString.equals(FALSE))
            {
                throw new IOException("Error parsing boolean: expected='false' actual='" + falseString
                        + "' at offset " + seqSource.getPosition());
            }
            else
            {
                retval = COSBoolean.FALSE;
            }
        }
        else
        {
            throw new IOException("Error parsing boolean expected='t or f' actual='" + c
                    + "' at offset " + seqSource.getPosition());
        }
        return retval;
    }

    /**
     * This will parse a directory object from the stream.
     *
     * @return The parsed object, or null at end of input or when an unexpected token was skipped.
     *
     * @throws IOException If there is an error during parsing.
     */
    protected COSBase parseDirObject() throws IOException
    {
        COSBase retval = null;

        skipSpaces();
        int nextByte = seqSource.peek();
        char c = (char) nextByte;
        switch (c)
        {
            case '<':
            {
                // pull off first left bracket
                int leftBracket = seqSource.read();
                // check for second left bracket
                c = (char) seqSource.peek();
                seqSource.unread(leftBracket);
                if (c == '<')
                {
                    retval = parseCOSDictionary();
                    skipSpaces();
                }
                else
                {
                    retval = parseCOSString();
                }
                break;
            }
            case '[':
            {
                // array
                retval = parseCOSArray();
                break;
            }
            case '(':
                retval = parseCOSString();
                break;
            case '/':
                // name
                retval = parseCOSName();
                break;
            case 'n':
            {
                // null
                readExpectedString(NULL);
                retval = COSNull.NULL;
                break;
            }
            case 't':
            {
                String trueString = new String(seqSource.readFully(4), ISO_8859_1);
                if (trueString.equals(TRUE))
                {
                    retval = COSBoolean.TRUE;
                }
                else
                {
                    throw new IOException("expected true actual='" + trueString + "' " + seqSource
                            + "' at offset " + seqSource.getPosition());
                }
                break;
            }
            case 'f':
            {
                String falseString = new String(seqSource.readFully(5), ISO_8859_1);
                if (falseString.equals(FALSE))
                {
                    retval = COSBoolean.FALSE;
                }
                else
                {
                    throw new IOException("expected false actual='" + falseString + "' " + seqSource
                            + "' at offset " + seqSource.getPosition());
                }
                break;
            }
            case 'R':
                // marker for an indirect reference; the caller resolves the preceding numbers
                seqSource.read();
                retval = new COSObject(null);
                break;
            case (char) -1:
                // end of input
                return null;
            default:
            {
                if (Character.isDigit(c) || c == '-' || c == '+' || c == '.')
                {
                    StringBuilder buf = new StringBuilder();
                    int ic = seqSource.read();
                    c = (char) ic;
                    while (Character.isDigit(c)
                            || c == '-'
                            || c == '+'
                            || c == '.'
                            || c == 'E'
                            || c == 'e')
                    {
                        buf.append(c);
                        ic = seqSource.read();
                        c = (char) ic;
                    }
                    if (ic != -1)
                    {
                        seqSource.unread(ic);
                    }
                    retval = COSNumber.get(buf.toString());
                }
                else
                {
                    //This is not suppose to happen, but we will allow for it
                    //so we are more compatible with POS writers that don't
                    //follow the spec
                    String badString = readString();
                    if (badString == null || badString.length() == 0)
                    {
                        int peek = seqSource.peek();
                        // we can end up in an infinite loop otherwise
                        throw new IOException("Unknown dir object c='" + c + "' cInt=" + (int) c
                                + " peek='" + (char) peek + "' peekInt=" + peek + " "
                                + seqSource.getPosition());
                    }

                    // if it's an endstream/endobj, we want to put it back so the caller will see it
                    if (ENDOBJ_STRING.equals(badString) || ENDSTREAM_STRING.equals(badString))
                    {
                        seqSource.unread(badString.getBytes(ISO_8859_1));
                    }
                }
            }
        }
        return retval;
    }

    /**
     * This will read the next string from the stream.
     *
     * @return The string that was read from the stream.
     *
     * @throws IOException If there is an error reading from the stream.
     */
    protected String readString() throws IOException
    {
        skipSpaces();
        StringBuilder buffer = new StringBuilder();
        int c = seqSource.read();
        while (!isEndOfName((char) c) && c != -1)
        {
            buffer.append((char) c);
            c = seqSource.read();
        }
        if (c != -1)
        {
            seqSource.unread(c);
        }
        return buffer.toString();
    }

    /**
     * Read one String and throw an exception if it is not the expected value.
     *
     * @param expectedString the String value that is expected.
     * @throws IOException if the String char is not the expected value or if an
     * I/O error occurs.
     */
    protected void readExpectedString(String expectedString) throws IOException
    {
        readExpectedString(expectedString.toCharArray(), false);
    }

    /**
     * Reads given pattern from {@link #seqSource}.
     *
     * <p>NOTE: the {@code skipSpaces} flag is currently not consulted; spaces before and after
     * the pattern are always skipped. This is left unchanged because callers may depend on it.
     *
     * @param expectedString pattern to be skipped
     * @param skipSpaces if set to true spaces before and after the string will be skipped
     *                   (see the note above)
     * @throws IOException if pattern could not be read
     */
    protected final void readExpectedString(final char[] expectedString, boolean skipSpaces)
            throws IOException
    {
        skipSpaces();
        for (char c : expectedString)
        {
            if (seqSource.read() != c)
            {
                throw new IOException("Expected string '" + new String(expectedString)
                        + "' but missed at character '" + c + "' at offset "
                        + seqSource.getPosition());
            }
        }
        skipSpaces();
    }

    /**
     * Read one char and throw an exception if it is not the expected value.
     *
     * @param ec the char value that is expected.
     * @throws IOException if the read char is not the expected value or if an
     * I/O error occurs.
     */
    protected void readExpectedChar(char ec) throws IOException
    {
        char c = (char) seqSource.read();
        if (c != ec)
        {
            throw new IOException("expected='" + ec + "' actual='" + c + "' at offset "
                    + seqSource.getPosition());
        }
    }

    /**
     * This will read the next string from the stream up to a certain length.
     *
     * @param length The length to stop reading at.
     *
     * @return The string that was read from the stream of length 0 to length.
     *
     * @throws IOException If there is an error reading from the stream.
     */
    protected String readString(int length) throws IOException
    {
        skipSpaces();

        int c = seqSource.read();

        //average string size is around 2 and the normal string buffer size is
        //about 16 so lets save some space.
        StringBuilder buffer = new StringBuilder(length);
        while (!isWhitespace(c) && !isClosing(c) && c != -1 && buffer.length() < length
                && c != '['
                && c != '<'
                && c != '('
                && c != '/')
        {
            buffer.append((char) c);
            c = seqSource.read();
        }
        if (c != -1)
        {
            seqSource.unread(c);
        }
        return buffer.toString();
    }

    /**
     * This will tell if the next character is a closing brace( close of PDF array ).
     *
     * @return true if the next byte is ']', false otherwise.
     *
     * @throws IOException If an IO error occurs.
     */
    protected boolean isClosing() throws IOException
    {
        return isClosing(seqSource.peek());
    }

    /**
     * This will tell if the given character is a closing brace( close of PDF array ).
     *
     * @param c The character to check
     * @return true if the character is ']', false otherwise.
     */
    protected boolean isClosing(int c)
    {
        return c == ']';
    }

    /**
     * This will read bytes until the first end of line marker occurs.
     * NOTE: The EOL marker may consists of 1 (CR or LF) or 2 (CR and LF) bytes
     * which is an important detail if one wants to unread the line.
     *
     * @return The characters between the current position and the end of the line.
     *
     * @throws IOException If there is an error reading from the stream.
     */
    protected String readLine() throws IOException
    {
        if (seqSource.isEOF())
        {
            throw new IOException("Error: End-of-File, expected line");
        }

        StringBuilder buffer = new StringBuilder(11);

        int c;
        while ((c = seqSource.read()) != -1)
        {
            // CR and LF are valid EOLs
            if (isEOL(c))
            {
                break;
            }
            buffer.append((char) c);
        }
        // CR+LF is also a valid EOL
        if (isCR(c) && isLF(seqSource.peek()))
        {
            seqSource.read();
        }
        return buffer.toString();
    }

    /**
     * This will tell if the next byte to be read is an end of line byte.
     *
     * @return true if the next byte is 0x0A or 0x0D.
     *
     * @throws IOException If there is an error reading from the stream.
     */
    protected boolean isEOL() throws IOException
    {
        return isEOL(seqSource.peek());
    }

    /**
     * This will tell if the given value is an end of line byte.
     *
     * @param c The character to check against end of line
     * @return true if the value is 0x0A or 0x0D.
     */
    protected boolean isEOL(int c)
    {
        return isLF(c) || isCR(c);
    }

    private boolean isLF(int c)
    {
        return ASCII_LF == c;
    }

    private boolean isCR(int c)
    {
        return ASCII_CR == c;
    }

    /**
     * This will tell if the next byte is whitespace or not.
     *
     * @return true if the next byte in the stream is a whitespace character.
     *
     * @throws IOException If there is an error reading from the stream.
     */
    protected boolean isWhitespace() throws IOException
    {
        return isWhitespace(seqSource.peek());
    }

    /**
     * This will tell if a character is whitespace or not.  These values are
     * specified in table 1 (page 12) of ISO 32000-1:2008.
     *
     * @param c The character to check against whitespace
     * @return true if the character is a whitespace character.
     */
    protected boolean isWhitespace(int c)
    {
        return c == 0 || c == 9 || c == 12 || c == ASCII_LF
                || c == ASCII_CR || c == ASCII_SPACE;
    }

    /**
     * This will tell if the next byte is a space or not.
     *
     * @return true if the next byte in the stream is a space character.
     *
     * @throws IOException If there is an error reading from the stream.
     */
    protected boolean isSpace() throws IOException
    {
        return isSpace(seqSource.peek());
    }

    /**
     * This will tell if the given value is a space or not.
     *
     * @param c The character to check against space
     * @return true if the value is a space character.
     */
    protected boolean isSpace(int c)
    {
        return ASCII_SPACE == c;
    }

    /**
     * This will tell if the next byte is a digit or not.
     *
     * @return true if the next byte in the stream is a digit.
     *
     * @throws IOException If there is an error reading from the stream.
     */
    protected boolean isDigit() throws IOException
    {
        return isDigit(seqSource.peek());
    }

    /**
     * This will tell if the given value is a digit or not.
     *
     * @param c The character to be checked
     * @return true if the value is an ASCII digit.
     */
    protected static boolean isDigit(int c)
    {
        return c >= ASCII_ZERO && c <= ASCII_NINE;
    }

    /**
     * This will skip all spaces and comments that are present.
     *
     * @throws IOException If there is an error reading from the stream.
     */
    protected void skipSpaces() throws IOException
    {
        int c = seqSource.read();
        // 37 is the % character, a comment
        while (isWhitespace(c) || c == 37)
        {
            if (c == 37)
            {
                // skip past the comment section
                c = seqSource.read();
                while (!isEOL(c) && c != -1)
                {
                    c = seqSource.read();
                }
            }
            else
            {
                c = seqSource.read();
            }
        }
        if (c != -1)
        {
            seqSource.unread(c);
        }
    }

    /**
     * This will read an object number from the stream and throw an {@link IOException} if
     * the value is negative.
     *
     * <p>NOTE: the value is read with {@link #readInt()}, so anything above
     * {@link Integer#MAX_VALUE} already fails there and the comparison against
     * {@link #OBJECT_NUMBER_THRESHOLD} can never be true for an int. The return type is kept
     * as int for compatibility with existing callers.
     *
     * @return the object number being read.
     * @throws IOException if an I/O error occurs
     */
    protected int readObjectNumber() throws IOException
    {
        int retval = readInt();
        if (retval < 0 || retval >= OBJECT_NUMBER_THRESHOLD)
        {
            throw new IOException("Object Number '" + retval
                    + "' has more than 10 digits or is negative");
        }
        return retval;
    }

    /**
     * This will read a integer from the Stream and throw an {@link IOException} if the integer
     * value is negative or bigger than the maximum object revision
     * (i.e. : bigger than {@link #GENERATION_NUMBER_THRESHOLD})
     *
     * @return the generation number being read.
     * @throws IOException if the value is out of range or an I/O error occurs
     */
    protected int readGenerationNumber() throws IOException
    {
        int retval = readInt();
        if (retval < 0 || retval > GENERATION_NUMBER_THRESHOLD)
        {
            throw new IOException("Generation Number '" + retval + "' has more than 5 digits");
        }
        return retval;
    }

    /**
     * This will read an integer from the stream.
     *
     * @return The integer that was read from the stream.
     *
     * @throws IOException If there is an error reading from the stream.
     */
    protected int readInt() throws IOException
    {
        skipSpaces();
        int retval = 0;

        StringBuilder intBuffer = readStringNumber();

        try
        {
            retval = Integer.parseInt(intBuffer.toString());
        }
        catch (NumberFormatException e)
        {
            // put the token back so the caller can try to interpret it differently
            seqSource.unread(intBuffer.toString().getBytes(ISO_8859_1));
            throw new IOException("Error: Expected an integer type at offset "
                    + seqSource.getPosition(), e);
        }
        return retval;
    }

    /**
     * This will read an long from the stream.
     *
     * @return The long that was read from the stream.
     *
     * @throws IOException If there is an error reading from the stream.
     */
    protected long readLong() throws IOException
    {
        skipSpaces();
        long retval = 0;

        StringBuilder longBuffer = readStringNumber();

        try
        {
            retval = Long.parseLong(longBuffer.toString());
        }
        catch (NumberFormatException e)
        {
            // put the token back so the caller can try to interpret it differently
            seqSource.unread(longBuffer.toString().getBytes(ISO_8859_1));
            throw new IOException("Error: Expected a long type at offset "
                    + seqSource.getPosition() + ", instead got '" + longBuffer + "'", e);
        }
        return retval;
    }

    /**
     * This method is used to read a token by the {@linkplain #readInt()} method
     * and the {@linkplain #readLong()} method.
     *
     * @return the token to parse as integer or long by the calling method.
     * @throws IOException throws by the {@link #seqSource} methods.
     */
    protected final StringBuilder readStringNumber() throws IOException
    {
        int lastByte = 0;
        StringBuilder buffer = new StringBuilder();
        while ((lastByte = seqSource.read()) != ASCII_SPACE
                && lastByte != ASCII_LF
                && lastByte != ASCII_CR
                && lastByte != 60 //see sourceforge bug 1714707
                && lastByte != '[' // PDFBOX-1845
                && lastByte != '(' // PDFBOX-2579
                && lastByte != 0 //See sourceforge bug 853328
                && lastByte != -1)
        {
            buffer.append((char) lastByte);
        }
        if (lastByte != -1)
        {
            seqSource.unread(lastByte);
        }
        return buffer;
    }
}