package com.tom_roush.pdfbox.pdmodel.common.function.type4; /** * Parser for PDF Type 4 functions. This implements a small subset of the PostScript * language but is no full PostScript interpreter. */ public final class Parser { /** Used to indicate the parsers current state. */ private enum State { NEWLINE, WHITESPACE, COMMENT, TOKEN } private Parser() { //nop } /** * Parses a Type 4 function and sends the syntactic elements to the given * syntax handler. * @param input the text source * @param handler the syntax handler */ public static void parse(CharSequence input, SyntaxHandler handler) { Tokenizer tokenizer = new Tokenizer(input, handler); tokenizer.tokenize(); } /** * This interface defines all possible syntactic elements of a Type 4 function. * It is called by the parser as the function is interpreted. */ public interface SyntaxHandler { /** * Indicates that a new line starts. * @param text the new line character (CR, LF, CR/LF or FF) */ void newLine(CharSequence text); /** * Called when whitespace characters are encountered. * @param text the whitespace text */ void whitespace(CharSequence text); /** * Called when a token is encountered. No distinction between operators and values * is done here. * @param text the token text */ void token(CharSequence text); /** * Called for a comment. * @param text the comment */ void comment(CharSequence text); } /** * Abstract base class for a {@link SyntaxHandler}. */ public abstract static class AbstractSyntaxHandler implements SyntaxHandler { /** {@inheritDoc} */ @Override public void comment(CharSequence text) { //nop } /** {@inheritDoc} */ @Override public void newLine(CharSequence text) { //nop } /** {@inheritDoc} */ @Override public void whitespace(CharSequence text) { //nop } } /** * Tokenizer for Type 4 functions. */ private static final class Tokenizer { private static final char NUL = '\u0000'; //NUL private static final char EOT = '\u0004'; //END OF TRANSMISSION private static final char TAB = '\u0009'; //TAB CHARACTER private static final char FF = '\u000C'; //FORM FEED private static final char CR = '\r'; //CARRIAGE RETURN private static final char LF = '\n'; //LINE FEED private static final char SPACE = '\u0020'; //SPACE private final CharSequence input; private int index; private final SyntaxHandler handler; private State state = State.WHITESPACE; private final StringBuilder buffer = new StringBuilder(); private Tokenizer(CharSequence text, SyntaxHandler syntaxHandler) { this.input = text; this.handler = syntaxHandler; } private boolean hasMore() { return index < input.length(); } private char currentChar() { return input.charAt(index); } private char nextChar() { index++; if (!hasMore()) { return EOT; } else { return currentChar(); } } private char peek() { if (index < input.length() - 1) { return input.charAt(index + 1); } else { return EOT; } } private State nextState() { char ch = currentChar(); switch (ch) { case CR: case LF: case FF: //FF state = State.NEWLINE; break; case NUL: case TAB: case SPACE: state = State.WHITESPACE; break; case '%': state = State.COMMENT; break; default: state = State.TOKEN; } return state; } private void tokenize() { while (hasMore()) { buffer.setLength(0); nextState(); switch (state) { case NEWLINE: scanNewLine(); break; case WHITESPACE: scanWhitespace(); break; case COMMENT: scanComment(); break; default: scanToken(); } } } private void scanNewLine() { assert state == State.NEWLINE; char ch = currentChar(); buffer.append(ch); if (ch == CR && peek() == LF) { //CRLF is treated as one newline buffer.append(nextChar()); } handler.newLine(buffer); nextChar(); } private void scanWhitespace() { assert state == State.WHITESPACE; buffer.append(currentChar()); loop: while (hasMore()) { char ch = nextChar(); switch (ch) { case NUL: case TAB: case SPACE: buffer.append(ch); break; default: break loop; } } handler.whitespace(buffer); } private void scanComment() { assert state == State.COMMENT; buffer.append(currentChar()); loop: while (hasMore()) { char ch = nextChar(); switch (ch) { case CR: case LF: case FF: break loop; default: buffer.append(ch); } } //EOF reached handler.comment(buffer); } private void scanToken() { assert state == State.TOKEN; char ch = currentChar(); buffer.append(ch); switch (ch) { case '{': case '}': handler.token(buffer); nextChar(); return; default: //continue } loop: while (hasMore()) { ch = nextChar(); switch (ch) { case NUL: case TAB: case SPACE: case CR: case LF: case FF: case EOT: case '{': case '}': break loop; default: buffer.append(ch); } } //EOF reached handler.token(buffer); } } }