package com.tom_roush.pdfbox.pdmodel.common.function.type4;
/**
 * Parser for PDF Type 4 functions. This implements a small subset of the PostScript
 * language but is no full PostScript interpreter.
 * <p>
 * The parser only splits the input into new lines, whitespace runs, comments and tokens
 * and reports each of them to a {@link SyntaxHandler}; it does not interpret the tokens.
 */
public final class Parser
{

    /** Used to indicate the parser's current state. */
    private enum State
    {
        NEWLINE, WHITESPACE, COMMENT, TOKEN
    }

    private Parser()
    {
        //nop - static entry point only
    }

    /**
     * Parses a Type 4 function and sends the syntactic elements to the given
     * syntax handler.
     * <p>
     * The {@link CharSequence} instances passed to the handler are backed by an internal
     * buffer that is reused for every element. A handler that wants to keep the text
     * must copy it (for example with {@code toString()}) before returning.
     *
     * @param input the text source
     * @param handler the syntax handler
     */
    public static void parse(CharSequence input, SyntaxHandler handler)
    {
        Tokenizer tokenizer = new Tokenizer(input, handler);
        tokenizer.tokenize();
    }

    /**
     * This interface defines all possible syntactic elements of a Type 4 function.
     * It is called by the parser as the function is interpreted.
     */
    public interface SyntaxHandler
    {

        /**
         * Indicates that a new line starts.
         * @param text the new line character (CR, LF, CR/LF or FF)
         */
        void newLine(CharSequence text);

        /**
         * Called when whitespace characters are encountered.
         * @param text the whitespace text
         */
        void whitespace(CharSequence text);

        /**
         * Called when a token is encountered. No distinction between operators and values
         * is done here.
         * @param text the token text
         */
        void token(CharSequence text);

        /**
         * Called for a comment.
         * @param text the comment, starting with the '%' character and excluding the
         *             line terminator that ends it
         */
        void comment(CharSequence text);

    }

    /**
     * Abstract base class for a {@link SyntaxHandler}. Comments, new lines and whitespace
     * are ignored; subclasses only have to implement {@link #token(CharSequence)}.
     */
    public abstract static class AbstractSyntaxHandler implements SyntaxHandler
    {

        /** {@inheritDoc} */
        @Override
        public void comment(CharSequence text)
        {
            //nop
        }

        /** {@inheritDoc} */
        @Override
        public void newLine(CharSequence text)
        {
            //nop
        }

        /** {@inheritDoc} */
        @Override
        public void whitespace(CharSequence text)
        {
            //nop
        }

    }

    /**
     * Tokenizer for Type 4 functions. Walks over the input once and reports every
     * syntactic element to the handler.
     */
    private static final class Tokenizer
    {

        private static final char NUL = '\u0000'; //NUL
        private static final char EOT = '\u0004'; //END OF TRANSMISSION
        private static final char TAB = '\u0009'; //TAB CHARACTER
        private static final char FF = '\u000C'; //FORM FEED
        private static final char CR = '\r'; //CARRIAGE RETURN
        private static final char LF = '\n'; //LINE FEED
        private static final char SPACE = '\u0020'; //SPACE

        private final CharSequence input;
        private int index;
        private final SyntaxHandler handler;
        private State state = State.WHITESPACE;
        /** Collects the text of the current element; cleared before each element. */
        private final StringBuilder buffer = new StringBuilder();

        private Tokenizer(CharSequence text, SyntaxHandler syntaxHandler)
        {
            this.input = text;
            this.handler = syntaxHandler;
        }

        /** Returns true while the current index points at a character of the input. */
        private boolean hasMore()
        {
            return index < input.length();
        }

        /** Returns the character at the current index; only valid while {@link #hasMore()}. */
        private char currentChar()
        {
            return input.charAt(index);
        }

        /**
         * Advances to the next character and returns it, or {@link #EOT} as a sentinel
         * when the end of the input has been reached.
         */
        private char nextChar()
        {
            index++;
            if (!hasMore())
            {
                return EOT;
            }
            else
            {
                return currentChar();
            }
        }

        /**
         * Returns the character after the current one without advancing, or {@link #EOT}
         * if there is none.
         */
        private char peek()
        {
            if (index < input.length() - 1)
            {
                return input.charAt(index + 1);
            }
            else
            {
                return EOT;
            }
        }

        /** Derives the state from the current character, stores and returns it. */
        private State nextState()
        {
            char ch = currentChar();
            switch (ch)
            {
                case CR:
                case LF:
                case FF: //FF
                    state = State.NEWLINE;
                    break;
                case NUL:
                case TAB:
                case SPACE:
                    state = State.WHITESPACE;
                    break;
                case '%':
                    state = State.COMMENT;
                    break;
                default:
                    state = State.TOKEN;
            }
            return state;
        }

        /** Main loop: classifies the current character and scans one element at a time. */
        private void tokenize()
        {
            while (hasMore())
            {
                buffer.setLength(0);
                nextState();
                switch (state)
                {
                    case NEWLINE:
                        scanNewLine();
                        break;
                    case WHITESPACE:
                        scanWhitespace();
                        break;
                    case COMMENT:
                        scanComment();
                        break;
                    default:
                        scanToken();
                }
            }
        }

        /** Scans a single line terminator (CR, LF, FF or the CR/LF pair). */
        private void scanNewLine()
        {
            assert state == State.NEWLINE;
            char ch = currentChar();
            buffer.append(ch);
            if (ch == CR && peek() == LF)
            {
                //CRLF is treated as one newline
                buffer.append(nextChar());
            }
            handler.newLine(buffer);
            nextChar();
        }

        /** Scans a run of NUL, TAB and SPACE characters. */
        private void scanWhitespace()
        {
            assert state == State.WHITESPACE;
            buffer.append(currentChar());

            loop:
            while (hasMore())
            {
                char ch = nextChar();
                switch (ch)
                {
                    case NUL:
                    case TAB:
                    case SPACE:
                        buffer.append(ch);
                        break;
                    default:
                        //includes the EOT sentinel returned at end of input
                        break loop;
                }
            }
            handler.whitespace(buffer);
        }

        /**
         * Scans a comment: everything from the '%' up to (but excluding) the next line
         * terminator or the end of the input.
         */
        private void scanComment()
        {
            assert state == State.COMMENT;
            buffer.append(currentChar());

            // Advance the index directly instead of using nextChar(): nextChar() returns the
            // EOT sentinel at end of input, which must not become part of the comment text.
            // A literal U+0004 inside the input is still kept as comment content.
            for (index++; hasMore(); index++)
            {
                char ch = currentChar();
                if (ch == CR || ch == LF || ch == FF)
                {
                    //line terminator ends the comment; it is reported by scanNewLine()
                    break;
                }
                buffer.append(ch);
            }
            handler.comment(buffer);
        }

        /**
         * Scans a token. The braces '{' and '}' are always single-character tokens; any
         * other token extends up to the next whitespace, line terminator, brace or the end
         * of the input.
         */
        private void scanToken()
        {
            assert state == State.TOKEN;
            char ch = currentChar();
            buffer.append(ch);
            switch (ch)
            {
                case '{':
                case '}':
                    handler.token(buffer);
                    nextChar();
                    return;
                default:
                    //continue
            }

            loop:
            while (hasMore())
            {
                ch = nextChar();
                switch (ch)
                {
                    case NUL:
                    case TAB:
                    case SPACE:
                    case CR:
                    case LF:
                    case FF:
                    case EOT:
                    case '{':
                    case '}':
                        break loop;
                    default:
                        buffer.append(ch);
                }
            }
            //EOF or delimiter reached
            handler.token(buffer);
        }

    }

}