/*
* Reference ETL Parser for Java
* Copyright (c) 2000-2009 Constantine A Plotnikov
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without restriction,
* including without limitation the rights to use, copy, modify, merge,
* publish, distribute, sublicense, and/or sell copies of the Software,
* and to permit persons to whom the Software is furnished to do so,
* subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package net.sf.etl.parsers.internal.term_parser;
import net.sf.etl.parsers.PhraseParser;
import net.sf.etl.parsers.PhraseToken;
import net.sf.etl.parsers.PhraseTokens;
import net.sf.etl.parsers.StandardGrammars;
import net.sf.etl.parsers.SyntaxRole;
import net.sf.etl.parsers.TermContext;
import net.sf.etl.parsers.TermToken;
import net.sf.etl.parsers.Terms;
import net.sf.etl.parsers.TextPos;
import net.sf.etl.parsers.Tokens;
import net.sf.etl.parsers.internal.term_parser.DefaultTermParser.GrammarChanger;
/**
* Initial peer for term parser. This peer is also prototype for future code
* generation efforts. Because of later goal, the peer is more complex than
* deserve.
*
* @author const
*/
class DoctypePeer extends AbstractPeer {
/** parser is outside of any segment */
private static final int OUTSIDE_SEGMENT = 0;
/** parser is outside of any segment */
private static final int ON_SEGMENT_START = 1;
/** parser is outside of any segment */
private static final int AFTER_SEGMENT_START = 2;
/** parser is outside of any segment */
private static final int ON_EOF = 3;
/** parser is outside of any segment */
private static final int ON_DOCTYPE = 4;
/** initial state when public identifier should be started */
private static final int ON_PUBLIC_START = 5;
/** state on white space right after doctype token */
private static final int AFTER_DOCTYPE = 6;
/** state on the start of system id string */
private static final int ON_SYSTEM_START = 7;
/** state on the error in doctype */
private static final int ON_DOCTYPE_ERROR = 8;
/** state inside the system id processing */
private static final int SYSTEM_ID_CONT1 = 9;
/** state inside the system id processing */
private static final int SYSTEM_ID_CONT2 = 10;
/** state inside the system id processing */
private static final int AFTER_SYSTEM_ID = 11;
/** state inside the context processing */
private static final int ON_CONTEXT_START = 12;
/** doctype end */
private static final int ON_DOCTYPE_END = 13;
/** state inside the public id processing */
private static final int PUBLIC_ID_CONT1 = 14;
/** state inside the public id processing */
private static final int PUBLIC_ID_CONT2 = 15;
/** state inside the public id processing */
private static final int PUBLIC_ID_CONT3 = 16;
/** state inside the public id processing */
private static final int PUBLIC_ID_CONT4 = 17;
/** state inside the public id processing */
private static final int AFTER_PUBLIC_ID = 18;
/** state inside the public id processing */
private static final int ERROR_IN_PUBLIC_ID = 19;
/** state inside the public id processing */
private static final int ERROR_IN_PUBLIC_ID2 = 20;
/** state inside the context processing */
private static final int CONTEXT_CONT1 = 21;
/** state inside the context processing */
private static final int CONTEXT_CONT2 = 22;
/** state inside the context processing */
private static final int CONTEXT_CONT3 = 23;
/** state inside the context processing */
private static final int ERROR_IN_CONTEXT = 24;
/** state inside the context processing */
private static final int CONTEXT_CONT4 = 25;
/** state inside the context processing */
private static final int AFTER_CONTEXT = 26;
/** state inside the context processing */
private static final int ERROR_IN_CONTEXT2 = 27;
/** state when segment end is reported */
private static final int ON_SEGMENT_END = 28;
/** in this state parser notifies term parser about grammar */
private static final int ON_NOTIFY_ABOUT_GRAMMAR = 29;
/** state of the object */
private int state = OUTSIDE_SEGMENT;
/** saved start segment */
private PhraseToken savedStartSegment;
/** seen error during parsing */
private boolean seenError = false;
/** This token keeps system identifier if one was specified */
private PhraseToken systemIdToken;
/** This token keeps public identifier if one was specified */
private PhraseToken publicIdToken;
/** This token keeps context identifier if one was specified */
private PhraseToken contextToken;
/** start of doctype directive */
private TextPos doctypeStart;
/** end of doctype directive */
private TextPos doctypeEnd;
/** grammar change interface for the parser */
private final GrammarChanger grammarChanger;
/**
* A constructor for this peer
*
* @param termParser
* term parser to use
* @param phraseParser
* phrase parser to use
* @param grammarChanger
* grammar changer object
*/
public DoctypePeer(DefaultTermParser termParser, PhraseParser phraseParser,
GrammarChanger grammarChanger) {
super(termParser, phraseParser);
this.grammarChanger = grammarChanger;
}
/**
* @see net.sf.etl.parsers.internal.term_parser.AbstractPeer#parseMore()
*/
@Override
public boolean parseMore() {
// This cycle and switch inside implements state machine pattern.
// Note that the cycle represent actual cycle that will be used inside
// generated code.
loop: while (true) {
switch (state) {
case OUTSIDE_SEGMENT:
if (skipIgnorable(false)) {
return true;
}
switch (phraseParser.current().kind()) {
case START_SEGMENT:
state = ON_SEGMENT_START;
continue loop;
case EOF:
state = ON_EOF;
continue loop;
default:
assert false : "Parser should never be here"
+ phraseParser.current().kind();
}
break;
case ON_SEGMENT_START:
termParser.pushMark();
// SPECIAL PROCESSING: Differently from other parsers, this
// parser does not reports anything on segment start because
// it is not yet clear to which grammar the token would
// belong.
savedStartSegment = phraseParser.current();
phraseParser.advance();
state = AFTER_SEGMENT_START;
continue loop;
case AFTER_SEGMENT_START:
if (!phraseParser.current().hasToken()) {
grammarChanger
.continueWithDefaultGrammar(savedStartSegment);
return false;
}
switch (tokenKindValue()) {
case IDENTIFIER:
if (matchText("doctype")) {
doctypeStart = phraseParser.current().start();
state = ON_DOCTYPE;
reportObjectStartAtMark(StandardGrammars.DOCTYPE_GRAMMAR_DOCTYPE);
termParser.insertAtMark(new TermToken(
StandardGrammars.DOCTYPE_CONTEXT,
Terms.SEGMENT_START, null, savedStartSegment));
savedStartSegment = null;
if (termParser.popMark()) {
return true;
}
continue loop;
}
default:
grammarChanger
.continueWithDefaultGrammar(savedStartSegment);
return false;
}
// -------- parsing doctype keyword--------------
case ON_DOCTYPE: {
state = AFTER_DOCTYPE;
if (reportStructural(SyntaxRole.KEYWORD)) {
return true;
}
continue loop;
}
case AFTER_DOCTYPE:
if (skipIgnorable(true)) {
return true;
}
if (matchText("public")) {
state = ON_PUBLIC_START;
} else if (matchString()) {
state = ON_SYSTEM_START;
} else {
state = ON_DOCTYPE_ERROR;
}
continue loop;
// --------- parsing system identifier -----------
case ON_SYSTEM_START:
state = SYSTEM_ID_CONT1;
if (reportPropertyStart(StandardGrammars.DOCTYPE_GRAMMAR_DOCTYPE_SYSTEM_ID)) {
return true;
}
continue loop;
case SYSTEM_ID_CONT1:
state = SYSTEM_ID_CONT2;
systemIdToken = phraseParser.current();
if (reportPrimiaryAndAdvance()) {
return true;
}
continue loop;
case SYSTEM_ID_CONT2:
state = AFTER_SYSTEM_ID;
if (reportPropertyEnd(StandardGrammars.DOCTYPE_GRAMMAR_DOCTYPE_SYSTEM_ID)) {
return true;
}
continue loop;
case AFTER_SYSTEM_ID:
if (skipIgnorable(true)) {
return true;
}
if (matchText("public")) {
state = ON_PUBLIC_START;
} else if (matchText("context")) {
state = ON_CONTEXT_START;
} else if (matchToken(PhraseTokens.END_SEGMENT)) {
state = ON_DOCTYPE_END;
} else {
state = ON_DOCTYPE_ERROR;
if (reportSyntaxError(new Object[] { Tokens.STRING,
"public" })) {
return true;
}
}
continue loop;
// ----------- parsing public identifier -------------
case ON_PUBLIC_START:
state = PUBLIC_ID_CONT1;
if (reportPropertyStart(StandardGrammars.DOCTYPE_GRAMMAR_DOCTYPE_PUBLIC_ID)) {
return true;
}
continue loop;
case PUBLIC_ID_CONT1:
state = PUBLIC_ID_CONT2;
if (reportStructural(SyntaxRole.KEYWORD)) {
return true;
}
continue loop;
case PUBLIC_ID_CONT2:
if (skipIgnorable(true)) {
return true;
}
if (matchString()) {
state = PUBLIC_ID_CONT3;
} else {
state = ERROR_IN_PUBLIC_ID;
}
continue loop;
case PUBLIC_ID_CONT3:
state = PUBLIC_ID_CONT4;
publicIdToken = phraseParser.current();
if (reportPrimiaryAndAdvance()) {
return true;
}
continue loop;
case PUBLIC_ID_CONT4:
state = AFTER_PUBLIC_ID;
if (reportPropertyEnd(StandardGrammars.DOCTYPE_GRAMMAR_DOCTYPE_PUBLIC_ID)) {
return true;
}
continue loop;
case AFTER_PUBLIC_ID:
if (skipIgnorable(true)) {
return true;
}
if (matchText("context")) {
state = ON_CONTEXT_START;
} else if (matchToken(PhraseTokens.END_SEGMENT)) {
state = ON_DOCTYPE_END;
} else {
state = ON_DOCTYPE_ERROR;
}
continue loop;
case ERROR_IN_PUBLIC_ID:
state = ERROR_IN_PUBLIC_ID2;
if (reportSyntaxError(Tokens.STRING)) {
return true;
}
case ERROR_IN_PUBLIC_ID2:
state = ON_DOCTYPE_ERROR;
if (reportPropertyEnd(StandardGrammars.DOCTYPE_GRAMMAR_DOCTYPE_PUBLIC_ID)) {
return true;
}
continue loop;
// ----- parsing context -------
case ON_CONTEXT_START:
state = CONTEXT_CONT1;
if (reportPropertyStart(StandardGrammars.DOCTYPE_GRAMMAR_DOCTYPE_CONTEXT)) {
return true;
}
continue loop;
case CONTEXT_CONT1:
state = CONTEXT_CONT2;
if (reportStructural(SyntaxRole.KEYWORD)) {
return true;
}
continue loop;
case CONTEXT_CONT2:
if (skipIgnorable(true)) {
return true;
}
if (matchString()) {
state = CONTEXT_CONT3;
} else {
state = ERROR_IN_CONTEXT;
}
continue loop;
case CONTEXT_CONT3:
state = CONTEXT_CONT4;
contextToken = phraseParser.current();
if (reportPrimiaryAndAdvance()) {
return true;
}
continue loop;
case CONTEXT_CONT4:
state = AFTER_CONTEXT;
if (reportPropertyEnd(StandardGrammars.DOCTYPE_GRAMMAR_DOCTYPE_CONTEXT)) {
return true;
}
continue loop;
case AFTER_CONTEXT:
if (skipIgnorable(true)) {
return true;
}
if (matchToken(PhraseTokens.END_SEGMENT)) {
state = ON_DOCTYPE_END;
} else {
state = ON_DOCTYPE_ERROR;
}
continue loop;
case ERROR_IN_CONTEXT:
state = ERROR_IN_CONTEXT2;
if (reportSyntaxError(Tokens.STRING)) {
return true;
}
case ERROR_IN_CONTEXT2:
state = ON_DOCTYPE_ERROR;
if (reportPropertyEnd((StandardGrammars.DOCTYPE_GRAMMAR_DOCTYPE_CONTEXT))) {
return true;
}
continue loop;
case ON_DOCTYPE_END:
state = ON_SEGMENT_END;
doctypeEnd = phraseParser.current().end();
if (reportObjectEnd(StandardGrammars.DOCTYPE_GRAMMAR_DOCTYPE)) {
return true;
}
continue loop;
case ON_SEGMENT_END:
state = ON_NOTIFY_ABOUT_GRAMMAR;
if (reportControlAndAdvance(Terms.SEGMENT_END,
StandardGrammars.DOCTYPE_CONTEXT)) {
return true;
}
continue loop;
case ON_NOTIFY_ABOUT_GRAMMAR:
grammarChanger.continueWithNewGrammar(doctypeStart, doctypeEnd,
systemIdToken, publicIdToken, contextToken, seenError);
return false;
case ON_DOCTYPE_ERROR:
seenError = true;
if (skipUntilSegmentEnd()) {
return true;
}
state = ON_DOCTYPE_END;
continue loop;
case ON_EOF:
reportEOF();
return true;
default:
throw new RuntimeException("Unknown State: " + state);
}
}
}
/**
* @return true if string matches
*/
private boolean matchString() {
return matchToken(Tokens.STRING)
&& ("\"".equals(token().quote()) || "\'"
.equals(token().quote()));
}
/**
* {@inheritDoc}
*/
@Override
protected boolean startSource(String grammarContext, boolean inSegment) {
if (inSegment) {
throw new RuntimeException(
"[BUG]Parsing doctype should never start in segment");
}
if (!"DoctypeContext".equals(grammarContext)) {
throw new IllegalArgumentException("Invalid context name: "
+ grammarContext);
}
// do nothing as default state suits it nicely
return true;
}
/**
* {@inheritDoc}
*/
@Override
public TermContext getInitalContext() {
return StandardGrammars.DOCTYPE_CONTEXT;
}
}