/* * Reference ETL Parser for Java * Copyright (c) 2000-2009 Constantine A Plotnikov * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without restriction, * including without limitation the rights to use, copy, modify, merge, * publish, distribute, sublicense, and/or sell copies of the Software, * and to permit persons to whom the Software is furnished to do so, * subject to the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ package net.sf.etl.parsers.internal.term_parser; import net.sf.etl.parsers.PhraseParser; import net.sf.etl.parsers.PhraseToken; import net.sf.etl.parsers.PhraseTokens; import net.sf.etl.parsers.StandardGrammars; import net.sf.etl.parsers.SyntaxRole; import net.sf.etl.parsers.TermContext; import net.sf.etl.parsers.TermToken; import net.sf.etl.parsers.Terms; import net.sf.etl.parsers.TextPos; import net.sf.etl.parsers.Tokens; import net.sf.etl.parsers.internal.term_parser.DefaultTermParser.GrammarChanger; /** * Initial peer for term parser. This peer is also prototype for future code * generation efforts. Because of later goal, the peer is more complex than * deserve. * * @author const */ class DoctypePeer extends AbstractPeer { /** parser is outside of any segment */ private static final int OUTSIDE_SEGMENT = 0; /** parser is outside of any segment */ private static final int ON_SEGMENT_START = 1; /** parser is outside of any segment */ private static final int AFTER_SEGMENT_START = 2; /** parser is outside of any segment */ private static final int ON_EOF = 3; /** parser is outside of any segment */ private static final int ON_DOCTYPE = 4; /** initial state when public identifier should be started */ private static final int ON_PUBLIC_START = 5; /** state on white space right after doctype token */ private static final int AFTER_DOCTYPE = 6; /** state on the start of system id string */ private static final int ON_SYSTEM_START = 7; /** state on the error in doctype */ private static final int ON_DOCTYPE_ERROR = 8; /** state inside the system id processing */ private static final int SYSTEM_ID_CONT1 = 9; /** state inside the system id processing */ private static final int SYSTEM_ID_CONT2 = 10; /** state inside the system id processing */ private static final int AFTER_SYSTEM_ID = 11; /** state inside the context processing */ private static final int ON_CONTEXT_START = 12; /** doctype end */ private static final int ON_DOCTYPE_END = 13; /** state inside the public id processing */ private static final int PUBLIC_ID_CONT1 = 14; /** state inside the public id processing */ private static final int PUBLIC_ID_CONT2 = 15; /** state inside the public id processing */ private static final int PUBLIC_ID_CONT3 = 16; /** state inside the public id processing */ private static final int PUBLIC_ID_CONT4 = 17; /** state inside the public id processing */ private static final int AFTER_PUBLIC_ID = 18; /** state inside the public id processing */ private static final int ERROR_IN_PUBLIC_ID = 19; /** state inside the public id processing */ private static final int ERROR_IN_PUBLIC_ID2 = 20; /** state inside the context processing */ private static final int CONTEXT_CONT1 = 21; /** state inside the context processing */ private static final int CONTEXT_CONT2 = 22; /** state inside the context processing */ private static final int CONTEXT_CONT3 = 23; /** state inside the context processing */ private static final int ERROR_IN_CONTEXT = 24; /** state inside the context processing */ private static final int CONTEXT_CONT4 = 25; /** state inside the context processing */ private static final int AFTER_CONTEXT = 26; /** state inside the context processing */ private static final int ERROR_IN_CONTEXT2 = 27; /** state when segment end is reported */ private static final int ON_SEGMENT_END = 28; /** in this state parser notifies term parser about grammar */ private static final int ON_NOTIFY_ABOUT_GRAMMAR = 29; /** state of the object */ private int state = OUTSIDE_SEGMENT; /** saved start segment */ private PhraseToken savedStartSegment; /** seen error during parsing */ private boolean seenError = false; /** This token keeps system identifier if one was specified */ private PhraseToken systemIdToken; /** This token keeps public identifier if one was specified */ private PhraseToken publicIdToken; /** This token keeps context identifier if one was specified */ private PhraseToken contextToken; /** start of doctype directive */ private TextPos doctypeStart; /** end of doctype directive */ private TextPos doctypeEnd; /** grammar change interface for the parser */ private final GrammarChanger grammarChanger; /** * A constructor for this peer * * @param termParser * term parser to use * @param phraseParser * phrase parser to use * @param grammarChanger * grammar changer object */ public DoctypePeer(DefaultTermParser termParser, PhraseParser phraseParser, GrammarChanger grammarChanger) { super(termParser, phraseParser); this.grammarChanger = grammarChanger; } /** * @see net.sf.etl.parsers.internal.term_parser.AbstractPeer#parseMore() */ @Override public boolean parseMore() { // This cycle and switch inside implements state machine pattern. // Note that the cycle represent actual cycle that will be used inside // generated code. loop: while (true) { switch (state) { case OUTSIDE_SEGMENT: if (skipIgnorable(false)) { return true; } switch (phraseParser.current().kind()) { case START_SEGMENT: state = ON_SEGMENT_START; continue loop; case EOF: state = ON_EOF; continue loop; default: assert false : "Parser should never be here" + phraseParser.current().kind(); } break; case ON_SEGMENT_START: termParser.pushMark(); // SPECIAL PROCESSING: Differently from other parsers, this // parser does not reports anything on segment start because // it is not yet clear to which grammar the token would // belong. savedStartSegment = phraseParser.current(); phraseParser.advance(); state = AFTER_SEGMENT_START; continue loop; case AFTER_SEGMENT_START: if (!phraseParser.current().hasToken()) { grammarChanger .continueWithDefaultGrammar(savedStartSegment); return false; } switch (tokenKindValue()) { case IDENTIFIER: if (matchText("doctype")) { doctypeStart = phraseParser.current().start(); state = ON_DOCTYPE; reportObjectStartAtMark(StandardGrammars.DOCTYPE_GRAMMAR_DOCTYPE); termParser.insertAtMark(new TermToken( StandardGrammars.DOCTYPE_CONTEXT, Terms.SEGMENT_START, null, savedStartSegment)); savedStartSegment = null; if (termParser.popMark()) { return true; } continue loop; } default: grammarChanger .continueWithDefaultGrammar(savedStartSegment); return false; } // -------- parsing doctype keyword-------------- case ON_DOCTYPE: { state = AFTER_DOCTYPE; if (reportStructural(SyntaxRole.KEYWORD)) { return true; } continue loop; } case AFTER_DOCTYPE: if (skipIgnorable(true)) { return true; } if (matchText("public")) { state = ON_PUBLIC_START; } else if (matchString()) { state = ON_SYSTEM_START; } else { state = ON_DOCTYPE_ERROR; } continue loop; // --------- parsing system identifier ----------- case ON_SYSTEM_START: state = SYSTEM_ID_CONT1; if (reportPropertyStart(StandardGrammars.DOCTYPE_GRAMMAR_DOCTYPE_SYSTEM_ID)) { return true; } continue loop; case SYSTEM_ID_CONT1: state = SYSTEM_ID_CONT2; systemIdToken = phraseParser.current(); if (reportPrimiaryAndAdvance()) { return true; } continue loop; case SYSTEM_ID_CONT2: state = AFTER_SYSTEM_ID; if (reportPropertyEnd(StandardGrammars.DOCTYPE_GRAMMAR_DOCTYPE_SYSTEM_ID)) { return true; } continue loop; case AFTER_SYSTEM_ID: if (skipIgnorable(true)) { return true; } if (matchText("public")) { state = ON_PUBLIC_START; } else if (matchText("context")) { state = ON_CONTEXT_START; } else if (matchToken(PhraseTokens.END_SEGMENT)) { state = ON_DOCTYPE_END; } else { state = ON_DOCTYPE_ERROR; if (reportSyntaxError(new Object[] { Tokens.STRING, "public" })) { return true; } } continue loop; // ----------- parsing public identifier ------------- case ON_PUBLIC_START: state = PUBLIC_ID_CONT1; if (reportPropertyStart(StandardGrammars.DOCTYPE_GRAMMAR_DOCTYPE_PUBLIC_ID)) { return true; } continue loop; case PUBLIC_ID_CONT1: state = PUBLIC_ID_CONT2; if (reportStructural(SyntaxRole.KEYWORD)) { return true; } continue loop; case PUBLIC_ID_CONT2: if (skipIgnorable(true)) { return true; } if (matchString()) { state = PUBLIC_ID_CONT3; } else { state = ERROR_IN_PUBLIC_ID; } continue loop; case PUBLIC_ID_CONT3: state = PUBLIC_ID_CONT4; publicIdToken = phraseParser.current(); if (reportPrimiaryAndAdvance()) { return true; } continue loop; case PUBLIC_ID_CONT4: state = AFTER_PUBLIC_ID; if (reportPropertyEnd(StandardGrammars.DOCTYPE_GRAMMAR_DOCTYPE_PUBLIC_ID)) { return true; } continue loop; case AFTER_PUBLIC_ID: if (skipIgnorable(true)) { return true; } if (matchText("context")) { state = ON_CONTEXT_START; } else if (matchToken(PhraseTokens.END_SEGMENT)) { state = ON_DOCTYPE_END; } else { state = ON_DOCTYPE_ERROR; } continue loop; case ERROR_IN_PUBLIC_ID: state = ERROR_IN_PUBLIC_ID2; if (reportSyntaxError(Tokens.STRING)) { return true; } case ERROR_IN_PUBLIC_ID2: state = ON_DOCTYPE_ERROR; if (reportPropertyEnd(StandardGrammars.DOCTYPE_GRAMMAR_DOCTYPE_PUBLIC_ID)) { return true; } continue loop; // ----- parsing context ------- case ON_CONTEXT_START: state = CONTEXT_CONT1; if (reportPropertyStart(StandardGrammars.DOCTYPE_GRAMMAR_DOCTYPE_CONTEXT)) { return true; } continue loop; case CONTEXT_CONT1: state = CONTEXT_CONT2; if (reportStructural(SyntaxRole.KEYWORD)) { return true; } continue loop; case CONTEXT_CONT2: if (skipIgnorable(true)) { return true; } if (matchString()) { state = CONTEXT_CONT3; } else { state = ERROR_IN_CONTEXT; } continue loop; case CONTEXT_CONT3: state = CONTEXT_CONT4; contextToken = phraseParser.current(); if (reportPrimiaryAndAdvance()) { return true; } continue loop; case CONTEXT_CONT4: state = AFTER_CONTEXT; if (reportPropertyEnd(StandardGrammars.DOCTYPE_GRAMMAR_DOCTYPE_CONTEXT)) { return true; } continue loop; case AFTER_CONTEXT: if (skipIgnorable(true)) { return true; } if (matchToken(PhraseTokens.END_SEGMENT)) { state = ON_DOCTYPE_END; } else { state = ON_DOCTYPE_ERROR; } continue loop; case ERROR_IN_CONTEXT: state = ERROR_IN_CONTEXT2; if (reportSyntaxError(Tokens.STRING)) { return true; } case ERROR_IN_CONTEXT2: state = ON_DOCTYPE_ERROR; if (reportPropertyEnd((StandardGrammars.DOCTYPE_GRAMMAR_DOCTYPE_CONTEXT))) { return true; } continue loop; case ON_DOCTYPE_END: state = ON_SEGMENT_END; doctypeEnd = phraseParser.current().end(); if (reportObjectEnd(StandardGrammars.DOCTYPE_GRAMMAR_DOCTYPE)) { return true; } continue loop; case ON_SEGMENT_END: state = ON_NOTIFY_ABOUT_GRAMMAR; if (reportControlAndAdvance(Terms.SEGMENT_END, StandardGrammars.DOCTYPE_CONTEXT)) { return true; } continue loop; case ON_NOTIFY_ABOUT_GRAMMAR: grammarChanger.continueWithNewGrammar(doctypeStart, doctypeEnd, systemIdToken, publicIdToken, contextToken, seenError); return false; case ON_DOCTYPE_ERROR: seenError = true; if (skipUntilSegmentEnd()) { return true; } state = ON_DOCTYPE_END; continue loop; case ON_EOF: reportEOF(); return true; default: throw new RuntimeException("Unknown State: " + state); } } } /** * @return true if string matches */ private boolean matchString() { return matchToken(Tokens.STRING) && ("\"".equals(token().quote()) || "\'" .equals(token().quote())); } /** * {@inheritDoc} */ @Override protected boolean startSource(String grammarContext, boolean inSegment) { if (inSegment) { throw new RuntimeException( "[BUG]Parsing doctype should never start in segment"); } if (!"DoctypeContext".equals(grammarContext)) { throw new IllegalArgumentException("Invalid context name: " + grammarContext); } // do nothing as default state suits it nicely return true; } /** * {@inheritDoc} */ @Override public TermContext getInitalContext() { return StandardGrammars.DOCTYPE_CONTEXT; } }