// $ANTLR 2.7.6 (2005-12-22): "glycoct_grammar.g" -> "GlycoctParser.java"$ package org.eurocarbdb.sugar.seq.grammar; import org.eurocarbdb.sugar.Anomer; import org.eurocarbdb.sugar.SugarRepeatAnnotation; import antlr.TokenBuffer; import antlr.TokenStreamException; import antlr.TokenStreamIOException; import antlr.ANTLRException; import antlr.LLkParser; import antlr.Token; import antlr.TokenStream; import antlr.RecognitionException; import antlr.NoViableAltException; import antlr.MismatchedTokenException; import antlr.SemanticException; import antlr.ParserSharedInputState; import antlr.collections.impl.BitSet; /***************************************************** *<p> * This class defines an LLk parser based on ANTLR (http://antlr.org) syntax * rules for parsing carbohydrate sequences in GlycoCT syntax, according * to the published GlycoCT spec v3. *</p> *<p> * This class' superclass provides the majority of * the semantic action code that is called from within this grammar. This * is in order to keep the grammar as clear as possible and to facillitate * re-targeting of this grammar to other languages than Java (at time of * writing ANTLR also supports C++, python, C#). *</p> *<p> * Note that source code for this grammar is auto-generated by ANTLR. *</p> * * @see GlycoctLexer * @see GlycoctParserAdaptor * @see ParserAdaptor * @see glycoct_grammar.g * * @author mjh */ public class GlycoctParser extends org.eurocarbdb.sugar.seq.grammar.GlycoctParserAdaptor implements GlycoctParserTokenTypes { /* empty */ protected GlycoctParser(TokenBuffer tokenBuf, int k) { super(tokenBuf,k); tokenNames = _tokenNames; } public GlycoctParser(TokenBuffer tokenBuf) { this(tokenBuf,2); } protected GlycoctParser(TokenStream lexer, int k) { super(lexer,k); tokenNames = _tokenNames; } public GlycoctParser(TokenStream lexer) { this(lexer,2); } public GlycoctParser(ParserSharedInputState state) { super(state,2); tokenNames = _tokenNames; } /** Toplevel rule defining a sugar sequence. */ public final void sugar() throws RecognitionException, TokenStreamException { res_section(); { switch ( LA(1)) { case LIN: { lin_section(); break; } case EOF: case REP: { break; } default: { throw new NoViableAltException(LT(1), getFilename()); } } } { switch ( LA(1)) { case REP: { rep_section(); break; } case EOF: { break; } default: { throw new NoViableAltException(LT(1), getFilename()); } } } match(Token.EOF_TYPE); } /** Rule for a RES (residues) section. */ public final void res_section() throws RecognitionException, TokenStreamException { match(RES); { int _cnt6=0; _loop6: do { if ((LA(1)==INTEGER)) { residue(); } else { if ( _cnt6>=1 ) { break _loop6; } else {throw new NoViableAltException(LT(1), getFilename());} } _cnt6++; } while (true); } } /** Rule for a LIN (linkages) section. */ public final void lin_section() throws RecognitionException, TokenStreamException { match(LIN); { int _cnt9=0; _loop9: do { if ((LA(1)==INTEGER||LA(1)==LITERAL_R)) { linkage(); } else { if ( _cnt9>=1 ) { break _loop9; } else {throw new NoViableAltException(LT(1), getFilename());} } _cnt9++; } while (true); } } /** Rule for the REP (repeats) section. */ public final void rep_section() throws RecognitionException, TokenStreamException { match(REP); repeat_section_specification(); } /** A numbered residue entry in the 'RES' section. */ public final void residue() throws RecognitionException, TokenStreamException { match(INTEGER); residue_specification(); { switch ( LA(1)) { case SEMICOLON: { match(SEMICOLON); break; } case EOF: case LIN: case REP: case INTEGER: { break; } default: { throw new NoViableAltException(LT(1), getFilename()); } } } } /** Rule matching a single linkage declaration. */ public final void linkage() throws RecognitionException, TokenStreamException { Token i = null; Token rti = null; Token rtt = null; Token nrti = null; Token nrtt = null; Token lnrt, lrt; { switch ( LA(1)) { case LITERAL_R: { match(LITERAL_R); break; } case INTEGER: { break; } default: { throw new NoViableAltException(LT(1), getFilename()); } } } i = LT(1); match(INTEGER); match(COLON); rti = LT(1); match(INTEGER); rtt = LT(1); match(IDENTIFIER); match(LPARENTHESIS); lrt=terminus_position(); { switch ( LA(1)) { case HYPHEN: { match(HYPHEN); break; } case PLUS: { match(PLUS); break; } default: { throw new NoViableAltException(LT(1), getFilename()); } } } lnrt=terminus_position(); match(RPARENTHESIS); nrti = LT(1); match(INTEGER); nrtt = LT(1); match(IDENTIFIER); { switch ( LA(1)) { case SEMICOLON: { match(SEMICOLON); break; } case EOF: case REP: case INTEGER: case LITERAL_R: { break; } default: { throw new NoViableAltException(LT(1), getFilename()); } } } addLinkage( i, rti, rtt, lrt, lnrt, nrti, nrtt ); } /** Rule for a PRO (heterogeneity) section. */ public final void pro_section() throws RecognitionException, TokenStreamException { match(PRO); { int _cnt12=0; _loop12: do { if ((LA(1)==INTEGER||LA(1)==LITERAL_R)) { linkage(); } else { if ( _cnt12>=1 ) { break _loop12; } else {throw new NoViableAltException(LT(1), getFilename());} } _cnt12++; } while (true); } } /** * This is the rule for a repeat ('REP') section, which starts immediately * after the text 'REP'. */ public final void repeat_section_specification() throws RecognitionException, TokenStreamException { Token i = null; Token r1 = null; Token rtt = null; Token r2 = null; Token nrtt = null; Token t1, t2, lower, upper; match(REP); i = LT(1); match(INTEGER); match(COLON); r1 = LT(1); match(INTEGER); rtt = LT(1); match(IDENTIFIER); match(LPARENTHESIS); t1=terminus_position(); { switch ( LA(1)) { case HYPHEN: { match(HYPHEN); break; } case PLUS: { match(PLUS); break; } default: { throw new NoViableAltException(LT(1), getFilename()); } } } t2=terminus_position(); match(RPARENTHESIS); r2 = LT(1); match(INTEGER); nrtt = LT(1); match(IDENTIFIER); match(EQUALS); lower=terminus_position(); match(HYPHEN); upper=terminus_position(); setRepeatRange( i, lower, upper ); { switch ( LA(1)) { case SEMICOLON: { match(SEMICOLON); break; } case RES: { break; } default: { throw new NoViableAltException(LT(1), getFilename()); } } } repeatStarts( i ); res_section(); { switch ( LA(1)) { case LIN: { lin_section(); break; } case EOF: { break; } default: { throw new NoViableAltException(LT(1), getFilename()); } } } RepeatResidueToken r = getRepeat( i ); r.setRootResidueToken( getResidueToken( r2 ) ); r.setLeafResidueToken( getResidueToken( r1 ) ); r.setLinkageBetweenRepeats( createLinkageToken( null, t1, t2 ) ); repeatEnds( i ); } /** * Rule for a single residue, which may be either a monosaccharide, * a substituent, or one of the other types specified by GlycoCT * (ie: INCHI, freetext). */ public final void residue_specification() throws RecognitionException, TokenStreamException { switch ( LA(1)) { case MONOSAC_DECLARATION: { monosac_specification(); break; } case SUBSTIT_DECLARATION: { substit_specification(); break; } case REPEAT_DECLARATION: { repeat_residue_specification(); break; } default: { throw new NoViableAltException(LT(1), getFilename()); } } } /** * Rule that tests whether a residue is a monosaccharide. * "Monosaccharide-ness" is implied by matching lexer rule * MONOSAC_DECLARATION. */ public final void monosac_specification() throws RecognitionException, TokenStreamException { match(MONOSAC_DECLARATION); monosaccharide(); } /** * Rule that tests whether a residue is a substituent. * "Substituent-ness" is implied by matching lexer rule * SUBSTIT_DECLARATION. */ public final void substit_specification() throws RecognitionException, TokenStreamException { match(SUBSTIT_DECLARATION); substituent_name(); } /** * Rule that tests whether a residue is a reference to a repeat sub-structure. */ public final void repeat_residue_specification() throws RecognitionException, TokenStreamException { Token i = null; match(REPEAT_DECLARATION); match(IDENTIFIER); i = LT(1); match(INTEGER); addRepeatResidue( i ); } /** * Rule for a monosaccharide, in GlycoCT format, consisting of a * monosaccharide name, its superclass, ring closure positions, * and a list of modifications at each terminus, if any. */ public final void monosaccharide() throws RecognitionException, TokenStreamException { Token c = null; ResidueToken n; n=monosaccharide_name(); addResidue( n ); match(HYPHEN); c = LT(1); match(MONOSAC_SUPERCLASS); setSuperclass( c ); match(HYPHEN); monosac_ring_closure(); { _loop22: do { if ((LA(1)==PIPE)) { monosac_substituents_or_modifications(); } else { break _loop22; } } while (true); } } /** * Rule for a substituent name (ie: non-monosaccharide). * A substituent name may be any hyphen-separated list of identifiers. */ public final void substituent_name() throws RecognitionException, TokenStreamException { Token n = null; Token x = null; n = LT(1); match(IDENTIFIER); { _loop28: do { if ((LA(1)==HYPHEN)) { match(HYPHEN); x = LT(1); match(IDENTIFIER); n.setText( n.getText() + "-" + x.getText() ); } else { break _loop28; } } while (true); } addResidue( createSubstituentToken( n ) ); } /** * Rule for a monosaccharide name. A monosaccharide name in GlycoCT * is basically its stem-type, that is, the name/type given to the * basic monosaccharide sans mods, eg: glc. Note that there may be * multiple stem-types given, separated by hyphens. * * Note also that for the purposes of this rule, anomeric config is * considered part of the monosaccharide name. */ public final ResidueToken monosaccharide_name() throws RecognitionException, TokenStreamException { ResidueToken m = null; Token n = null; Token x = null; n = LT(1); match(IDENTIFIER); { _loop25: do { if ((LA(1)==HYPHEN) && (LA(2)==IDENTIFIER)) { match(HYPHEN); x = LT(1); match(IDENTIFIER); n.setText( n.getText() + "-" + x.getText() ); } else { break _loop25; } } while (true); } m = createMonosaccharideToken( n ); return m; } /** * Rule for GlycoCT monosaccharide ring closure syntax, of form * "[terminus_position]-[terminus_position]". */ public final void monosac_ring_closure() throws RecognitionException, TokenStreamException { Token t1 = null, t2 = null; t1=terminus_position(); match(COLON); t2=terminus_position(); setRingClosure( t1, t2 ); } /** * Rule for a monosaccharide modification list, which may be * a pipe-symbol ('|') delimited list of monosaccharide * modifications. The general form of a monosac modification is * "[terminus_position]:[identifier]". Modifications that affect * 2 terminii (such as double or triple bonds) are also matched * by this rule. */ public final void monosac_substituents_or_modifications() throws RecognitionException, TokenStreamException { Token t1 = null; Token t2 = null; Token n = null; match(PIPE); t1 = LT(1); match(INTEGER); { switch ( LA(1)) { case COMMA: { match(COMMA); t2 = LT(1); match(INTEGER); break; } case COLON: { break; } default: { throw new NoViableAltException(LT(1), getFilename()); } } } match(COLON); n = LT(1); match(IDENTIFIER); addSubstituentOrModification( n, t1, t2 ); } /** * Rule defining a valid terminus position, which may be * either a positive integer, or the unknown symbol ('x'). */ public final Token terminus_position() throws RecognitionException, TokenStreamException { Token t; Token i = null; Token u = null; Token q = null; switch ( LA(1)) { case INTEGER: { i = LT(1); match(INTEGER); t = i; break; } case HYPHEN: { u = LT(1); match(HYPHEN); match(INTEGER); t = u; t.setText("-1"); break; } case UNKNOWN_TERMINUS: { q = LT(1); match(UNKNOWN_TERMINUS); t = q; break; } default: { throw new NoViableAltException(LT(1), getFilename()); } } return t; } public static final String[] _tokenNames = { "<0>", "EOF", "<2>", "NULL_TREE_LOOKAHEAD", "a RES (residue) section start identifier", "a LIN (linkage) section start identifier", "a PRO (heterogeneity due to uncertainty) section start identifier", "a REP (repeat) section start identifier", "a positive integer or zero", "a residue/linkage token separator ';'", "a basetype declaration 'b:'", "a substituent declaration 's:'", "a repeat structure declaration 'r:'", "an alphabetic identifier", "a hyphen '-'", "a superclass descriptor", "a colon separator ':'", "an unknown terminal position '?'", "a residue substitution delimiter '|'", "a comma ','", "\"R\"", "a linkage start delimiter '('", "a linkage terminii delimiter '+'", "a linkage end delimiter ')'", "a repeat range delimiter '='", "an inchi section declaration 'i:'", "a STA (heterogeneity due to a statistical distribution) section start identifier", "an ISO (isotope) section start identifier", "an AGL (aglycon) section start identifier", "WS" }; }