// $ANTLR 2.7.6 (2005-12-22): "iupac_grammar.g" -> "IupacParser.java"$ package org.eurocarbdb.sugar.seq.grammar; import org.eurocarbdb.sugar.seq.grammar.IupacParserAdaptor; import org.eurocarbdb.sugar.seq.grammar.ParserAdaptor; import org.eurocarbdb.sugar.seq.grammar.ResidueToken; import org.eurocarbdb.sugar.seq.grammar.LinkageToken; import antlr.TokenBuffer; import antlr.TokenStreamException; import antlr.TokenStreamIOException; import antlr.ANTLRException; import antlr.LLkParser; import antlr.Token; import antlr.TokenStream; import antlr.RecognitionException; import antlr.NoViableAltException; import antlr.MismatchedTokenException; import antlr.SemanticException; import antlr.ParserSharedInputState; import antlr.collections.impl.BitSet; /***************************************************** *<p> * This class defines an LLk parser based on ANTLR (http://antlr.org) syntax * rules for parsing carbohydrate sequences in extended IUPAC syntax, according * to the syntax rules described (TODO: provide a definitive syntax reference link). *</p> *<p> * This class inherits from 2 superclasses, which provide the majority of * the semantic action code that is called from within this grammar. This * is in order to keep the grammar as clear as possible and to facillitate * re-targeting of this grammar to other languages than Java (at time of * writing ANTLR also supports C++, python, C#). *</p> *<p> * Note that the source code for this class has been auto-generated by ANTLR. *</p> * * @see IupacLexer * @see IupacParserAdaptor * @see ParserAdaptor * @see iupac_grammar.g * * @author mjh */ public class IupacParser extends org.eurocarbdb.sugar.seq.grammar.IupacParserAdaptor implements IupacParserTokenTypes { protected IupacParser(TokenBuffer tokenBuf, int k) { super(tokenBuf,k); tokenNames = _tokenNames; } public IupacParser(TokenBuffer tokenBuf) { this(tokenBuf,3); } protected IupacParser(TokenStream lexer, int k) { super(lexer,k); tokenNames = _tokenNames; } public IupacParser(TokenStream lexer) { this(lexer,3); } public IupacParser(ParserSharedInputState state) { super(state,3); tokenNames = _tokenNames; } /** Toplevel rule defining a sugar sequence. */ public final void sugar() throws RecognitionException, TokenStreamException { { _loop3: do { if ((LA(1)==OPENING_REPEAT_DELIM||LA(1)==RESIDUE) && (_tokenSet_0.member(LA(2)))) { linked_residue(); } else { break _loop3; } } while (true); } root_monosaccharide(); { switch ( LA(1)) { case NEWLINE: { match(NEWLINE); break; } case EOF: { break; } default: { throw new NoViableAltException(LT(1), getFilename()); } } } } /** * Parent rule for anything attached to the root monsaccharide, specifically * residue_linkage pairs, and any branches. */ public final void linked_residue() throws RecognitionException, TokenStreamException { residue_linkage_pair(); { _loop8: do { if ((LA(1)==OPENING_BRANCH_DELIM)) { branch(); } else { break _loop8; } } while (true); } } /** Rule for the root monosaccharide in a sequence. */ public final void root_monosaccharide() throws RecognitionException, TokenStreamException { ResidueToken m = null; m=monosaccharide(); addRootResidue( m ); } /** Rule for a monosaccharide residue name. */ public final ResidueToken monosaccharide() throws RecognitionException, TokenStreamException { ResidueToken rt = null ; Token m = null; m = LT(1); match(RESIDUE); rt = createMonosaccharideToken( m ); return rt; } /** * Specifies a residue-linkage pair. Residues are always associated with * just one linkage on their reducing terminal (right) side, and both of * these are <em>typed</em>; that is, a residue may be a * <em>monosaccharide</em>, <em>substituent</em>, or a <em>repeat</em>. */ public final void residue_linkage_pair() throws RecognitionException, TokenStreamException { if ((LA(1)==RESIDUE) && (LA(2)==OPENING_LINKAGE_DELIM) && (LA(3)==ANOMER||LA(3)==UNKNOWN_ANOMER)) { linked_monosaccharide(); } else if ((LA(1)==RESIDUE) && (LA(2)==INTERNAL_DELIM||LA(2)==OPENING_LINKAGE_DELIM) && (_tokenSet_1.member(LA(3)))) { linked_substituent(); } else if ((LA(1)==OPENING_REPEAT_DELIM)) { linked_repeat(); } else { throw new NoViableAltException(LT(1), getFilename()); } } /** * Rule for a branch. Branches are effectively sugar subsequences enclosed * by the branch delimiters '[]'. Branches cannot be "empty". */ public final void branch() throws RecognitionException, TokenStreamException { Token b1 = null; Token b2 = null; b1 = LT(1); match(OPENING_BRANCH_DELIM); branchStarts(); { int _cnt21=0; _loop21: do { if ((LA(1)==OPENING_REPEAT_DELIM||LA(1)==RESIDUE)) { linked_residue(); } else { if ( _cnt21>=1 ) { break _loop21; } else {throw new NoViableAltException(LT(1), getFilename());} } _cnt21++; } while (true); } b2 = LT(1); match(CLOSING_BRANCH_DELIM); branchEnds(); } /** Rule for a "regular" monosaccharide/linkage pair. */ public final void linked_monosaccharide() throws RecognitionException, TokenStreamException { LinkageToken lt = null; ResidueToken mt = null; mt=monosaccharide(); lt=monosaccharide_linkage(); addLinkedResidue( mt, lt ); } /** * Rule for a substituent/linkage pair. A substituent is anything that * is not a monosaccharide. */ public final void linked_substituent() throws RecognitionException, TokenStreamException { ResidueToken st = null; LinkageToken lt = null; st=substituent(); lt=substituent_linkage(); addLinkedResidue( st, lt ); } /** * Rule for a repeat/linkage pair. Repeats are effectively regarded as a * repeated "sub-sugar" within a sugar, along with additional information * about the number of repeats. */ public final void linked_repeat() throws RecognitionException, TokenStreamException { LinkageToken lt = null; repeat_unit(); lt=monosaccharide_linkage(); /* TODO: handle repeats */ } /** * Rule for a monosaccharide linkage. In their most basic form, * monosaccharide linkages consist of an <em>anomer</em>, a <em>reducing * terminus</em>, and a <em>non-reducing terminus</em>, enclosed by the * linkage delimiters '()'. Monosaccharide (and other) linkages may also * contain <em>unknown elements</em> '?', <em>alternation</em> '|', and/or * have <em>multiconnections</em>. *<br/> * Example: "Hex(a1-4)Hex" */ public final LinkageToken monosaccharide_linkage() throws RecognitionException, TokenStreamException { LinkageToken lt = null ; Token o = null; Token h = null; Token c = null; Token anom, parent = null, child = null; LinkageToken mlink = null; o = LT(1); match(OPENING_LINKAGE_DELIM); anom=anomer(); child=reducing_terminus(); h = LT(1); match(INTERNAL_DELIM); { switch ( LA(1)) { case NUMBER: case UNKNOWN_TERMINUS: { parent=nonreducing_terminus(); break; } case CLOSING_LINKAGE_DELIM: case COMMA: { break; } default: { throw new NoViableAltException(LT(1), getFilename()); } } } lt = createLinkageToken( anom, parent, child ); { _loop25: do { if ((LA(1)==COMMA)) { mlink=multiconnected_monosac_linkage(); } else { break _loop25; } } while (true); } /* MULTI */ c = LT(1); match(CLOSING_LINKAGE_DELIM); return lt; } /** Rule for a substituent residue name. */ public final ResidueToken substituent() throws RecognitionException, TokenStreamException { ResidueToken ss = null ; Token s = null; s = LT(1); match(RESIDUE); ss = createSubstituentToken( s ); return ss; } /** * Rule for a substituent linkage. Substituent linkages have similar * characteristics to monosaccharide linkages but do not comprise an * anomeric configuration or reducing terminus. *<br/> * Example: "P(-4)Glc", "S(-6)Glc" */ public final LinkageToken substituent_linkage() throws RecognitionException, TokenStreamException { LinkageToken lt = null ; Token o = null; Token c = null; Token parent; switch ( LA(1)) { case INTERNAL_DELIM: { match(INTERNAL_DELIM); break; } case OPENING_LINKAGE_DELIM: { o = LT(1); match(OPENING_LINKAGE_DELIM); match(INTERNAL_DELIM); parent=nonreducing_terminus(); { _loop28: do { if ((LA(1)==COMMA)) { multiconnected_substit_linkage(); } else { break _loop28; } } while (true); } c = LT(1); match(CLOSING_LINKAGE_DELIM); lt = createLinkageToken( null, parent, null ); break; } default: { throw new NoViableAltException(LT(1), getFilename()); } } return lt; } /** * Defines a repeat unit. A repeat unit is effectively a sub-structure within * a larger sugar that may be repeated a definite or variable number of times. * Repeat units consist of an optional range specification, followed by any * number of <code>linked_residue</code>s, all enclosed by repeat delimiters '{}'. */ public final void repeat_unit() throws RecognitionException, TokenStreamException { ResidueToken m = null; match(OPENING_REPEAT_DELIM); { switch ( LA(1)) { case NUMBER: { repeat_range(); break; } case OPENING_REPEAT_DELIM: case RESIDUE: { break; } default: { throw new NoViableAltException(LT(1), getFilename()); } } } { int _cnt16=0; _loop16: do { if ((LA(1)==OPENING_REPEAT_DELIM||LA(1)==RESIDUE) && (_tokenSet_0.member(LA(2)))) { linked_residue(); } else { if ( _cnt16>=1 ) { break _loop16; } else {throw new NoViableAltException(LT(1), getFilename());} } _cnt16++; } while (true); } m=monosaccharide(); match(CLOSING_REPEAT_DELIM); /* todo: handle repeats */ } /** Rule for a repeat range, of form "10-20:". */ public final void repeat_range() throws RecognitionException, TokenStreamException { Token lower = null; Token upper = null; lower = LT(1); match(NUMBER); { switch ( LA(1)) { case INTERNAL_DELIM: { match(INTERNAL_DELIM); upper = LT(1); match(NUMBER); break; } case REPEAT_RANGE_DELIM: { break; } default: { throw new NoViableAltException(LT(1), getFilename()); } } } match(REPEAT_RANGE_DELIM); checkRepeatBounds( lower, upper ); } /** * Rule for an anomer, specifically 'a' for alpha, 'b' for beta, * or 'u' for unknown. */ public final Token anomer() throws RecognitionException, TokenStreamException { Token anom ; Token a = null; Token u = null; switch ( LA(1)) { case ANOMER: { a = LT(1); match(ANOMER); anom = a; break; } case UNKNOWN_ANOMER: { u = LT(1); match(UNKNOWN_ANOMER); anom = null; break; } default: { throw new NoViableAltException(LT(1), getFilename()); } } return anom ; } /** * Rule for a reducing terminal position, ie: any positive integer * greater than zero that is reasonable for the monosaccharide found * on the reducing terminal side of the given linkage. */ public final Token reducing_terminus() throws RecognitionException, TokenStreamException { Token rterm ; Token r = null; Token u = null; switch ( LA(1)) { case NUMBER: { r = LT(1); match(NUMBER); rterm = r; break; } case UNKNOWN_TERMINUS: { u = LT(1); match(UNKNOWN_TERMINUS); rterm = null; break; } default: { throw new NoViableAltException(LT(1), getFilename()); } } return rterm ; } /** * Rule for a reducing terminal position, ie: any positive integer * greater than zero that is reasonable for the monosaccharide found * on the non-reducing terminal side of the given linkage. */ public final Token nonreducing_terminus() throws RecognitionException, TokenStreamException { Token nrterm ; Token t = null; Token u = null; switch ( LA(1)) { case NUMBER: { t = LT(1); match(NUMBER); nrterm = t; { _loop39: do { if ((LA(1)==LOGICAL_OR)) { match(LOGICAL_OR); match(NUMBER); } else { break _loop39; } } while (true); } break; } case UNKNOWN_TERMINUS: { u = LT(1); match(UNKNOWN_TERMINUS); nrterm = null; break; } default: { throw new NoViableAltException(LT(1), getFilename()); } } return nrterm ; } /** * Monosaccharide linkages may be multi-connected, whereby a second * linkage specification appears within the linkage delimiters '()', * separated from other linkage specifications by commas. *<br/> * Example: "NeuAc(a2-8,1-9)NeuAc" */ public final LinkageToken multiconnected_monosac_linkage() throws RecognitionException, TokenStreamException { LinkageToken lt = null ; Token parent = null, child = null; match(COMMA); child=nonreducing_terminus(); match(INTERNAL_DELIM); { switch ( LA(1)) { case NUMBER: case UNKNOWN_TERMINUS: { parent=nonreducing_terminus(); break; } case CLOSING_LINKAGE_DELIM: case COMMA: { break; } default: { throw new NoViableAltException(LT(1), getFilename()); } } } lt = createLinkageToken( null, parent, child ); return lt; } /** * Substituent linkages may also be potentially multiconnected in the * same manner as multiconnected monosaccharides, with the previously * described limitation that substituent linkages do not comprise an * anomeric configuration. */ public final void multiconnected_substit_linkage() throws RecognitionException, TokenStreamException { Token child, parent; match(COMMA); { switch ( LA(1)) { case NUMBER: case UNKNOWN_TERMINUS: { child=nonreducing_terminus(); break; } case INTERNAL_DELIM: { break; } default: { throw new NoViableAltException(LT(1), getFilename()); } } } match(INTERNAL_DELIM); parent=nonreducing_terminus(); } public static final String[] _tokenNames = { "<0>", "EOF", "<2>", "NULL_TREE_LOOKAHEAD", "NEWLINE", "an opening repeat delimiter '{'", "a closing repeat delimiter '}'", "a non-zero terminal position", "an internal linkage delimiter '-'", "a repeat range delimiter ':'", "an opening branch delimiter '['", "a closing branch delimiter ']'", "an opening linkage delimiter '('", "a closing linkage delimiter ')'", "a comma", "a residue name", "a linkage anomer", "an unknown anomer", "an unknown terminus", "an alternation operator" }; private static final long[] mk_tokenSet_0() { long[] data = { 37280L, 0L}; return data; } public static final BitSet _tokenSet_0 = new BitSet(mk_tokenSet_0()); private static final long[] mk_tokenSet_1() { long[] data = { 36128L, 0L}; return data; } public static final BitSet _tokenSet_1 = new BitSet(mk_tokenSet_1()); }