// $ANTLR 2.7.6 (2005-12-22): "iupac_grammar.g" -> "IupacParser.java"$
package org.eurocarbdb.sugar.seq.grammar;
import org.eurocarbdb.sugar.seq.grammar.IupacParserAdaptor;
import org.eurocarbdb.sugar.seq.grammar.ParserAdaptor;
import org.eurocarbdb.sugar.seq.grammar.ResidueToken;
import org.eurocarbdb.sugar.seq.grammar.LinkageToken;
import antlr.TokenBuffer;
import antlr.TokenStreamException;
import antlr.TokenStreamIOException;
import antlr.ANTLRException;
import antlr.LLkParser;
import antlr.Token;
import antlr.TokenStream;
import antlr.RecognitionException;
import antlr.NoViableAltException;
import antlr.MismatchedTokenException;
import antlr.SemanticException;
import antlr.ParserSharedInputState;
import antlr.collections.impl.BitSet;
/*****************************************************
*<p>
* This class defines an LLk parser based on ANTLR (http://antlr.org) syntax
* rules for parsing carbohydrate sequences in extended IUPAC syntax, according
* to the syntax rules described (TODO: provide a definitive syntax reference link).
*</p>
*<p>
* This class inherits from 2 superclasses, which provide the majority of
* the semantic action code that is called from within this grammar. This
* is in order to keep the grammar as clear as possible and to facilitate
* re-targeting of this grammar to other languages than Java (at time of
* writing ANTLR also supports C++, python, C#).
*</p>
*<p>
* Note that the source code for this class has been auto-generated by ANTLR.
*</p>
*
* @see IupacLexer
* @see IupacParserAdaptor
* @see ParserAdaptor
* @see iupac_grammar.g
*
* @author mjh
*/
public class IupacParser extends org.eurocarbdb.sugar.seq.grammar.IupacParserAdaptor implements IupacParserTokenTypes
{
/**
 * Creates a parser reading from a token buffer with an explicit lookahead depth.
 *
 * @param tokenBuf buffer supplying the tokens to parse
 * @param k        lookahead depth passed through to the superclass constructor
 */
protected IupacParser(TokenBuffer tokenBuf, int k) {
    super(tokenBuf, k);
    this.tokenNames = _tokenNames;
}

/**
 * Creates a parser reading from a token buffer, using a lookahead depth of 3.
 *
 * @param tokenBuf buffer supplying the tokens to parse
 */
public IupacParser(TokenBuffer tokenBuf) {
    this(tokenBuf, 3);
}

/**
 * Creates a parser reading from a token stream with an explicit lookahead depth.
 *
 * @param lexer token stream supplying the tokens to parse
 * @param k     lookahead depth passed through to the superclass constructor
 */
protected IupacParser(TokenStream lexer, int k) {
    super(lexer, k);
    this.tokenNames = _tokenNames;
}

/**
 * Creates a parser reading from a token stream, using a lookahead depth of 3.
 *
 * @param lexer token stream supplying the tokens to parse
 */
public IupacParser(TokenStream lexer) {
    this(lexer, 3);
}

/**
 * Creates a parser on top of an existing shared input state, using a
 * lookahead depth of 3.
 *
 * @param state parser input state passed through to the superclass constructor
 */
public IupacParser(ParserSharedInputState state) {
    super(state, 3);
    this.tokenNames = _tokenNames;
}
/**
 * Top-level rule defining a sugar sequence.
 * <p>
 * Consumes zero or more {@link #linked_residue()}s, then the
 * {@link #root_monosaccharide()}, and finally either a single NEWLINE
 * token or the end of input.
 * </p>
 *
 * @throws RecognitionException if the token after the root monosaccharide
 *         is neither NEWLINE nor EOF, or if a sub-rule fails to match
 * @throws TokenStreamException propagated from the underlying token stream
 */
public final void sugar() throws RecognitionException, TokenStreamException {
    // A linked residue is only entered when the second lookahead token is
    // in _tokenSet_0; otherwise the residue is left for the root rule.
    while ((LA(1) == OPENING_REPEAT_DELIM || LA(1) == RESIDUE)
            && _tokenSet_0.member(LA(2))) {
        linked_residue();
    }

    root_monosaccharide();

    // Optional trailing newline; anything other than NEWLINE/EOF is an error.
    final int terminator = LA(1);
    if (terminator == NEWLINE) {
        match(NEWLINE);
    } else if (terminator != EOF) {
        throw new NoViableAltException(LT(1), getFilename());
    }
}
/**
 * Parent rule for anything attached to the root monosaccharide: a single
 * residue/linkage pair followed by zero or more branches.
 *
 * @throws RecognitionException if a sub-rule fails to match
 * @throws TokenStreamException propagated from the underlying token stream
 */
public final void linked_residue() throws RecognitionException, TokenStreamException {
    residue_linkage_pair();

    // Every opening branch delimiter that follows starts another branch.
    while (LA(1) == OPENING_BRANCH_DELIM) {
        branch();
    }
}
/**
 * Rule for the root monosaccharide in a sequence. Parses one
 * monosaccharide and hands the resulting token to
 * <code>addRootResidue</code>.
 *
 * @throws RecognitionException if the next token is not a residue
 * @throws TokenStreamException propagated from the underlying token stream
 */
public final void root_monosaccharide() throws RecognitionException, TokenStreamException {
    final ResidueToken root = monosaccharide();
    addRootResidue(root);
}
/**
 * Rule for a monosaccharide residue name.
 *
 * @return the token produced by <code>createMonosaccharideToken</code>
 *         for the matched RESIDUE token
 * @throws RecognitionException if the next token is not a RESIDUE
 * @throws TokenStreamException propagated from the underlying token stream
 */
public final ResidueToken monosaccharide() throws RecognitionException, TokenStreamException {
    // Grab the lookahead token first: it is the one match() checks next.
    final Token residueName = LT(1);
    match(RESIDUE);
    return createMonosaccharideToken(residueName);
}
/**
 * Specifies a residue-linkage pair. Residues are always associated with
 * just one linkage on their reducing terminal (right) side, and both of
 * these are <em>typed</em>; that is, a residue may be a
 * <em>monosaccharide</em>, <em>substituent</em>, or a <em>repeat</em>.
 * <p>
 * The alternative is chosen using up to three tokens of lookahead. The
 * second and third tokens are only inspected when the earlier ones make
 * that alternative possible.
 * </p>
 *
 * @throws RecognitionException if the lookahead matches none of the three
 *         alternatives, or if the chosen sub-rule fails to match
 * @throws TokenStreamException propagated from the underlying token stream
 */
public final void residue_linkage_pair() throws RecognitionException, TokenStreamException {
    final int first = LA(1);

    if (first == RESIDUE) {
        // RESIDUE '(' followed by an anomer is a monosaccharide linkage.
        if (LA(2) == OPENING_LINKAGE_DELIM
                && (LA(3) == ANOMER || LA(3) == UNKNOWN_ANOMER)) {
            linked_monosaccharide();
            return;
        }
        // Otherwise RESIDUE followed by '-' or '(' may be a substituent,
        // provided the third token is in _tokenSet_1.
        if ((LA(2) == INTERNAL_DELIM || LA(2) == OPENING_LINKAGE_DELIM)
                && _tokenSet_1.member(LA(3))) {
            linked_substituent();
            return;
        }
    } else if (first == OPENING_REPEAT_DELIM) {
        linked_repeat();
        return;
    }

    throw new NoViableAltException(LT(1), getFilename());
}
/**
 * Rule for a branch. Branches are effectively sugar subsequences enclosed
 * by the branch delimiters '[]'. Branches cannot be "empty": at least one
 * {@link #linked_residue()} must appear between the delimiters.
 * <p>
 * <code>branchStarts()</code> is called right after the opening delimiter
 * is matched, and <code>branchEnds()</code> right after the closing one.
 * </p>
 *
 * @throws RecognitionException if a delimiter is missing, the branch
 *         contains no linked residue, or a sub-rule fails to match
 * @throws TokenStreamException propagated from the underlying token stream
 */
public final void branch() throws RecognitionException, TokenStreamException {
    match(OPENING_BRANCH_DELIM);
    branchStarts();

    // One or more linked residues.
    int residueCount = 0;
    while (LA(1) == OPENING_REPEAT_DELIM || LA(1) == RESIDUE) {
        linked_residue();
        residueCount++;
    }
    if (residueCount == 0) {
        throw new NoViableAltException(LT(1), getFilename());
    }

    match(CLOSING_BRANCH_DELIM);
    branchEnds();
}
/**
 * Rule for a "regular" monosaccharide/linkage pair. Parses the
 * monosaccharide, then its linkage, and passes both to
 * <code>addLinkedResidue</code>.
 *
 * @throws RecognitionException if either sub-rule fails to match
 * @throws TokenStreamException propagated from the underlying token stream
 */
public final void linked_monosaccharide() throws RecognitionException, TokenStreamException {
    final ResidueToken residue = monosaccharide();
    final LinkageToken linkage = monosaccharide_linkage();
    addLinkedResidue(residue, linkage);
}
/**
 * Rule for a substituent/linkage pair. A substituent is anything that
 * is not a monosaccharide. Parses the substituent, then its linkage,
 * and passes both to <code>addLinkedResidue</code>.
 *
 * @throws RecognitionException if either sub-rule fails to match
 * @throws TokenStreamException propagated from the underlying token stream
 */
public final void linked_substituent() throws RecognitionException, TokenStreamException {
    final ResidueToken residue = substituent();
    final LinkageToken linkage = substituent_linkage();
    addLinkedResidue(residue, linkage);
}
/**
 * Rule for a repeat/linkage pair. Repeats are effectively regarded as a
 * repeated "sub-sugar" within a sugar, along with additional information
 * about the number of repeats.
 * <p>
 * The repeat unit and its linkage are parsed, but the linkage is not yet
 * used by this rule (see the TODO below).
 * </p>
 *
 * @throws RecognitionException if either sub-rule fails to match
 * @throws TokenStreamException propagated from the underlying token stream
 */
public final void linked_repeat() throws RecognitionException, TokenStreamException {
    repeat_unit();
    // Kept for the pending repeat handling; currently unused.
    LinkageToken repeatLinkage = monosaccharide_linkage();
    /* TODO: handle repeats */
}
/**
 * Rule for a monosaccharide linkage. In their most basic form,
 * monosaccharide linkages consist of an <em>anomer</em>, a <em>reducing
 * terminus</em>, and a <em>non-reducing terminus</em>, enclosed by the
 * linkage delimiters '()'. Monosaccharide (and other) linkages may also
 * contain <em>unknown elements</em> '?', <em>alternation</em> '|', and/or
 * have <em>multiconnections</em>.
 *<br/>
 * Example: "Hex(a1-4)Hex"
 * <p>
 * The non-reducing terminus is optional; when absent, <code>null</code> is
 * passed as the parent position. Any multiconnected linkages after the
 * first are parsed but currently discarded, so only the first linkage
 * specification is returned.
 * </p>
 *
 * @return the token created by <code>createLinkageToken</code> from the
 *         anomer, parent (non-reducing) and child (reducing) positions of
 *         the first linkage specification
 * @throws RecognitionException if the input does not match this rule
 * @throws TokenStreamException propagated from the underlying token stream
 */
public final LinkageToken monosaccharide_linkage() throws RecognitionException, TokenStreamException {
    match(OPENING_LINKAGE_DELIM);
    final Token anom = anomer();
    final Token child = reducing_terminus();
    match(INTERNAL_DELIM);

    // Optional non-reducing terminus.
    Token parent = null;
    switch (LA(1)) {
        case NUMBER:
        case UNKNOWN_TERMINUS:
            parent = nonreducing_terminus();
            break;
        case CLOSING_LINKAGE_DELIM:
        case COMMA:
            break;
        default:
            throw new NoViableAltException(LT(1), getFilename());
    }

    final LinkageToken lt = createLinkageToken(anom, parent, child);

    // Zero or more ",child-parent" multiconnections.
    while (LA(1) == COMMA) {
        /* MULTI: TODO - the multiconnected linkage token is not yet used */
        multiconnected_monosac_linkage();
    }

    match(CLOSING_LINKAGE_DELIM);
    return lt;
}
/**
 * Rule for a substituent residue name.
 *
 * @return the token produced by <code>createSubstituentToken</code> for
 *         the matched RESIDUE token
 * @throws RecognitionException if the next token is not a RESIDUE
 * @throws TokenStreamException propagated from the underlying token stream
 */
public final ResidueToken substituent() throws RecognitionException, TokenStreamException {
    // Grab the lookahead token first: it is the one match() checks next.
    final Token substituentName = LT(1);
    match(RESIDUE);
    return createSubstituentToken(substituentName);
}
/**
 * Rule for a substituent linkage. Substituent linkages have similar
 * characteristics to monosaccharide linkages but do not comprise an
 * anomeric configuration or reducing terminus.
 *<br/>
 * Example: "P(-4)Glc", "S(-6)Glc"
 * <p>
 * Two forms are accepted: a bare internal delimiter '-', or a
 * parenthesised form '(' '-' position {',' multiconnection} ')'.
 * Multiconnections are parsed but do not contribute to the returned token.
 * </p>
 *
 * @return for the parenthesised form, the token created by
 *         <code>createLinkageToken(null, parent, null)</code>; for the
 *         bare '-' form, <code>null</code>
 * @throws RecognitionException if the input does not match either form
 * @throws TokenStreamException propagated from the underlying token stream
 */
public final LinkageToken substituent_linkage() throws RecognitionException, TokenStreamException {
    final int lookahead = LA(1);

    if (lookahead == INTERNAL_DELIM) {
        // Bare '-' carries no position information, so no token is created.
        match(INTERNAL_DELIM);
        return null;
    }

    if (lookahead != OPENING_LINKAGE_DELIM) {
        throw new NoViableAltException(LT(1), getFilename());
    }

    match(OPENING_LINKAGE_DELIM);
    match(INTERNAL_DELIM);
    final Token parent = nonreducing_terminus();

    // Zero or more multiconnections.
    while (LA(1) == COMMA) {
        multiconnected_substit_linkage();
    }

    match(CLOSING_LINKAGE_DELIM);
    return createLinkageToken(null, parent, null);
}
/**
 * Defines a repeat unit. A repeat unit is effectively a sub-structure within
 * a larger sugar that may be repeated a definite or variable number of times.
 * <p>
 * As parsed here, a repeat unit is: the opening repeat delimiter, an
 * optional {@link #repeat_range()}, one or more
 * <code>linked_residue</code>s, a closing monosaccharide, and the closing
 * repeat delimiter. The closing monosaccharide is parsed but not yet used
 * (see the TODO below).
 * </p>
 *
 * @throws RecognitionException if the input does not match this rule
 * @throws TokenStreamException propagated from the underlying token stream
 */
public final void repeat_unit() throws RecognitionException, TokenStreamException {
    match(OPENING_REPEAT_DELIM);

    // Optional range specification: a NUMBER here can only start a range.
    final int afterOpen = LA(1);
    if (afterOpen == NUMBER) {
        repeat_range();
    } else if (afterOpen != OPENING_REPEAT_DELIM && afterOpen != RESIDUE) {
        throw new NoViableAltException(LT(1), getFilename());
    }

    // One or more linked residues, each confirmed by the second lookahead
    // token being in _tokenSet_0.
    int residueCount = 0;
    while ((LA(1) == OPENING_REPEAT_DELIM || LA(1) == RESIDUE)
            && _tokenSet_0.member(LA(2))) {
        linked_residue();
        residueCount++;
    }
    if (residueCount == 0) {
        throw new NoViableAltException(LT(1), getFilename());
    }

    // Kept for the pending repeat handling; currently unused.
    ResidueToken lastResidue = monosaccharide();
    match(CLOSING_REPEAT_DELIM);
    /* todo: handle repeats */
}
/**
 * Rule for a repeat range, of form "10-20:". The upper bound (and its
 * preceding internal delimiter) is optional; when it is absent,
 * <code>null</code> is passed as the upper bound to
 * <code>checkRepeatBounds</code>.
 *
 * @throws RecognitionException if the input does not match this rule
 * @throws TokenStreamException propagated from the underlying token stream
 */
public final void repeat_range() throws RecognitionException, TokenStreamException {
    final Token lower = LT(1);
    match(NUMBER);

    Token upper = null;
    final int afterLower = LA(1);
    if (afterLower == INTERNAL_DELIM) {
        match(INTERNAL_DELIM);
        upper = LT(1);
        match(NUMBER);
    } else if (afterLower != REPEAT_RANGE_DELIM) {
        throw new NoViableAltException(LT(1), getFilename());
    }

    match(REPEAT_RANGE_DELIM);
    checkRepeatBounds(lower, upper);
}
/**
 * Rule for an anomer, specifically 'a' for alpha, 'b' for beta,
 * or 'u' for unknown.
 *
 * @return the matched ANOMER token, or <code>null</code> when an
 *         UNKNOWN_ANOMER token was matched instead
 * @throws RecognitionException if the next token is neither ANOMER nor
 *         UNKNOWN_ANOMER
 * @throws TokenStreamException propagated from the underlying token stream
 */
public final Token anomer() throws RecognitionException, TokenStreamException {
    switch (LA(1)) {
        case ANOMER: {
            final Token anom = LT(1);
            match(ANOMER);
            return anom;
        }
        case UNKNOWN_ANOMER: {
            // An unknown anomer is represented by null, not by its token.
            match(UNKNOWN_ANOMER);
            return null;
        }
        default:
            throw new NoViableAltException(LT(1), getFilename());
    }
}
/**
 * Rule for a reducing terminal position, ie: any positive integer
 * greater than zero that is reasonable for the monosaccharide found
 * on the reducing terminal side of the given linkage.
 *
 * @return the matched NUMBER token, or <code>null</code> when an
 *         UNKNOWN_TERMINUS token was matched instead
 * @throws RecognitionException if the next token is neither NUMBER nor
 *         UNKNOWN_TERMINUS
 * @throws TokenStreamException propagated from the underlying token stream
 */
public final Token reducing_terminus() throws RecognitionException, TokenStreamException {
    switch (LA(1)) {
        case NUMBER: {
            final Token position = LT(1);
            match(NUMBER);
            return position;
        }
        case UNKNOWN_TERMINUS: {
            // An unknown position is represented by null, not by its token.
            match(UNKNOWN_TERMINUS);
            return null;
        }
        default:
            throw new NoViableAltException(LT(1), getFilename());
    }
}
/**
 * Rule for a non-reducing terminal position, ie: any positive integer
 * greater than zero that is reasonable for the monosaccharide found
 * on the non-reducing terminal side of the given linkage.
 * <p>
 * A numeric position may be followed by any number of alternation
 * operator / NUMBER pairs. These alternates are matched but discarded,
 * so only the first position is returned.
 * </p>
 *
 * @return the first matched NUMBER token, or <code>null</code> when an
 *         UNKNOWN_TERMINUS token was matched instead
 * @throws RecognitionException if the input does not match this rule
 * @throws TokenStreamException propagated from the underlying token stream
 */
public final Token nonreducing_terminus() throws RecognitionException, TokenStreamException {
    switch (LA(1)) {
        case NUMBER: {
            final Token position = LT(1);
            match(NUMBER);
            // Consume (and ignore) any alternative positions.
            while (LA(1) == LOGICAL_OR) {
                match(LOGICAL_OR);
                match(NUMBER);
            }
            return position;
        }
        case UNKNOWN_TERMINUS: {
            // An unknown position is represented by null, not by its token.
            match(UNKNOWN_TERMINUS);
            return null;
        }
        default:
            throw new NoViableAltException(LT(1), getFilename());
    }
}
/**
 * Monosaccharide linkages may be multi-connected, whereby a second
 * linkage specification appears within the linkage delimiters '()',
 * separated from other linkage specifications by commas.
 *<br/>
 * Example: "NeuAc(a2-8,1-9)NeuAc"
 * <p>
 * Parses one ",child-parent" specification. The parent position is
 * optional; when absent, <code>null</code> is passed in its place.
 * </p>
 *
 * @return the token created by
 *         <code>createLinkageToken(null, parent, child)</code>
 * @throws RecognitionException if the input does not match this rule
 * @throws TokenStreamException propagated from the underlying token stream
 */
public final LinkageToken multiconnected_monosac_linkage() throws RecognitionException, TokenStreamException {
    match(COMMA);
    final Token child = nonreducing_terminus();
    match(INTERNAL_DELIM);

    // Optional parent position.
    Token parent = null;
    final int afterDelim = LA(1);
    if (afterDelim == NUMBER || afterDelim == UNKNOWN_TERMINUS) {
        parent = nonreducing_terminus();
    } else if (afterDelim != CLOSING_LINKAGE_DELIM && afterDelim != COMMA) {
        throw new NoViableAltException(LT(1), getFilename());
    }

    return createLinkageToken(null, parent, child);
}
/**
 * Substituent linkages may also be potentially multiconnected in the
 * same manner as multiconnected monosaccharides, with the previously
 * described limitation that substituent linkages do not comprise an
 * anomeric configuration.
 * <p>
 * Parses ',' [child] '-' parent. Both positions are currently only
 * validated syntactically; their values are discarded.
 * </p>
 *
 * @throws RecognitionException if the input does not match this rule
 * @throws TokenStreamException propagated from the underlying token stream
 */
public final void multiconnected_substit_linkage() throws RecognitionException, TokenStreamException {
    match(COMMA);

    // Optional child position before the internal delimiter.
    final int afterComma = LA(1);
    if (afterComma == NUMBER || afterComma == UNKNOWN_TERMINUS) {
        nonreducing_terminus();
    } else if (afterComma != INTERNAL_DELIM) {
        throw new NoViableAltException(LT(1), getFilename());
    }

    match(INTERNAL_DELIM);
    // Mandatory parent position.
    nonreducing_terminus();
}
/**
 * Human-readable token descriptions, installed as this parser's
 * <code>tokenNames</code> by every constructor.
 * <p>
 * NOTE(review): presumably indexed by the token type constants declared in
 * IupacParserTokenTypes (not visible here) - confirm before reordering.
 * The trailing comments give each entry's array index.
 * </p>
 */
public static final String[] _tokenNames = {
"<0>", // 0
"EOF", // 1
"<2>", // 2
"NULL_TREE_LOOKAHEAD", // 3
"NEWLINE", // 4
"an opening repeat delimiter '{'", // 5
"a closing repeat delimiter '}'", // 6
"a non-zero terminal position", // 7
"an internal linkage delimiter '-'", // 8
"a repeat range delimiter ':'", // 9
"an opening branch delimiter '['", // 10
"a closing branch delimiter ']'", // 11
"an opening linkage delimiter '('", // 12
"a closing linkage delimiter ')'", // 13
"a comma", // 14
"a residue name", // 15
"a linkage anomer", // 16
"an unknown anomer", // 17
"an unknown terminus", // 18
"an alternation operator" // 19
};
/**
 * Builds the bit mask backing {@link #_tokenSet_0}.
 * The first word has bits 5, 7, 8, 12 and 15 set (decimal 37280).
 * NOTE(review): bit positions presumably correspond to token type values,
 * i.e. indices into <code>_tokenNames</code> - confirm.
 */
private static final long[] mk_tokenSet_0() {
    final long firstWord =
            (1L << 5) | (1L << 7) | (1L << 8) | (1L << 12) | (1L << 15);
    return new long[] { firstWord, 0L };
}

/**
 * Lookahead set tested against the second lookahead token to decide
 * whether another linked residue follows (see <code>sugar()</code> and
 * <code>repeat_unit()</code>).
 */
public static final BitSet _tokenSet_0 = new BitSet(mk_tokenSet_0());
/**
 * Builds the bit mask backing {@link #_tokenSet_1}.
 * The first word has bits 5, 8, 10, 11 and 15 set (decimal 36128).
 * NOTE(review): bit positions presumably correspond to token type values,
 * i.e. indices into <code>_tokenNames</code> - confirm.
 */
private static final long[] mk_tokenSet_1() {
    final long firstWord =
            (1L << 5) | (1L << 8) | (1L << 10) | (1L << 11) | (1L << 15);
    return new long[] { firstWord, 0L };
}

/**
 * Lookahead set tested against the third lookahead token when selecting
 * the substituent alternative in <code>residue_linkage_pair()</code>.
 */
public static final BitSet _tokenSet_1 = new BitSet(mk_tokenSet_1());
}