/*
* EuroCarbDB, a framework for carbohydrate bioinformatics
*
* Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as
* indicated by the @author tags or express copyright attribution
* statements applied by the authors.
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
* A copy of this license accompanies this distribution in the file LICENSE.txt.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* Last commit: $Rev: 1932 $ by $Author: glycoslave $ on $Date:: 2010-08-05 #$
*/
package org.eurocarbdb.sugar.seq.grammar;
// stdlib imports
import java.util.Stack;
// 3rd party imports
import antlr.Token;
import antlr.TokenBuffer;
import antlr.TokenStream;
import antlr.SemanticException;
import antlr.ParserSharedInputState;
import org.apache.log4j.Logger;
// eurocarb imports
import org.eurocarbdb.sugar.*;
import org.eurocarbdb.util.graph.*;
import org.eurocarbdb.sugar.seq.grammar.ResidueToken;
import org.eurocarbdb.sugar.seq.grammar.LinkageToken;
/* class IupacParserAdaptor *//************************************
*<p>
* This class is a wrapper around the ParserAdaptor class to implement
* a state machine for parsing the Iupac sequence format from left to
* right.
*</p>
*<p>
* The implicit natural structure of the Iupac format is right
* (root) to left (leaves), so parsing left to right we build up a kind
* of state machine, buffering residues and linkages until their attachment
* points can be fully determined.
*</p>
*<p>
* At the most basic level, the parser works by accumulating residue-linkage
* pairs (the addLinkedResidue method), and marking branch start- and end-
* points via the branchStart() and branchEnd() methods.
*</p>
*<p>
* Note that all declared methods of this class are final in order to
* maximise inlineability and parsing speed.
*</p>
* @see "iupac_grammar.g"
* @author mjh &lt;glycoslave@gmail.com&gt;
*/
public abstract class IupacParserAdaptor extends ParserAdaptor
{
    //~~~~~~~~~~~~~~~~~~~~~ STATIC FIELDS ~~~~~~~~~~~~~~~~~~~~~~~//

    /** Logging instance. */
    static final Logger log = Logger.getLogger( IupacParserAdaptor.class );

    /**
    *   Whether debug logging was enabled at the moment this class was
    *   initialised. This is evaluated exactly once, so later changes to
    *   the logger level are not reflected here.
    */
    static final boolean DEBUGGING = log.isDebugEnabled();

    //~~~~~~~~~~~~~~~~~~~~~~~~~~ FIELDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~//

    /**
    *   Branch stack buffer for residue tokens. Each element of this
    *   stack is the residue cursor that was current when a branch
    *   start was encountered (see {@link #branchStarts()}).
    */
    protected Stack<ResidueToken> residueBuffer = new Stack<ResidueToken>();

    /**
    *   Branch stack buffer for linkage tokens. Each element is pushed
    *   together with the residue at the same position in
    *   {@link #residueBuffer}, i.e. it is the linkage that was parsed
    *   alongside that residue.
    */
    protected Stack<LinkageToken> linkageBuffer = new Stack<LinkageToken>();

    /** Current residue token cursor. */
    protected ResidueToken lastResidue;

    /** Current linkage token cursor. */
    protected LinkageToken lastLinkage;

    /** Indicates that the currently parsed branch has just been closed. */
    private boolean branch_just_ended = false;

    /**
    *   Running count of how deeply nested we are in branches: incremented
    *   by {@link #branchStarts()} and decremented by {@link #branchEnds()}.
    *   When the value is 0 we are parsing along the "main" sugar branch;
    *   when it is i &gt; 0 we are in the i'th nested branch. A negative
    *   value means more branches were closed than opened, which is a bug.
    */
    private int branch_depth = 0;

    //~~~~~~~~~~~~~~~~~~~~~~ CONSTRUCTORS ~~~~~~~~~~~~~~~~~~~~~~~//

    /* Constructors are not inherited in Java, so each of these simply
    *  forwards its arguments to the matching ParserAdaptor constructor. */

    public IupacParserAdaptor( int k ) { super( k ); }

    public IupacParserAdaptor( ParserSharedInputState state, int k ) { super( state, k ); }

    public IupacParserAdaptor( TokenBuffer buffer, int k ) { super( buffer, k ); }

    public IupacParserAdaptor( TokenStream stream, int k ) { super( stream, k ); }

    //~~~~~~~~~~~~~~~~~~~~~~~~~ METHODS ~~~~~~~~~~~~~~~~~~~~~~~~~~~//

    /*  addLinkedResidue  *//****************************************
    *<p>
    *   Handles a just-parsed {@link Residue}-{@link Linkage} pair.
    *</p><p>
    *   The passed residue token is always added to the growing sugar
    *   {@link Graph} immediately. The passed linkage token is never added
    *   immediately: it is only remembered (together with the residue) in
    *   the cursor fields, and is used to connect this residue to whichever
    *   residue is passed to the next call of this method or of
    *   {@link #addRootResidue}.
    *</p><p>
    *   If a branch was closed since the previous residue, the branch(es)
    *   saved by {@link #branchStarts()} are also connected to the passed
    *   residue.
    *</p>
    *   @param rt  the residue that has just been parsed.
    *   @param lt  the linkage parsed together with <tt>rt</tt>.
    *   @throws SemanticException  propagated from the inherited
    *   residue/linkage-adding methods.
    */
    protected final void addLinkedResidue( ResidueToken rt, LinkageToken lt )
    throws SemanticException
    {
        //  add the incoming residue to the graph straight away;
        //  its own linkage (lt) can only be resolved by a later call.
        addResidue( rt );

        if ( lastResidue != null && lastLinkage != null )
        {
            //  a residue & linkage were cached by a previous invocation,
            //  so the cached residue can now be connected to rt.
            if ( DEBUGGING )
            {
                log.debug("adding linkage " + lastLinkage + " to child=" + lastResidue );
            }

            addLinkage( rt, lastLinkage, lastResidue );
        }

        //  a branch closing immediately before rt means the saved
        //  branch(es) also attach to rt.
        if ( branch_just_ended )
        {
            attachSavedBranchesTo( rt );
        }

        //  unconditionally advance the residue/linkage cursors.
        lastResidue = rt;
        lastLinkage = lt;
    }

    /*  addRootResidue  *//******************************************
    *
    *   Adds the final (root) residue of the sugar, effectively finalising
    *   the parsing of the current sugar (since in Iupac format the root
    *   monosaccharide is parsed last when parsing from left to right).
    *   Any pending cursor linkage and any just-closed saved branches are
    *   connected to the root.
    *
    *   @param rt  the root residue.
    *   @throws SemanticException  propagated from the inherited
    *   residue/linkage-adding methods.
    */
    protected final void addRootResidue( ResidueToken rt )
    throws SemanticException
    {
        if ( DEBUGGING )
        {
            log.debug("adding final (root) residue " + rt );
        }

        addResidue( rt );
        setRootResidue( rt );

        //  connect the previously parsed residue (if any) to the root.
        if ( lastLinkage != null )
        {
            addLinkage( rt, lastLinkage, lastResidue );
        }

        if ( branch_just_ended )
        {
            attachSavedBranchesTo( rt );
        }
    }

    /*  branchStarts  *//********************************************
    *
    *   Indicates that we have just encountered the start of a new branch.
    *   Internally, this means the last-encountered residue and linkage
    *   pair are pushed to their respective Stack buffers to be popped
    *   when the residue they attach to is encountered, and the cursors
    *   are cleared.
    *<p>
    *   Note that the cursors are pushed as they are, so null entries are
    *   pushed if no residue has been parsed since the start of parsing or
    *   since the previous call to this method.
    *</p>
    */
    protected final void branchStarts()
    {
        if ( DEBUGGING )
        {
            log.debug( "new branch starting" );
        }

        branch_depth++;

        //  covers the special case where a branch closes right next to
        //  where a new branch opens: the closed branch stays buffered.
        branch_just_ended = false;

        if ( DEBUGGING )
        {
            log.debug( "saving the branch ending with residue="
                     + lastResidue
                     + ", linkage="
                     + lastLinkage
                     + ", branch depth = "
                     + branch_depth
                     );
        }

        residueBuffer.push( lastResidue );
        linkageBuffer.push( lastLinkage );

        lastResidue = null;
        lastLinkage = null;
    }

    /*  branchEnds  *//**********************************************
    *
    *   Indicates that we have just encountered the end of a branch.
    *   Internally, this method only marks that saved residue/linkage
    *   pair(s) in our residue/linkage stacks need to be connected to
    *   whatever is the next parsed residue, and decrements the branch
    *   nesting depth.
    */
    protected final void branchEnds()
    {
        if ( DEBUGGING )
        {
            log.debug( "marking end of branch" );
        }

        branch_just_ended = true;
        branch_depth--;
    }

    /** Returns {@link SequenceFormat#Iupac}. */
    @Override
    public final SequenceFormat getSequenceFormat()
    {
        return SequenceFormat.Iupac;
    }

    //~~~~~~~~~~~~~~~~~~~~~ PRIVATE METHODS ~~~~~~~~~~~~~~~~~~~~~~~//

    /*  attachSavedBranchesTo  *//***********************************
    *
    *   Pops saved residue/linkage pairs from the residue/linkage stacks
    *   and connects each of them to the passed residue, then clears the
    *   "branch just ended" flag.
    *<p>
    *   The number of branches to attach is the current residue buffer
    *   size minus the current branch depth (i.e. minus the number of
    *   branches that are still open).
    *</p><p>
    *   Note that the popped tokens are stored in the
    *   {@link #lastResidue}/{@link #lastLinkage} cursor fields.
    *</p>
    *   @param rt  the residue to which the saved branches are connected.
    *   @throws IllegalStateException  if the branch bookkeeping is
    *   inconsistent: the branch depth is negative, or there is no saved
    *   branch to attach. Either indicates a bug.
    *   @throws SemanticException  propagated from the inherited
    *   linkage-adding method.
    */
    private final void attachSavedBranchesTo( ResidueToken rt )
    throws SemanticException
    {
        final int saved_branches = residueBuffer.size();

        //  the number of branches that need to be added is equal to
        //  the current stack size minus (the number of branch opens
        //  minus the number of branch closes).
        final int branches_to_add = saved_branches - branch_depth;

        //  this method is only called straight after a branch closed, so
        //  there has to be at least one saved branch to attach; and a
        //  negative depth would ask for more branches than are buffered.
        //  Fail before touching the graph rather than part-way through.
        if ( branch_depth < 0 || branches_to_add <= 0 )
        {
            throw new IllegalStateException(
                "BUG: inconsistent branch state while attaching saved branches to residue="
                + rt
                + ": saved branches="
                + saved_branches
                + ", branch depth="
                + branch_depth
            );
        }

        if ( DEBUGGING )
        {
            log.debug("adding " + branches_to_add + " saved branch(es):");
        }

        for ( int i = 0; i < branches_to_add; i++ )
        {
            lastResidue = residueBuffer.pop();
            lastLinkage = linkageBuffer.pop();

            if ( DEBUGGING )
            {
                log.debug(
                    "connecting the saved branch that ends in residue="
                    + lastResidue
                    + ", linkage="
                    + lastLinkage
                );
            }

            addLinkage( rt, lastLinkage, lastResidue );
        }

        //  all pending branches are attached; the branch end is consumed.
        branch_just_ended = false;
    }

} // end of class