/*
* EuroCarbDB, a framework for carbohydrate bioinformatics
*
* Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as
* indicated by the @author tags or express copyright attribution
* statements applied by the authors.
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
* A copy of this license accompanies this distribution in the file LICENSE.txt.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* Last commit: $Rev: 1559 $ by $Author: glycoslave $ on $Date:: 2009-07-21 #$
*/
package org.eurocarbdb.sugar.seq.grammar;
// stdlib imports
import java.util.List;
import java.util.EnumSet;
import java.util.ArrayList;
import java.util.Iterator;
// 3rd party imports
import org.apache.log4j.Logger;
import antlr.Token;
import antlr.TokenBuffer;
import antlr.TokenStream;
import antlr.SemanticException;
import antlr.ParserSharedInputState;
// 3rd party imports
// eurocarb imports
import org.eurocarbdb.sugar.Sugar;
import org.eurocarbdb.sugar.Anomer;
import org.eurocarbdb.sugar.Residue;
import org.eurocarbdb.sugar.Linkage;
import org.eurocarbdb.sugar.Basetype;
import org.eurocarbdb.sugar.CommonBasetype;
import org.eurocarbdb.sugar.Superclass;
import org.eurocarbdb.sugar.StereoConfig;
import org.eurocarbdb.sugar.Modification;
import org.eurocarbdb.sugar.ModificationType;
import org.eurocarbdb.sugar.Monosaccharide;
import org.eurocarbdb.sugar.Substituent;
import org.eurocarbdb.sugar.Substituents;
import org.eurocarbdb.sugar.GlycosidicLinkage;
import org.eurocarbdb.sugar.SequenceFormatException;
import org.eurocarbdb.sugar.SequenceFormat;
import org.eurocarbdb.sugar.SugarRepeat;
import org.eurocarbdb.sugar.SugarRepeatAnnotation;
import org.eurocarbdb.sugar.seq.GlycoctSequenceFormat;
import org.eurocarbdb.util.graph.Graph;
import org.eurocarbdb.util.graph.Edge;
import org.eurocarbdb.util.graph.Vertex;
import org.eurocarbdb.sugar.impl.SimpleMonosaccharide;
import org.eurocarbdb.sugar.impl.ComplexMonosaccharide;
// static imports
import static org.eurocarbdb.util.StringUtils.join;
import static org.eurocarbdb.sugar.Basetypes.getBasetype;
import static org.eurocarbdb.sugar.Substituents.substituentIsPartOfMonosaccharide;
/* class GlycoctParserAdaptor *//**********************************
*
* Utility class to support parsing carbohydrates in GlycoCT
* sequence format. This class provides methods for use in
* {@link GlycoctParser}, the class that is auto-generated from
* a grammar file by ANTLR.
*
* @see GlycoctSequenceFormat
* @see ResidueToken
* @see LinkageToken
*/
public abstract class GlycoctParserAdaptor extends ParserAdaptor
{
//~~~~~~~~~~~~~~~~~~~~~ STATIC FIELDS ~~~~~~~~~~~~~~~~~~~~~~~//
/** Logging instance. */
static final Logger log = Logger.getLogger( GlycoctParserAdaptor.class );
static final boolean debugging = log.isDebugEnabled();
static final boolean tracing = log.isTraceEnabled();
//~~~~~~~~~~~~~~~~~~~~~~~~~~ FIELDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~//
/** List of residues added in order of addition, oldest first. */
protected List<ResidueToken> residues = new ArrayList<ResidueToken>();
/** List of linkages added in order of addition, oldest first. */
protected List<LinkageToken> linkages = new ArrayList<LinkageToken>();
/** List of linkages added in order of addition, oldest first. */
protected List<RepeatResidueToken> repeats = new ArrayList<RepeatResidueToken>( 2 );
protected List<RepeatResidueToken> repeatsStack
= new ArrayList<RepeatResidueToken>( 2 );
//~~~~~~~~~~~~~~~~~~~~~~ CONSTRUCTORS ~~~~~~~~~~~~~~~~~~~~~~~//
/* pointlessly inherited constructors, stupid java */
public GlycoctParserAdaptor( int k ) { super( k ); }
public GlycoctParserAdaptor( ParserSharedInputState state, int k ) { super( state, k ); }
public GlycoctParserAdaptor( TokenBuffer buffer, int k ) { super( buffer, k ); }
public GlycoctParserAdaptor( TokenStream stream, int k ) { super( stream, k ); }
//~~~~~~~~~~~~~~~~~~~~~~~~~ METHODS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@Override
public void addResidue( ResidueToken r ) throws SequenceFormatException
{
super.addResidue( r );
residues.add( r );
if ( residues.size() == 1 )
setRootResidue( r );
// add to repeats if we are inside of a repeat
for ( RepeatResidueToken repeat : repeatsStack )
{
if ( tracing )
log.trace( "also adding last residue to repeat sub-graph" );
repeat.addResidueToken( r );
}
}
/* addLinkage *//**********************************************
*
* Adds a new linkage into the sugar object graph.
*
* @param linkage_index the index of the linkage being referenced in the list of linkages
* @param parent_residue_index the index of the residue on the reducing (parent) side of the linkage
* @param parent_linkage_type type of bond on parent's side of linkage
* @param linkage_parent terminal position of bond to parent residue
* @param linkage_child terminal position of bond to parent residue
* @param child_residue_index the index of the residue on the non-reducing (child) side of the linkage
* @param child_linkage_type type of bond on child's side of linkage
*/
public void addLinkage( Token linkage_index
, Token parent_residue_index
, Token parent_linkage_type
, Token linkage_parent
, Token linkage_child
, Token child_residue_index
, Token child_linkage_type )
throws SequenceFormatException
{
// retrieve link index, reducing terminal position, non-reducing terminal position
int link_index = getLinkageIndexFor( linkage_index );
int parent_index = getResidueIndexFor( parent_residue_index );
int child_index = getResidueIndexFor( child_residue_index );
// get parent & child residues
ResidueToken parent = residues.get( parent_index );
ResidueToken child = residues.get( child_index );
if ( "?".equals( linkage_parent.getText() ) )
linkage_parent.setText( "" + LinkageToken.UNKNOWN_TERMINUS );
if ( "?".equals( linkage_child.getText() ) )
linkage_child.setText( "" + LinkageToken.UNKNOWN_TERMINUS );
// create the linkage itself
LinkageToken link = createLinkageToken( child.anomer, linkage_parent, linkage_child );
// this is so that error messages know where linkages start and finish in the sequence
link.setLeftColumn( parent_residue_index.getColumn() );
link.setRightColumn( child_linkage_type.getColumn() );
super.addLinkage( parent, link, child );
linkages.add( link );
/*
// add to repeats if we are inside of a repeat
for ( RepeatResidueToken repeat : repeatsStack )
{
log.debug( "also adding last linkage to repeat sub-graph" );
repeat.getGraph().addEdge(
this.graph.getVertex( parent ),
this.graph.getVertex( child ),
link
);
}
*/
}
/* addSubstituentOrModification *//**************************************
*
* Attempt to add a {@link Substituent} or {@link Modification} from the
* passed {@link Token} to current {@link Residue} ({@link #lastResidue})
* at given position(s).
*
* @param mod_or_subst
* the token containing the name of the modification
* @param term1
* token containing int position 1
* @param term2
* token containing (optional) int position 2, or null if only 1 position.
*/
public void addSubstituentOrModification( Token mod_or_subst, Token term1, Token term2 )
throws SequenceFormatException
{
assert term1 != null;
assert mod_or_subst != null;
// try to get a Substituent for name, if none, then try to add as
// modification
String name = mod_or_subst.getText();
Substituent s = Substituents.getSubstituent( name );
// if not a Substituent, then must be a modification
if ( s == null )
{
addModification( mod_or_subst, term1, term2 );
return;
}
// ok, it's a substituent, make sure it is the right type
// to be allowed to be merged into the monosaccharide
if ( ! substituentIsPartOfMonosaccharide( s ) )
{
throw createSyntaxException(
mod_or_subst,
"Substituent cannot be considered part of the monosaccharide"
);
}
// get the monosac (token) we're going to attach to.
// the last residue token added *must* be castable to MonosacResidueToken
ResidueToken rt = lastResidue();
assert rt instanceof MonosacResidueToken;
MonosacResidueToken mrt = (MonosacResidueToken) rt;
// get terminus position
int position = getTerminusFor( term1 );
_set_substituent( mrt, mod_or_subst, s, position );
// if term2 != null, then it's a dual-point substituent like
// alkene, which we handle (simplistically) by adding the same
// substituent to both positions. this might need some additional
// code/abstraction in the future, such as moving the check for
// valid positions into the substituent class/instance.
if ( term2 != null )
{
log.trace("adding second position of dual point substituent");
int position2 = getTerminusFor( term2 );
if ( position == position2 )
{
throw createSyntaxException(
term2, "position2 cannot be equal to position1" );
}
// assert contiguous for now...
if ( position2 - position != 1 )
{
log.warn(
"dual point substituent with non-contiguous positions: "
+ "substituent="
+ s
+ ", position1="
+ position
+ ", position2="
+ position2
);
}
_set_substituent( mrt, mod_or_subst, s, position2 );
}
}
private final void _set_substituent( MonosacResidueToken mrt
, Token mod_or_subst
, Substituent s
, int position )
throws SequenceFormatException
{
if ( tracing )
{
log.trace(
"Adding substituent "
+ s
+ " to residue "
+ mrt
+ " at position "
+ position
);
traceParse( mod_or_subst, "A substituent attached to a monosaccharide" );
}
try
{
mrt.setSubstituent( s, position );
}
catch ( IllegalArgumentException ex )
{
SequenceFormatException sfex = createSyntaxException( mod_or_subst, ex.getMessage() );
sfex.initCause( ex );
throw sfex;
}
}
/* addModification *//*****************************************
*
* Attempt to add a {@link Modification} to current {@link Residue}
* ({@link #lastResidue}) at given positions.
*
* @param modification the token containing the name of the modification
* @param term1 token containing int position 1
* @param term2 token containing (optional) int position 2
*/
public void addModification( Token modification, Token term1, Token term2 )
throws SequenceFormatException
{
assert term1 != null;
assert modification != null;
Modification m = null;
ModificationType type = null;
ResidueToken r = lastResidue();
int t1 = getTerminusFor( term1 );
String modification_name = modification.getText();
// lookup modification name
try
{
type = ModificationType.forName( modification_name );
}
catch ( IllegalArgumentException ex )
{
SequenceFormatException sfex
= createSyntaxException( modification, ex.getMessage() );
sfex.initCause( ex );
throw sfex;
}
// is it a dual-point modification?
if ( term2 != null )
{
// dual-point
int t2 = getTerminusFor( term2 );
try
{
m = new Modification( type, t1, t2 );
}
catch ( IllegalArgumentException ex )
{
SequenceFormatException sfex
= createSyntaxException( term2, ex.getMessage() );
sfex.initCause( ex );
throw sfex;
}
}
else
{
// single-point
try
{
m = new Modification( type, t1 );
}
catch ( IllegalArgumentException ex )
{
SequenceFormatException sfex
= createSyntaxException( term1, ex.getMessage() );
sfex.initCause( ex );
throw sfex;
}
}
// add modification to residue
if ( debugging )
{
log.debug("Adding modification " + m + " to residue " + r );
traceParse( modification, "A modification" );
}
// TODO - add modification $m to $r
//
log.warn("TODO modifications...");
// Residue res = r.getResidue();
// if ( ! (res instanceof Monosaccharide) )
// {
// throw new SequenceFormatException(
// getSequence(),
// r.getColumn() - 1,
// "Residue was determined to be a "
// + res.getClass()
// + ", not a Monosaccharide"
// );
// }
// Monosaccharide monosac = (Monosaccharide) res;
// ComplexMonosaccharide cm = (monosac instanceof ComplexMonosaccharide)
// ? (ComplexMonosaccharide) monosac
// : new ComplexMonosaccharide( monosac );
// cm.addModification( m );
return;
}
/**
* Adds a (forward reference to a) repeat sub-structure to the
* current sugar.
* @param token
* token for the index of the repeat being referenced, index starts at 1.
*/
public void addRepeatResidue( Token token )
{
RepeatResidueToken rt = new RepeatResidueToken( this, token, null );
int i = 0;
try { i = Integer.parseInt( token.getText() ); }
catch ( Exception ex )
{
SequenceFormatException sfex = createSyntaxException(
token
, "Invalid repeat index: "
+ ex.getMessage()
);
sfex.initCause( ex );
throw sfex;
}
addResidue( rt );
repeats.add( rt );
// assume for now that the index given is the same as the
// the position of the repeat added to the list
assert repeats.size() == i;
if ( debugging )
{
log.debug("adding repeat " + i );
traceParse( token, "Forward repeat reference" );
}
return;
}
@Override
protected Sugar createSugar()
{
if ( repeats != null && repeats.size() > 0 )
{
if ( tracing )
log.trace("creating repeat sugar...");
return new SugarRepeat( graph.countVertices() );
}
else return super.createSugar();
}
/** Returns {@link SequenceFormat#Glycoct}. */
@Override
public final SequenceFormat getSequenceFormat()
{
return SequenceFormat.Glycoct;
}
@Override
public Sugar getSugar() throws SequenceFormatException
{
if ( debugging )
log.debug("raw parsed sugar AST:\n" + graph.toString() );
boolean has_repeats = ( repeats != null && repeats.size() > 0 );
// remove virtual RepeatResidueTokens from AST and re-stitch
// linkages to the correct places
if ( has_repeats )
inlineRepeats( graph );
// get rid of pesky NAcs and other common substituents
inlineSubstituents( graph );
Sugar sugar = super.getSugar();
// add SugarRepeatAnnotations to sugar
// note: that the passed Sugar will be a SugarRepeat if
// repeats were detected during parse.
if ( has_repeats )
addRepeatAnnotations( (SugarRepeat) sugar );
return sugar;
}
private final void addRepeatAnnotations( SugarRepeat sugar )
{
if ( debugging )
log.debug("adding " + repeats.size() + " repeat annotation(s)");
for ( RepeatResidueToken repeat_residue : repeats )
{
// Sugar is a SugarRepeat; see overridden #createSugar()
SugarRepeatAnnotation a = repeat_residue.getRepeatAnnotation();
sugar.addRepeatAnnotation( a );
}
}
/**
*<p>
* In this class, repeats are initially included into the AST
* as a virtual residue, to match the way they are handled in
* glycoct. The residues of the repeat themselves are treated as
* if attached to this virtual residue. This method removes the
* virtual residue from the graph and stitches its incoming and
* outgoing edges to the root and leaf residues of the repeat.
*</p>
*<p>
* The 4 possibilities are:
* 1) repeat residue is in the middle of the structure
* 2) repeat residue is at the root of the structure
* 3) repeat residue is at a leaf of the structure
* 4) the entire structure is the repeat
*</p>
*/
private final void inlineRepeats( Graph<LinkageToken,ResidueToken> ast )
{
if ( debugging )
log.debug("inlining repeat sub-tree(s)");
for ( RepeatResidueToken repeat_residue : repeats )
{
// need to remove RepeatResidueToken from AST; it is just a
// marker/placeholder residue for the repeat
Vertex<LinkageToken,ResidueToken> v = ast.getVertex( repeat_residue );
assert v != null;
List<Edge<LinkageToken,ResidueToken>> elist;
// incoming edge
elist = v.getIncomingEdges();
Edge<LinkageToken,ResidueToken> incoming_edge = null;
if ( elist.size() > 1 )
{
// glycoct doesn't support this anyway, should never happen
throw new UnsupportedOperationException(
"multi-connections to repeat start residue not handled");
}
else if ( elist.size() == 1 )
{
// the normal case...
incoming_edge = elist.get(0);
}
else
{
// virtual repeat residue must at the root
assert v == ast.getRootVertex()
: "expected " + v + " == " + ast.getRootVertex();
ast.setRootVertex(
ast.getVertex( repeat_residue.getRootResidueToken() ) );
}
// outgoing edge
elist = v.getOutgoingEdges();
Edge<LinkageToken,ResidueToken> outgoing_edge = null;
if ( elist.size() > 1 )
{
// glycoct doesn't support this anyway, should never happen
throw new UnsupportedOperationException(
"multi-connections from repeat end residue not handled");
}
else if ( elist.size() == 1 )
{
// the normal case...
outgoing_edge = elist.get(0);
}
else
{
// virtual repeat residue must be a leaf
assert ast.getLeafVertices().contains( v );
}
assert incoming_edge == null
|| outgoing_edge == null
|| incoming_edge.getChild() == outgoing_edge.getParent();
// remove v (the virtual repeat token) from the AST
// (this will also remove its edges)
ast.remove( v );
// then reconnect edges to the repeat sub-tree:
if ( incoming_edge != null )
{
ast.addEdge(
incoming_edge.getParent().getValue(),
repeat_residue.getRootResidueToken(),
incoming_edge.getValue()
);
}
if ( outgoing_edge != null )
{
ast.addEdge(
repeat_residue.getLeafResidueToken(),
outgoing_edge.getChild().getValue(),
outgoing_edge.getValue()
);
}
}
if ( tracing )
log.trace("AST after inling repeats:\n" + ast.toString() );
}
private void inlineSubstituents( Graph<LinkageToken,ResidueToken> ast )
{
if ( tracing )
log.trace("before pruning common substituents, AST is:\n" + ast );
List<Vertex<LinkageToken,ResidueToken>> vertices_to_remove = null;
Vertex<LinkageToken,ResidueToken> substit_vert = null;
Edge<LinkageToken,ResidueToken> substit_edge;
for ( ResidueToken rt : ast.getAllVertexValues() )
{
// if residue is a type that we consider to be part of the
// sugar, then coalesce it with the Residue it's attached to
// and remove it from the graph.
Residue r = rt.getResidue();
boolean residue_should_be_merged = (r instanceof Substituent)
&& substituentIsPartOfMonosaccharide( (Substituent) r );
// if ( "n-acetyl".equals( res_name ) || "n".equals( res_name ) )
if ( residue_should_be_merged )
{
if ( debugging )
log.debug("merging common substituent: " + rt );
substit_vert = ast.getVertex( rt );
// assume that NAc is always a terminating child residue (ie: leaf)
assert substit_vert.countAttachedEdges() == 1;
substit_edge = substit_vert.getAttachedEdges().iterator().next();
assert substit_vert == substit_edge.getChild();
LinkageToken lt = substit_edge.getValue();
int position = lt.getLinkage().getParentTerminus();
ResidueToken parent = substit_edge.getParent().getValue();
// if ( position <= 0 )
// {
// log.warn("common substituent NOT merged because position is unknown");
// continue;
// }
mergeCommonSubstituent( rt, parent, position );
// ast.remove( substit_vert );
if ( vertices_to_remove == null )
vertices_to_remove = new ArrayList<Vertex<LinkageToken,ResidueToken>>( 8 );
vertices_to_remove.add( substit_vert );
// linkages.remove( lt ); // is this even necessary?
// it.remove();
}
}
if ( vertices_to_remove != null )
{
if ( tracing )
log.trace("Removing " + vertices_to_remove.size() + " common substituents:");
for ( Vertex<LinkageToken,ResidueToken> substituent : vertices_to_remove )
ast.remove( substituent );
if ( tracing )
log.trace("after pruning common substituents, AST is now:\n" + ast );
}
}
void mergeCommonSubstituent( ResidueToken to_merge, ResidueToken recipient, int position )
{
Residue r = recipient.getResidue();
if ( ! (r instanceof Monosaccharide) )
{
throw createSyntaxException(
recipient
, "Can't merge residue token "
+ to_merge
+ " -- recieving residue "
+ recipient
+ "is not a Monosaccharide"
);
}
Monosaccharide m = (Monosaccharide) r;
Substituent s = (Substituent) to_merge.getResidue();
try
{
m.attach( s, position );
}
catch ( Exception ex )
{
SequenceFormatException sfex = createSyntaxException(
recipient
, "Caught exception while trying to merge substituent '"
+ to_merge
+ "' into '"
+ recipient
+ "': "
+ ex.getMessage()
);
sfex.initCause( ex );
throw sfex;
}
}
/** Returns the repeat corresponding to the given index */
public RepeatResidueToken getRepeat( Token repeat_index )
{
return repeats.get( getRepeatIndex( repeat_index ) );
}
/** Returns the index of the given repeat (token) in the {@link #repeats} list. */
public int getRepeatIndex( Token repeat_index )
{
int i = Integer.parseInt( repeat_index.getText() );
if ( i > repeats.size() || i < 1 )
{
throw createSyntaxException( repeat_index,
"Invalid repeat index, index outside bounds");
}
return i - 1;
}
/** Returns the value of the given repeat bound token. */
public int getRepeatBound( Token repeat_bound )
{
String token_text = repeat_bound.getText();
int bound = -1;
if ( ! (token_text.equals("?") || token_text.equals("-1")) )
bound = Integer.parseInt( token_text );
if ( bound < -1 )
bound = -1;
return bound;
}
// final Residue getResidue( Token index )
// {
// return getResidueTokenFor( index ).getResidue();
// }
final ResidueToken getResidueToken( Token index )
{
return residues.get( getResidueIndexFor( index ) );
}
/**
* Called when the start of a repeat sugar has been
* encountered during the parse.
*/
public void repeatStarts( Token repeat_index )
{
int index = getRepeatIndex( repeat_index );
if ( tracing )
log.trace("entering repeat, index=" + index );
RepeatResidueToken r = repeats.get( index );
repeatsStack.add( r );
}
/**
* Called when the end of a repeat sugar has been
* encountered during the parse.
*/
public void repeatEnds( Token repeat_index )
{
int index = getRepeatIndex( repeat_index );
if ( tracing )
log.trace("exiting repeat, index=" + index );
RepeatResidueToken r = repeatsStack.remove( repeatsStack.size() - 1 );
assert r == repeats.get( index );
}
/**
* Sets the lower and upper bounds of the range of the repeat with
* the given index.
* @see RepeatResidueToken
*/
public void setRepeatRange( Token repeat_index, Token lower_bound, Token upper_bound )
throws SequenceFormatException
{
// get index of repeat
int index = getRepeatIndex( repeat_index );
// lower repeat range bound
int lower = getRepeatBound( lower_bound );
// upper repeat range bound
int upper = getRepeatBound( upper_bound );
if ( lower != -1 && upper != -1 && lower > upper )
{
throw createSyntaxException( lower_bound,
"Invalid repeat range, left bound must be lower than right bound");
}
RepeatResidueToken rt = repeats.get( index );
rt.setRepeatRange( lower, upper );
return;
}
/**
* Sets the {@link Superclass} of the last added {@link Residue}
* to the value of the given {@link Token}.
* @see #lastResidue()
*/
public void setSuperclass( Token superclass_tok )
throws SequenceFormatException
{
String name = superclass_tok.getText();
// superclass name is already checked by the lexer, so no
// try/catch or checking required here...
Superclass s = Superclass.forName( name );
ResidueToken rt = lastResidue();
if ( tracing )
{
log.trace("setting superclass " + s + " on monosaccharide " + rt );
traceParse( superclass_tok, "Superclass descriptor" );
}
assert rt instanceof MonosacResidueToken;
MonosacResidueToken mrt = (MonosacResidueToken) rt;
mrt.setSuperclass( s );
}
/**
* Sets the ring closure positions of the last residue added from
* the given terminii tokens.
*/
public void setRingClosure( Token term1, Token term2 )
throws SequenceFormatException
{
int t1 = 0, t2 = 0;
if ( term1.getText() == "x" )
t1 = 0;
else
t1 = Integer.parseInt( term1.getText() );
if ( term2.getText() == "x" )
t2 = 0;
else
t2 = Integer.parseInt( term2.getText() );
if ( t1 != 0 && t1 == t2 )
{
throw createSyntaxException( term1,
"Invalid ring closure positions: terminii cannot be equal");
}
if ( t1 > t2 )
{
throw createSyntaxException( term1,
"Invalid ring closure position: second terminus cannot be less than first");
}
ResidueToken rt = lastResidue();
if ( tracing )
{
log.trace(
"setting ring closure positions "
+ t1
+ "-"
+ t2
+ " for residue "
+ rt
);
}
assert rt instanceof MonosacResidueToken;
MonosacResidueToken mrt = (MonosacResidueToken) rt;
mrt.setRingStart( t1 );
mrt.setRingEnd( t2 );
return;
}
/** Returns the last residue token added. */
public ResidueToken lastResidue()
{
return graph.lastVertex().getValue();
}
/**
* Expects a {@link Token} with a valid Glycoct basetype stem, ie:
* text of form "a-dglc", "o-lman", "a-dgro-dgal", etc.
*
* {@inheritDoc}
*/
@Override
protected ResidueToken createMonosaccharideToken( Token name_tok )
throws SequenceFormatException
{
int i = 0;
String name = name_tok.getText();
// extract anomer
Anomer a = null;
try
{
a = Anomer.forName( name.charAt( i ) );
if ( a == null )
throw new RuntimeException();
}
catch ( Exception ex )
{
SequenceFormatException sfex = createSyntaxException(
name_tok
, "Invalid anomer '"
+ name.charAt(i)
+ "'; valid values are: "
+ join(", ", Anomer.values() )
);
sfex.initCause( ex );
throw sfex;
}
i++;
// extract basetype(s)
List<Basetype> basetypes = new ArrayList<Basetype>( 2 );
while ( i < name.length() )
{
// hyphen
if ( name.charAt(i) != '-' )
{
throw createSyntaxException(
name_tok.getColumn() + i - 1, "Expected a hyphen '-'");
}
i = _extract_basetype( name_tok, i + 1, basetypes );
}
Basetype basetype = null;
try
{
basetype = getBasetype( basetypes );
}
catch ( Exception ex )
{
SequenceFormatException sfex = createSyntaxException(
name_tok, ex.getMessage() );
sfex.initCause( ex );
throw sfex;
}
// create monosac
// Monosaccharide monosac = new SimpleMonosaccharide( basetype );
// monosac.setAnomer( a );
MonosacResidueToken m = new MonosacResidueToken( this, name_tok, null );
m.setAnomer( a );
m.setBasetype( basetype );
return m;
}
/**
* Extracts 1 basetype from the token text, and places into passed List,
* returning new cursor position.
*
* @param t Token that contains basetype name text
* @param i current index position of parse into Token t's text
* @param dest List that accumulates basetypes parsed from t.
* @return index position i after extraction of 1 basetype
*/
private final int _extract_basetype( Token t, int i, List<Basetype> dest_list )
throws SequenceFormatException
{
String name = t.getText();
int endpos = name.indexOf('-', i );
if ( endpos == -1 )
endpos = name.length();
StereoConfig stereo = null;
try
{
stereo = StereoConfig.forName( name.charAt(i) );
if ( stereo == null )
throw new RuntimeException();
}
catch ( Exception ex )
{
SequenceFormatException sfex = createSyntaxException(
t.getColumn() + i - 1
, "Invalid stereo-configuration for monosaccharide '"
+ name.charAt(i)
+ "'; valid values are: "
+ join(", ", StereoConfig.values() )
);
sfex.initCause( ex );
throw sfex;
}
i++;
String basetype_name = name.substring( i, endpos );
CommonBasetype basetype = CommonBasetype.forName( basetype_name );
if ( basetype == null )
{
throw createSyntaxException(
t.getColumn() + i - 1
, "Unknown or invalid monosaccharide basetype '"
+ basetype_name
+ "'; see the list of permissible basetypes in class CommonBasetype"
);
}
dest_list.add( getBasetype( stereo, basetype ) );
return endpos;
}
//~~~~~~~~~~~~~~~~~~~~~~~ PRIVATE METHODS ~~~~~~~~~~~~~~~~~~~~~~~
/** Returns integer position of a linkage terminus from given {@link Token}. */
private final int getLinkageIndexFor( Token t )
throws SequenceFormatException
{
int link_index = Integer.parseInt( t.getText() );
// can't be <= 0
if ( link_index <= 0 )
throw createSyntaxException( t, "Linkage count cannot be <= 0" );
// can't be larger than 1 + the number of linkages we have
if ( link_index > linkages.size() + 1 )
{
throw createSyntaxException(
t
, "Invalid linkage index '"
+ link_index
+ "', should be "
+ (linkages.size() + 1)
);
}
return link_index;
}
/** Extract an integer residue index from a {@link Token}. */
private final int getResidueIndexFor( Token t )
throws SequenceFormatException
{
int index = 0;
try
{
index = Integer.parseInt( t.getText() );
}
catch ( Exception ex )
{
SequenceFormatException sfex = createSyntaxException(
t, "Invalid residue number: " + ex.getMessage() );
sfex.initCause( ex );
throw sfex;
}
if ( index <= 0 )
throw createSyntaxException( t, "Residue order number cannot be 0" );
if ( index > residues.size() )
{
throw createSyntaxException(
t
, "Invalid residue number - there are only "
+ residues.size()
+ " residue(s) in the sequence"
);
}
return index - 1;
}
/** Extract an integer terminus from a {@link Token}. */
private final int getTerminusFor( Token t )
throws SequenceFormatException
{
int terminus = 0;
String s = t.getText();
if ( s == null || s == "?" || s == "-1" )
return LinkageToken.UNKNOWN_TERMINUS;
try { terminus = Integer.parseInt( s ); }
catch ( Exception ex )
{
SequenceFormatException sfex = createSyntaxException(
t, "Invalid terminal position: " + ex.getMessage() );
sfex.initCause( ex );
throw sfex;
}
if ( terminus < 1 )
{
throw createSyntaxException(
t, "Terminal position cannot be < 1" );
}
return terminus;
}
} // end class GlycoctParserAdapter