/*
* EuroCarbDB, a framework for carbohydrate bioinformatics
*
* Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as
* indicated by the @author tags or express copyright attribution
* statements applied by the authors.
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
* A copy of this license accompanies this distribution in the file LICENSE.txt.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* Last commit: $Rev: 1147 $ by $Author: glycoslave $ on $Date:: 2009-06-04 #$
*/
package org.eurocarbdb.sugar.seq.grammar;
import java.io.*;
import antlr.collections.AST;
import antlr.CommonAST;
import antlr.RecognitionException;
import antlr.TokenStreamException;
import antlr.TokenStreamRecognitionException;
import org.eurocarbdb.sugar.Sugar;
import org.eurocarbdb.sugar.SequenceFormatException;
import org.eurocarbdb.sugar.seq.grammar.IupacLexer;
import org.eurocarbdb.sugar.seq.grammar.IupacParser;
//import antlr.debug.DebuggingParser;
/* class IupacTest *//*********************************************
*
* Unit test class for Iupac sequence parsing.
*
* To run:
*<pre>
* java org.eurocarbdb.sugar.seq.grammar.IupacTest
*</pre>
*
* for an interactive shell:
*
*<pre>
* java org.eurocarbdb.sugar.seq.grammar.SequenceTestHarness\$Shell Iupac
*</pre>
*
* Don't forget to set your classpath to include the ANTLR, common logging,
* and log4j jars.
*
*/
public class IupacTest extends SequenceTestHarness
{
    /**
     * Sequences that are expected to be syntactically valid. Handed to
     * {@code testParsing} as its first argument by {@link #main}.
     */
    public static String[] correct_sequences = {
        // plain linear and branched sequences
        "Man",
        "Man(a1-4)Man",
        "GlcNAc(a1-4)Man",
        "Glc(a1-3)Glc(a1-3)Glc",
        "Man(a1-6)[Man(a1-3)]Man(a1-3)GlcNAc(b1-3)GlcNAc",
        "Man(a1-6)[Man(a1-4)][Man(a1-3)]GlcNAc(b1-3)GlcNAc",
        "GlcNAc(b1-4)[Man(a1-6)[Man(a1-4)[Man(a1-3)]Man(a1-4)][Man(a1-3)]GlcNAc(b1-3)]GlcNAc",

        // substituents
        "Glc(a1-)P(-4)Glc",
        "Glc(a1-)S(-4)Glc",
        "Glc(a1-)[Glc(a1-)]P(-4)Glc",
        "Glc(a1-6)[Glc(a1-)P(-3)]Glc(a1-)P(-4)Glc",
        "Glc(a1-)P-P(-4)Glc",

        // unknown anomer / linkage position
        "Man(a1-?)Man",
        "Man(u1-3)Man",
        "NeuAc(a2-?)Gal(b1-4)GlcNAc(b1-2)Man(a1-6)[NeuAc(a2-?)Gal(b1-4)GlcNAc(b1-2)Man(a1-3)]Man(b1-3)GlcNAc(b1-3)GlcNAc",

        // alternative linkage positions
        "Man(a1-4|6)Man",
        "Man(a1-3|4|6)Man",
        "Man(a1-3|6)[Man(a1-3|4)]GlcNAc(b1-3)GlcNAc",
        "Man(a1-?)[Man(a1-3|4)]GlcNAc(b1-3)GlcNAc",
        "Glc(a1-)P(-3|4)Glc",

        // repeat units, with and without explicit repeat counts/ranges
        "{Glc(a1-4)Glc}(a1-4)Glc",
        "Glc(a1-6){Glc(a1-4)Glc}(a1-4)Glc",
        "Man(b1-4){Glc(a1-4)Glc}(a1-4)Glc",
        "Man(b1-4){5:Glc(a1-4)Glc}(a1-4)Glc",
        "Man(b1-4){15:Glc(a1-4)Glc}(a1-4)Glc",
        "Man(b1-4){151:Glc(a1-4)Glc}(a1-4)Glc",
        "Man(b1-4){1515:Glc(a1-4)Glc}(a1-4)Glc",
        "Man(b1-4){10-21:Glc(a1-4)Glc}(a1-4)Glc",
        "Man(b1-4){1-20:Glc(a1-4)Glc}(a1-4)Glc",
        "Man(b1-4){Glc(a1-4)[Glc(a1-3)]Glc}(a1-4)Glc",
        "Man(b1-4){3-6:Glc(a1-4)[{1-5:Glc(a1-3)Glc}(a1-3)]Glc}(a1-4)Glc",

        // multiple connections between the same pair of residues
        "NeuAc(a2-8,1-9)NeuAc",
        "NeuAc(a2-4,8-8,9-9)NeuAc"
    };

    /**
     * Sequences that are expected to be syntactically invalid. Handed to
     * {@code testParsing} as its second argument by {@link #main}.
     */
    public static String[] incorrect_sequences = {
        // malformed residue names / general structure
        " ",
        ")Man",
        "Man(a1-4Man",
        "Man(a1(-4Man",
        "Mana1-4Man",
        "a1-4Man",
        "Man(a14)Man",
        "Man(a1-4Man-",
        "Man(a1-4)",
        "Man(a1-4)Glc&NAc",
        "Man(a1-4)(GlcNAc)",
        // Cases below are commented out, i.e. not currently exercised:
        //   "Man(a1-4)Mannose"   residue name too long
        //   "Ma"                 residue name too short
        "lcNAc(b1-3)GlcNAc",

        // malformed linkages
        "Man(1-4)GlcNAc",       // anomer missing
        "Man(x1-4)GlcNAc",      // anomer invalid
        "GlcNAc(a1-4))Man",     // surplus closing parenthesis
        "Man(a1-4Man",          // closing parenthesis missing
        "Man(a14)GlcNAc",       // '-' delimiter missing inside linkage

        // malformed branches
        "Glc(a1-3)]Glc(a1-3)Glc",   // ']' with no matching '['
        "Glc(a1-3)[Glc(a1-3)Glc",   // '[' with no matching ']'
        "Glc(a1-3)[]Glc(a1-3)Glc",  // branch with nothing in it
        "Man(a1-6)[[Man(a1-4)]Man(a1-3)]GlcNAc(b1-3)GlcNAc",   // doubled '['
        "Man(a1-6)[[Man(a1-4)]]Man(a1-3)GlcNAc(b1-3)GlcNAc",   // doubled '[' and ']'
        "Glc(a1-3)[Glc(a1-3)]",     // no root residue after the branch

        // malformed repeat bounds
        "Man(b1-4){5-1:Glc(a1-4)Glc}(a1-4)Glc",   // lower bound exceeds upper bound
        "Man(b1-4){1:Glc(a1-4)Glc}(a1-4)Glc"      // repeat bound too low
    };

    /**
     * Creates the parser used for a single test sequence.
     *
     * An {@code IupacLexer} is opened over the text of {@code seq}, an
     * {@code IupacParser} is stacked on top of it, and the raw sequence
     * string is also registered with the parser via {@code setSequence}.
     *
     * @param seq the sequence string to be parsed
     * @return the configured {@code IupacParser}
     */
    public ParserAdaptor getParserFor( String seq )
    {
        StringReader source = new StringReader( seq );
        IupacParser iupacParser = new IupacParser( new IupacLexer( source ) );
        iupacParser.setSequence( seq );

        return iupacParser;
    }

    /**
     * Command-line entry point: instantiates this test class and calls
     * {@code testParsing} with {@link #correct_sequences} and
     * {@link #incorrect_sequences}.
     *
     * @param args command-line arguments; not read by this method
     * @throws Exception whatever {@code testParsing} lets propagate
     */
    public static void main( String[] args ) throws Exception
    {
        IupacTest harness = new IupacTest();
        harness.testParsing( correct_sequences, incorrect_sequences );
    }
} // end class