/*
* EuroCarbDB, a framework for carbohydrate bioinformatics
*
* Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as
* indicated by the @author tags or express copyright attribution
* statements applied by the authors.
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
* A copy of this license accompanies this distribution in the file LICENSE.txt.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* Last commit: $Rev: 1932 $ by $Author: glycoslave $ on $Date:: 2010-08-05 #$
*/
package test.eurocarbdb.sugar.seq;
import org.testng.annotations.*;
import org.eurocarbdb.sugar.Sugar;
import org.eurocarbdb.sugar.SequenceFormat;
import org.eurocarbdb.sugar.SequenceFormatException;
/* class IupacSequenceFormatTest *//*******************************
*
*   Unit test class for Iupac sequence parsing.
*
*   Supplies the Iupac {@link SequenceFormat} plus two tables of test
*   sequences (well-formed and malformed) to the test helpers inherited
*   from {@link SequenceFormatTest}. The class-level annotation places
*   the tests in the "sugar.lib" group.
*/
// Alternative grouping, currently disabled:
// @Test( groups={"sugar.parsing.iupac"}, dependsOnGroups={"sugar.lib"} )
@Test(groups = {"sugar.lib"})
public class IupacSequenceFormatTest extends SequenceFormatTest {

    /**
     * Returns the sequence format exercised by this class.
     *
     * @return {@code SequenceFormat.Iupac}; when JVM assertions are enabled,
     *         a null value trips the assert before returning.
     */
    @Test
    public SequenceFormat getParser() {
        final SequenceFormat iupac = SequenceFormat.Iupac;
        assert iupac != null;
        return iupac;
    }

    /**
     * Passes every entry of {@link #correct_sequences} to the inherited
     * {@code testParsing} helper (presumably each one must parse cleanly;
     * confirm against SequenceFormatTest).
     */
    @Test
    public void iupacSequences() {
        testParsing(correct_sequences);
    }

    /**
     * Passes every entry of {@link #incorrect_sequences} to the inherited
     * {@code testParsingExceptions} helper (presumably each one must be
     * rejected with an exception; confirm against SequenceFormatTest).
     */
    @Test
    public void iupacSequenceExceptions() {
        testParsingExceptions(incorrect_sequences);
    }

    /**
     * Passes {@link #correct_sequences} together with the Iupac format to
     * the inherited {@code testSequenceGeneration} helper.
     */
    @Test
    public void iupacSequenceGeneration() {
        testSequenceGeneration(correct_sequences, SequenceFormat.Iupac);
    }

    /** Syntactically correct Iupac sequences, grouped by the feature they exercise. */
    public static String[] correct_sequences = new String[] {
        // plain ("regular") linear and branched sequences
        "Man",
        "Man(a1-4)Man",
        "GlcNAc(a1-4)Man",
        "Glc(a1-3)Glc(a1-3)Glc",
        "Man(a1-6)[Man(a1-3)]Man(a1-3)GlcNAc(b1-3)GlcNAc",
        "Man(a1-6)[Man(a1-4)][Man(a1-3)]GlcNAc(b1-3)GlcNAc",
        "GlcNAc(b1-4)[Man(a1-6)[Man(a1-4)[Man(a1-3)]Man(a1-4)][Man(a1-3)]GlcNAc(b1-3)]GlcNAc",

        // substituents
        "Glc(a1-)P(-4)Glc",
        "Glc(a1-)S(-4)Glc",
        "Glc(a1-)[Glc(a1-)]P(-4)Glc",
        "Glc(a1-6)[Glc(a1-)P(-3)]Glc(a1-)P(-4)Glc",
        // "Glc(a1-)P(-)P(-4)Glc",   <- todo

        // unknown linkage position / unknown anomer
        "Man(a1-?)Man",
        "Man(u1-3)Man",
        "NeuAc(a2-?)Gal(b1-4)GlcNAc(b1-2)Man(a1-6)[NeuAc(a2-?)Gal(b1-4)GlcNAc(b1-2)Man(a1-3)]Man(b1-3)GlcNAc(b1-3)GlcNAc",

        // linkage alternates
        "Man(a1-4|6)Man",
        "Man(a1-3|4|6)Man",
        "Man(a1-3|6)[Man(a1-3|4)]GlcNAc(b1-3)GlcNAc",
        "Man(a1-?)[Man(a1-3|4)]GlcNAc(b1-3)GlcNAc",
        "Glc(a1-)P(-3|4)Glc",

        // repeats, with and without explicit repeat counts/ranges
        "{Glc(a1-4)Glc}(a1-4)Glc",
        "Glc(a1-6){Glc(a1-4)Glc}(a1-4)Glc",
        "Man(b1-4){Glc(a1-4)Glc}(a1-4)Glc",
        "Man(b1-4){5:Glc(a1-4)Glc}(a1-4)Glc",
        "Man(b1-4){15:Glc(a1-4)Glc}(a1-4)Glc",
        "Man(b1-4){151:Glc(a1-4)Glc}(a1-4)Glc",
        "Man(b1-4){1515:Glc(a1-4)Glc}(a1-4)Glc",
        "Man(b1-4){10-21:Glc(a1-4)Glc}(a1-4)Glc",
        "Man(b1-4){1-20:Glc(a1-4)Glc}(a1-4)Glc",
        "Man(b1-4){Glc(a1-4)[Glc(a1-3)]Glc}(a1-4)Glc",
        "Man(b1-4){3-6:Glc(a1-4)[{1-5:Glc(a1-3)Glc}(a1-3)]Glc}(a1-4)Glc",

        // multiconnections
        "NeuAc(a2-8,1-9)NeuAc",
        "NeuAc(a2-4,8-8,9-9)NeuAc"
    };

    /** Syntactically incorrect Iupac sequences, grouped by the kind of error. */
    public static String[] incorrect_sequences = new String[] {
        // residue name syntax errors
        " ",
        ")Man",
        "Man(a1-4Man",
        "Man(a1(-4Man",
        "Mana1-4Man",
        "a1-4Man",
        "Man(a14)Man",
        "Man(a1-4Man-",
        "Man(a1-4)",
        "Man(a1-4)Glc&NAc",
        "Man(a1-4)(GlcNAc)",
        // "Man(a1-4)Mannose",   // residue name too long
        // "Ma",                 // residue name too short
        "lcNAc(b1-3)GlcNAc",

        // linkage syntax errors
        "Man(1-4)GlcNAc",       // missing anomer
        "Man(x1-4)GlcNAc",      // invalid anomer
        "GlcNAc(a1-4))Man",     // extra closing parenthesis
        "Man(a1-4Man",          // missing closing parenthesis
                                // NOTE(review): same string as the third entry of this array
        "Man(a14)GlcNAc",       // missing internal delimiter

        // branching syntax errors
        "Glc(a1-3)]Glc(a1-3)Glc",                               // closing bracket with no opening bracket
        "Glc(a1-3)[Glc(a1-3)Glc",                               // opening bracket that is never closed
        "Glc(a1-3)[]Glc(a1-3)Glc",                              // empty branch
        "Man(a1-6)[[Man(a1-4)]Man(a1-3)]GlcNAc(b1-3)GlcNAc",    // doubled opening branch delimiter
        "Man(a1-6)[[Man(a1-4)]]Man(a1-3)GlcNAc(b1-3)GlcNAc",    // doubled branch delimiters on both sides
        "Glc(a1-3)[Glc(a1-3)]",                                 // missing root residue

        // repeat syntax errors
        "Man(b1-4){5-1:Glc(a1-4)Glc}(a1-4)Glc",                 // repeat bounds in wrong order
        "Man(b1-4){1:Glc(a1-4)Glc}(a1-4)Glc"                    // repeat bound too low
    };

} // end class