/*
* EuroCarbDB, a framework for carbohydrate bioinformatics
*
* Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as
* indicated by the @author tags or express copyright attribution
* statements applied by the authors.
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
* A copy of this license accompanies this distribution in the file LICENSE.txt.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* Last commit: $Rev: 1273 $ by $Author: glycoslave $ on $Date:: 2009-06-26 #$
*/
package test.eurocarbdb.sugar.seq;
import org.testng.annotations.*;
import org.eurocarbdb.sugar.Sugar;
import org.eurocarbdb.sugar.SequenceFormat;
import org.eurocarbdb.sugar.SequenceFormatException;
/**
 * Unit test for parsing of GlycoCT-condensed sequences.
 * <p>
 * The test methods hand the fixture arrays declared at the bottom of this
 * class to the inherited {@code testParsing} / {@code testParsingExceptions}
 * helpers; the format being exercised is supplied by {@link #getParser()}.
 */
// @Test( groups={"sugar.parsing.glycoct"}, dependsOnGroups={"sugar.lib"} )
@Test( groups={"sugar.lib"} )
public class GlycoctSequenceFormatTest extends SequenceFormatTest
{
    /**
     * Returns the sequence format under test.
     * <p>
     * This is an accessor, not a test case, so it carries no method-level
     * {@code @Test}; TestNG ignores value-returning methods by default, which
     * made the former annotation misleading.
     *
     * @return {@link SequenceFormat#Glycoct}
     */
    public SequenceFormat getParser()
    {
        assert SequenceFormat.Glycoct != null;
        return SequenceFormat.Glycoct;
    }

    /** Parses the single-monosaccharide fixtures; the other positive tests depend on this one. */
    @Test
    public void glycoctSimpleResidueNames()
    {
        testParsing( correct_simple_sequences );
    }

    /** Parses fully defined multi-residue structures. */
    @Test( dependsOnMethods={"glycoctSimpleResidueNames"} )
    public void glycoctDefiniteSequences()
    {
        testParsing( correct_definite_sequences );
    }

    /** Parses structures containing unknowns, repeats and multi-connections. */
    @Test( dependsOnMethods={"glycoctSimpleResidueNames"} )
    public void glycoctIndefiniteSequences()
    {
        testParsing( correct_indefinite_sequences );
    }

    /** Feeds syntactically invalid sequences to the inherited exception check. */
    @Test
    public void glycoctSequenceFormatExceptions()
    {
        testParsingExceptions( incorrect_sequences );
    }

    /**
     * Syntactically correct sequences, each describing one monosaccharide
     * (optionally with substituents).
     */
    public static String[] correct_simple_sequences =
    {
        // seq 1
        // a-d-fructose-f
        "RES 1b:a-dara-HEX-2:5|2:keto;"

        // seq 2 - a deoxy sugar:
        // 2,6-dideoxy-3-O-methyl-a-D-arabino-HEXopyranose
        , "RES 1b:a-dara-HEX-1:5|2:d|6:d;"

        // seq 3 - a uronic acid:
        // D-Glucopyranosyluronic acid, b-d-GlcU-p
        , "RES 1b:b-dglc-HEX-1:5|6:a;"

        // seq 4 - an aldonic acid:
        // D-Gluconic acid
        , "RES 1b:o-dglc-HEX-0:0|1:a;"

        // seq 5 - an amino sugar:
        // 2,6-diamino-2,3,6-trideoxy-a-D-ribo-HEXopyranose
        , "RES 1b:a-drib-HEX-1:5|3:d;2s:n;3s:n;"
        + "LIN 1:1d(2-1)2n;2:1d(6-1)3n;"

        // seq 6 - a thiol sugar:
        // 3-amino-3,4-dideoxy-4-thio-a-D-galactopyranose
        // The thiol substituent is residue 3 (it was mis-numbered '4s', which
        // left the '3n' end of linkage 2 pointing at an undeclared residue).
        , "RES 1b:a-dgal-HEX-1:5;2s:n;3s:thiol;"
        + "LIN 1:1d(3-1)2n;2:1d(4-1)3n;"

        // seq 7 - an alditol:
        // D-Arabinitol
        , "RES 1b:o-dara-PEN-0:0|1:aldi;"

        // seq 8 - an intramolecular anhydride:
        // 3,6-anhydro-a-D-glucopyranose
        // NOTE(review): the substituent is spelled 'lactone' although the
        // comment describes an anhydride -- confirm against the GlycoCT manual.
        , "RES 1b:a-dglc-HEX-1:5;2s:lactone;"
        + "LIN 1:1d(3-6)2o;"

        // seq 9 - an unsaturated monosac:
        // 2,3-dideoxy-a-D-erythro-HEX-2-en-pyranose
        //, "RES 1b:a-dery-HEX-1:5|2:d|2,3:en|3d;" // error in manual '3d'
        , "RES 1b:a-dery-HEX-1:5|2:d|2,3:en|3:d;"

        // seq 10 - a lactone:
        // L-xylo-HEX-2-ulosono-1,4-lactone (Vitamin C isomer)
        , "RES 1b:o-lxyl-HEX-0:0|1:a|2:keto;"
        + "LIN 1:1d(1-4)1o;"

        // seq 11 - a sialic acid: NeuGc
        // CHECK: I think this sequence is wrong in the manual -- there should be
        // a linkage type id at the end of the sequence before the final semicolon.
        // The n-glycolyl substituent is residue 2 (it was mis-numbered '1s',
        // duplicating the id of the base residue while the linkage refers to 2).
        , "RES 1b:a-dgro-dgal-NON-2:6|1:a|2:keto|3:d;2s:n-glycolyl;"
        + "LIN 1:1o(5-1)2d;"
    };

    /**
     * Syntactically correct, fully defined multi-residue structures.
     * <p>
     * NOTE(review): the pieces are concatenated without any separator after
     * the {@code RES} / {@code LIN} section names (i.e. {@code "RES1b:..."}),
     * unlike the simple fixtures which use {@code "RES "} -- confirm that the
     * parser is meant to accept this.
     */
    public static String[] correct_definite_sequences =
    {
        // seq 12 - Lewis X
        "RES"
        + "1b:b-dgal-HEX-1:5;"
        + "2s:n-acetyl;"
        + "3b:a-lgal-HEX-1:5|6:d;"
        + "4b:b-dgal-HEX-1:5;"
        + "LIN"
        + "1:1d(2-1)2n;"
        + "2:1o(3-1)3d;"
        + "3:1o(4-1)4d;"
    };

    /**
     * Syntactically correct structures with unknowns: undetermined linkage
     * positions, repeat units and multi-connected residues.
     */
    public static String[] correct_indefinite_sequences =
    {
        // seq 13 - unknown ('?') parent linkage positions
        "RES"
        + "1b:a-dglc-HEX-1:5;"
        + "2s:n-acetyl;"
        + "3b:a-dery-HEX-1:5|2:d;" // error in manual '2d'
        + "4b:a-dgro-dgal-NON-2:6|1:a|2:keto|3:d;"
        + "5s:n;"
        + "6b:a-dtal-HEX-1:5|6:d;" // error in manual '6d'
        + "7s:n;"
        + "LIN"
        + "1:1d(?-1)2n;"
        + "2:4d(?-1)5n;"
        + "3:6d(?-1)7n;"

        // structures with linkage alternates (none yet)

        // sequences with repeats
        // NOTE(review): the REP entry below has no trailing ';' before the
        // nested "RES" section -- confirm this is the intended syntax.
        , "RES"
        + "1b:a-dgal-HEX-1:5;"
        + "2s:n-acetyl;"
        + "3b:b-dgal-HEX-1:5;"
        + "4b:b-dglc-HEX-1:5;"
        + "5s:n-acetyl;"
        + "6b:b-dgal-HEX-1:5;"
        + "7r:r1;"
        + "8b:b-dglc-HEX-1:5;"
        + "9s:n-acetyl;"
        + "10b:b-dgal-HEX-1:5;"
        + "LIN"
        // + "R1:1d(2+1)2n;"
        + "1:1d(2+1)2n;"
        + "2:1o(3+1)3d;"
        + "3:1o(6+1)4d;"
        + "4:4d(2+1)5n;"
        + "5:4o(4+1)6d;"
        + "6:6o(6+1)7n;"
        + "7:7n(6+1)8d;"
        + "8:8d(2+1)9n;"
        + "9:8o(4+1)10d;"
        + "REP"
        + "REP1:13o(6+1)11d=-1--1"
        + "RES"
        // + "R11b:b-dglc-HEX-1:5;"
        + "11b:b-dglc-HEX-1:5;"
        + "12s:n-acetyl;"
        + "13b:b-dgal-HEX-1:5;"
        + "LIN"
        + "10:11d(2+1)12n;"
        + "11:11o(4+1)13d;"

        // sequences with multiconnections
        // (newline-separated entries, no ';' terminators)
        , "RES\n"
        + "1b:x-dglc-HEX-x:x\n"
        + "2b:b-dglc-HEX-1:5\n"
        + "3b:b-dglc-HEX-1:5\n"
        + "4b:b-dglc-HEX-1:5\n"
        + "5b:b-dglc-HEX-1:5\n"
        + "6b:b-dglc-HEX-1:5\n"
        + "7b:b-dglc-HEX-1:5\n"
        + "LIN\n"
        + "1:1o(6+1)2d\n"
        + "2:2o(3+1)3d\n"
        + "3:2o(6+1)4d\n"
        + "4:4o(6+1)5d\n"
        + "5:5o(3+1)6d\n"
        + "6:5o(6+1)7d\n"
    };

    /**
     * Syntactically incorrect sequences, passed to the inherited
     * {@code testParsingExceptions} helper. The trailing comment on each
     * entry names the error it contains.
     */
    public static String[] incorrect_sequences =
    {
        // basic sequence errors
        "" // 1: empty string
        , " " // 2: space only
        , "1b:a-dara-HEX-2:5|2:keto;" // 3: no RES section id
        // , "RES 1b:a-dara-HEX-2:5|2:keto" // 4: missing semicolon at end <- this is now ok
        , "RES 1b:a-dara-HEX-2:5|2:keto|" // 5: invalid end char
        , "RES 1b:a-dara-HEX-2:5|2:keto;;"// 6: double semicolon at end
        , "RES 1:a-dara-HEX-2:5|2:keto;" // 7: missing residue type 'b'
        , "RES b:a-dara-HEX-2:5|2:keto;" // 8: missing residue numbering
        , "RES :a-dara-HEX-2:5|2:keto;" // 9: missing residue number/type
        , "RES 1b:a-dara-2:5|2:keto;" // 10: missing ring config

        // ring closure errors
        , "RES 1b:a-dara-HEX|2:keto;" // 11: missing ring closure '-2:5'
        , "RES 1b:a-dara-HEX-2:0|2:keto;" // 12: invalid ring closure position
        , "RES 1b:a-dara-HEX-2:2|2:keto;" // 13: duplicate ring closure position
        , "RES 1b:a-dara-HEX-2:5:2:keto;" // 14: '|' mistyped as ':'

        // modification errors
        , "RES 1b:a-dara-HEX-2:5|0:en;" // 15: invalid modification terminus position
        , "RES 1b:a-dara-HEX-2:5|2,2:en;" // 16: duplicate modification terminus position
        , "RES 1b:a-dara-HEX-2:5|2:zzzzz;"// 17: invalid mod name

        // structures with linkage syntax errors (none yet)
        // structures with branching syntax errors (none yet)
    };
} // end class