/*
* EuroCarbDB, a framework for carbohydrate bioinformatics
*
* Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as
* indicated by the @author tags or express copyright attribution
* statements applied by the authors.
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
* A copy of this license accompanies this distribution in the file LICENSE.txt.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* Last commit: $Rev: 1932 $ by $Author: glycoslave $ on $Date:: 2010-08-05 #$
*/
package test.resourcesdb;
import java.io.PrintStream;
import java.util.Arrays;
import java.util.List;
import org.testng.annotations.*;
import org.eurocarbdb.resourcesdb.monosaccharide.Monosaccharide;
import org.eurocarbdb.resourcesdb.ResourcesDbException;
import org.eurocarbdb.resourcesdb.GlycanNamescheme;
import static org.eurocarbdb.resourcesdb.GlycanNamescheme.IUPAC;
import static org.eurocarbdb.resourcesdb.GlycanNamescheme.MONOSACCHARIDEDB;
@Test( groups="resourcesdb.parsing", sequential=true )
public class MonosacNameParsingTest
{
/** Iupac names of some common monosaccharides */
public static final List<String> simpleMonosacs
= Arrays.asList(
"a-D-Manp"
, "b-D-Manp"
, "b-D-Manf"
, "b-d-Manp"
, "B-d-Manp"
, "a-Man"
, "Manp"
, "Man"
, "b-D-Galp"
, "Galf"
, "Gal"
, "b-D-Xylf"
, "b-D-Araf"
, "b-D-Ribf"
, "b-D-Eryf"
);
/** Iupac names of some common open-chain monosaccharides */
public static final List<String> openchainMonosacs
= Arrays.asList(
"aldehydo-l-gal"
, "l-gal-aric"
, "keto-d-fru"
, "keto-d-xylhex2ulo"
, "aldehydo-d-xylhex1,2diulo"
);
/** Iupac names of some common sialic acids */
public static final List<String> sialicMonosacs
= Arrays.asList(
"a-D-neup"
, "a-D-neup5ac"
, "a-D-neup5nac"
, "a-D-neup5gc"
, "a-D-neup5ngc"
, "a-D-neup5ac8ac"
, "a-d-NeupAc"
, "d-gro-a-d-3-deoxy-galnon2ulop5NAc-onic"
);
/** Iupac names of some monosaccharides that have had a loss of
* one or more stereochemical centres. */
public static final List<String> stereolossMonosacs
= Arrays.asList(
"a-d-4-deoxy-Glcp3en3OMe"
, "a-d-glc2ulop"
, "b-D-Glcp2NH"
, "b-D-4-deoxy-Glcp"
, "b-D-2-deoxy-ManpA"
);
/** Iupac names of some indefinite (partially unknown) monosaccharides */
public static final List<String> indefiniteMonosacs
= Arrays.asList(
"a-?-Fucp"
, "D-Glc"
, "?-D-Glc"
, "?-L-Fuc"
);
/** Iupac names of some monosaccharides with deliberate syntactic or semantic errors.
* All of these are expected to throw exceptions */
public static final List<String> brokenMonosacs
= Arrays.asList(
"a-?-Fluc"
, "6-deoxy-Xylp"
, "a-d-7-deoxy-manHepp"
);
/** Iupac names of some large monosaccharides */
public static final List<String> supersizeMonosacs
= Arrays.asList(
"a-D-Hepp"
, "D-gro-a-D-manHepp"
, "D-gro-a-D-Hepp"
, "a-d-6-deoxy-manHepp"
, "d-gro-a-d-7-deoxy-manHepp"
);
//~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TESTS ~~~~~~~~~~~~~~~~~~~~~~~~~~
@Test
public void resourcesdbParseSimple()
{
System.out.println("parsing simple monosacs...");
parseList( simpleMonosacs );
}
@Test
public void resourcesdbParseOpenchain()
{
System.out.println("parsing openchain monosacs...");
parseList( openchainMonosacs );
}
@Test
public void resourcesdbParseSialic()
{
System.out.println("parsing sialic monosacs...");
parseList( sialicMonosacs );
}
@Test
public void resourcesdbParseStereoloss()
{
System.out.println("parsing stereoloss monosacs...");
parseList( stereolossMonosacs );
}
@Test
public void resourcesdbParseIndefinite()
{
System.out.println("parsing indefinite monosacs...");
parseList( indefiniteMonosacs );
}
@Test
public void resourcesdbParseSupersize()
{
System.out.println("parsing supersized monosacs...");
parseList( supersizeMonosacs );
}
@Test
public void resourcesdbParseIncorrect()
{
System.out.println("parsing deliberately broken monosacs...");
parseIncorrect( brokenMonosacs );
}
static final List<String> commonSubstituents = Arrays.asList(
"n-acetyl"//,
// "deoxy" - doesn't work
//"nh2" - doesn't work
);
/** Test not enabled -- Resourcesdb has bugs with name generation */
@Test( enabled=false )
public void resourcesdbAddSubstituents()
throws ResourcesDbException
{
System.out.println("trying to add various substituents...");
System.out.println();
System.out.println("--- simple monosacs ---");
generateNameFor( simpleMonosacs );
System.out.println();
System.out.println("--- indefinite monosacs ---");
generateNameFor( indefiniteMonosacs );
System.out.println();
System.out.println("--- supersized monosacs ---");
generateNameFor( supersizeMonosacs );
System.out.println();
System.out.println("--- open chain monosacs ---");
generateNameFor( openchainMonosacs );
}
/** Test not enabled -- Resourcesdb mass calculation doesn't work */
@Test( enabled=false )
public void resourcesdbCalculateMass()
throws ResourcesDbException
{
Monosaccharide m;
for ( String name : simpleMonosacs )
{
m = new Monosaccharide( IUPAC, name );//parseIupac( name );
double mass = m.getMonoMass();
System.out.println( name + ": " + mass );
}
}
// private methods ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
private final void generateNameFor( List<String> list )
throws ResourcesDbException
{
long start;
int errors = 0;
for ( String monosac_name : list )
{
for ( String substit_name : commonSubstituents )
{
System.out.println(
"trying "
+ monosac_name
+ " + "
+ substit_name
+ ":"
);
Monosaccharide m = parseIupac( monosac_name );
try
{
m.addSubstitution( substit_name, 2 );
System.out.println("addition of substituent ok");
}
catch ( ResourcesDbException ex )
{
System.out.println("addition of substituent error: " + ex.getMessage() );
errors++;
continue;
}
start = now();
String name = getMonosacNameFor( m );
System.out.println(
"generated name: "
+ name
+ " ("
+ (now() - start)
+ "msec )"
);
// check that MSDB can parse its own generated name
try
{
m = parseMsdb( name );
System.out.println("generated name parsed ok");
System.out.println();
}
catch ( ResourcesDbException ex )
{
System.out.println(
"generated name error: MSDB failed to parse generated name: "
+ ex.getMessage()
);
System.out.println();
errors++;
// throw ex;
}
}
}
if ( errors > 0 )
{
throw new RuntimeException(
errors
+ " sequences that were generated by MSDB failed to parse."
+ " check the test output for details."
);
}
}
private final String getMonosacNameFor( Monosaccharide m )
throws ResourcesDbException
{
// try
// {
m.buildName();
// }
// catch ( ResourcesDbException ex )
// {
// return "error: " + ex.toString() + "\n";
// }
return m.getName();
}
private final void parseIncorrect( List<String> names )
{
int successful = 0, failed = 0;
for ( String name : names )
{
try
{
parseIupac( name );
System.out.println( name + ": failed, expected an exception" );
failed++;
}
catch ( Exception ex )
{
System.out.println( name + ": successful, exception was: " + ex );
successful++;
}
}
System.out.println();
report( successful, failed );
}
private final void parseList( List<String> names )
{
int successful = 0, failed = 0;
long loop_start = now();
for ( String name : names )
{
long seq_start = now();
try
{
parseIupac( name );
System.out.println( name + ": successful, " + (now() - seq_start) + "msec");
successful++;
}
catch ( Exception ex )
{
System.out.println( name + ": failed, " + (now() - seq_start) + "msec");
System.out.println( "-> " + ex.toString() );
failed++;
}
}
System.out.println();
System.out.println(
"average parse time: "
+ ((now() - loop_start) / (successful + failed))
+ "msec/monosac"
);
report( successful, failed );
}
private final Monosaccharide parseIupac( String monosac_name )
throws ResourcesDbException
{
return new Monosaccharide( IUPAC, monosac_name );
}
private final Monosaccharide parseMsdb( String monosac_name )
throws ResourcesDbException
{
return new Monosaccharide( MONOSACCHARIDEDB, monosac_name );
}
private static final void report( int successful, int failed )
{
int total = successful + failed;
System.out.println(
""
+ total
+ " monosacs tested, "
+ successful
+ " successful, "
+ failed
+ " failed"
);
System.out.println();
if ( failed > 0 )
{
throw new RuntimeException(
""
+ failed
+ " of "
+ total
+ " sequences failed"
);
}
}
private static final long now()
{
return System.currentTimeMillis();
}
}