/*
* EuroCarbDB, a framework for carbohydrate bioinformatics
*
* Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as
* indicated by the @author tags or express copyright attribution
* statements applied by the authors.
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
* A copy of this license accompanies this distribution in the file LICENSE.txt.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* Last commit: $Rev: 1231 $ by $Author: glycoslave $ on $Date:: 2009-06-19 #$
*/
package org.eurocarbdb.sugar.seq.grammar;
import java.io.*;
import java.util.*;
import antlr.collections.AST;
import antlr.CommonAST;
import antlr.RecognitionException;
import antlr.TokenStreamException;
import antlr.TokenStreamRecognitionException;
import org.eurocarbdb.sugar.Sugar;
import org.eurocarbdb.sugar.SequenceFormatException;
import org.eurocarbdb.sugar.seq.grammar.GlycoctLexer;
import org.eurocarbdb.sugar.seq.grammar.GlycoctParser;
//import antlr.debug.DebuggingParser;
//import antlr.debug.misc.ASTFrame;
/**
<pre>
Usage:
to test:
java -cp lib org.eurocarbdb.sugar.seq.grammar.GlycoctTest
for an interactive shell:
java -cp 'lib:lib/antlr-2.7.5.jar' org.eurocarbdb.sugar.seq.grammar.GlycoctTest\$Shell
</pre>
*/
public abstract class SequenceTestHarness
{
    /** Log4j layout pattern used by both the interactive shell and the test runner. */
    private static final String LOG_PATTERN = "%20C{1} : %m%n";

    /**
     * Creates an immediately-flushing log4j console appender using
     * {@link #LOG_PATTERN} and registers it through
     * {@code BasicConfigurator.configure}.
     */
    private static void configureConsoleLogging()
    {
        org.apache.log4j.ConsoleAppender appender
            = new org.apache.log4j.ConsoleAppender(
                new org.apache.log4j.PatternLayout( LOG_PATTERN ) );
        appender.setImmediateFlush( true );
        org.apache.log4j.BasicConfigurator.configure( appender );
    }

    /** For interactive use. */
    public static class Shell
    {
        /**
         * Reads sequences from standard input, one per line, and parses each
         * one with the harness selected by the first command-line argument.
         * The loop ends when standard input is exhausted.
         *
         * @param args {@code args[0]} is the format name; the class
         *             {@code <this package>.<format>Test} is loaded
         *             reflectively and must be a {@link SequenceTestHarness}
         *             with a no-argument constructor.
         * @throws Exception if the harness class cannot be loaded or
         *                   instantiated, or if reading standard input fails.
         */
        public static void main( String[] args ) throws Exception
        {
            configureConsoleLogging();

            if ( args == null || args.length == 0 )
            {
                System.err.println(
                    "usage: java " + Shell.class.getName() + " <format>" );
                System.exit( 1 );
                return;
            }

            String format = args[0];
            String packageName
                = SequenceTestHarness.class.getPackage().getName();

            // A fully-qualified name needs a '.' between the package and the
            // simple class name. A leading '.' supplied by the caller is
            // still accepted so that existing invocations keep working.
            String className = format.startsWith( "." )
                ? packageName + format + "Test"
                : packageName + "." + format + "Test";

            SequenceTestHarness test
                = (SequenceTestHarness) Class.forName( className )
                    .getDeclaredConstructor()
                    .newInstance();

            // One reader for the whole session: re-wrapping System.in on
            // every iteration can drop input that an earlier reader had
            // already buffered.
            BufferedReader in
                = new BufferedReader( new InputStreamReader( System.in ) );

            String prompt = "enter a sugar sequence > ";
            while ( true )
            {
                System.out.print( prompt );
                System.out.flush();
                prompt = "enter another sugar sequence > ";

                String seq = in.readLine();
                if ( seq == null )
                {
                    // readLine() returns null once the input is exhausted.
                    System.err.println( "quitting" );
                    return;
                }

                try
                {
                    Sugar s = test.getSugar( seq );
                    System.err.println( "sequence is correct" );
                    System.out.println( s );
                }
                catch ( SequenceFormatException e )
                {
                    System.err.println( "Syntax error: " + e.getMessage() );
                    e.printStackTrace();
                }
                catch ( Exception e )
                {
                    // Anything else is unexpected: report it before leaving
                    // so the reason for quitting is visible.
                    e.printStackTrace();
                    System.err.println( "quitting" );
                    System.exit( 1 );
                }
            }
        }
    }

    /**
     * Supplies the parser that will be run over the given sequence.
     *
     * @param seq the sequence text to be parsed.
     * @return a parser adaptor for {@code seq}.
     */
    public abstract ParserAdaptor getParserFor( String seq );

    /**
     * Parses the given sequence with the parser from
     * {@link #getParserFor(String)} and prints the resulting graph to
     * standard output.
     *
     * @param seq the sequence text to parse.
     * @return always {@code null} at present, because translation of the
     *         parsed graph into a {@link Sugar} is not enabled yet (see the
     *         TODO in the body).
     * @throws SequenceFormatException if the parser reports a recognition
     *         error; the reported column is converted to a zero-based index.
     */
    public Sugar getSugar( String seq ) throws SequenceFormatException
    {
        System.out.println("parsing seq '" + seq + "'" );

        ParserAdaptor parser = getParserFor( seq );

        try
        {
            parser.sugar();
        }
        catch ( RecognitionException e )
        {
            throw new SequenceFormatException(
                seq,
                e.column - 1,
                e.getMessage()
            );
        }
        catch ( TokenStreamRecognitionException e )
        {
            // The lexer wraps the underlying recognition error; its column
            // is the position to report.
            throw new SequenceFormatException(
                seq,
                e.recog.column - 1,
                e.getMessage()
            );
        }
        catch ( TokenStreamException e )
        {
            // Deliberately best-effort: other token stream failures are
            // logged and parsing output is still reported below.
            e.printStackTrace();
        }

        System.out.println();
        System.out.println( "parsed sugar AST:" );
        System.out.println( parser.graph );

        // TODO: translation needs more work, esp. unknown/missing linkages.
        // Once ready, obtain the Sugar via parser.getSugar(), print it and
        // return it instead of null.

        System.out.println("seq was " + seq );

        return null;
    }

    /**
     * Runs the parser over two sets of sequences and prints a report to
     * standard error: every sequence in the first set is expected to parse,
     * every sequence in the second set is expected to be rejected with a
     * {@link SequenceFormatException}.
     *
     * <p>Failures are collected and reported in the summary; this method
     * does not throw for them. A {@code RuntimeException} raised while
     * parsing one of the supposedly correct sequences is not caught here
     * and propagates to the caller.</p>
     *
     * @param correct_sequences   sequences that must parse without error.
     * @param incorrect_sequences sequences containing deliberate syntax
     *                            errors.
     */
    public void testParsing( String[] correct_sequences,
                             String[] incorrect_sequences )
    {
        configureConsoleLogging();

        // Every failed test contributes exactly one entry, so the size of
        // this list is the failure count used in the summary.
        List<Exception> failed = new ArrayList<Exception>();

        //  CORRECT SEQUENCES
        //
        //  none of these should throw format exceptions.
        //
        System.err.println("=== correct sequences ===");

        int attempted = 0;
        int parsedCount = 0;
        long totalMsec = 0;
        int totalCharsParsed = 0;

        for ( String seq : correct_sequences )
        {
            try
            {
                System.err.println();
                System.err.println( "--- parsing correct sequence "
                                  + ++attempted
                                  + " ---");
                System.err.println( seq );

                long startTime = System.currentTimeMillis();
                getSugar( seq );
                long elapsedMsec = System.currentTimeMillis() - startTime;

                // Timing statistics only cover sequences that parsed.
                totalMsec += elapsedMsec;
                totalCharsParsed += seq.length();
                parsedCount++;

                System.err.println("TEST PASSED: sequence appears correct");
                System.err.println("parse took "
                                  + elapsedMsec
                                  + " msec"
                                  );
            }
            catch ( SequenceFormatException e )
            {
                //  this means a correct sequence is actually wrong, or
                //  there is an error in the parser.
                System.err.println();
                System.err.println( "*** TEST FAILED ***" );
                System.err.println( "this sequence should have parsed "
                                  + "correctly, but threw a parse exception "
                                  + "-- check it!!!"
                                  );
                e.printStackTrace();
                failed.add( e );
            }
        }

        //  INCORRECT SEQUENCES
        //
        //  all of these *should* throw SequenceFormatExceptions.
        //
        System.err.println();
        System.err.println("=== syntactically incorrect sequences ===");

        int attemptedIncorrect = 0;
        for ( String seq : incorrect_sequences )
        {
            System.err.println();
            System.err.println( "--- parsing incorrect sequence "
                              + ++attemptedIncorrect
                              + " ---");
            System.err.println( seq );

            boolean accepted = false;
            try
            {
                getSugar( seq );
                accepted = true;
            }
            catch ( SequenceFormatException e )
            {
                System.err.println();
                System.err.println( "TEST PASSED: Sequence correctly judged as "
                                  + "incorrect, exception was:" );
                e.printStackTrace();
            }
            catch ( RuntimeException e )
            {
                // The parser failed, but not in the way that was expected.
                System.err.println(
                    "ERROR!!! Expected a SequenceFormatException, but got a "
                    + e.getClass().getName()
                );
                System.err.println( e.toString() );
                failed.add( e );
            }

            if ( accepted )
            {
                // No exception at all: the faulty sequence was accepted.
                RuntimeException failure = new RuntimeException(
                    "*** TEST FAILED ***\n"
                    + seq
                    + "\n"
                    + "the sequence above was determined to be correct, "
                    + "when it should have thrown a sequence format error "
                    + "-- check it!" );
                System.err.println( failure.getMessage() );
                failed.add( failure );
            }
        }

        //  reporting...
        System.err.println();
        System.err.println("=== SUMMARY ===");
        System.err.println( failed.isEmpty()
                            ? "All tests successful"
                            : failed.size() + " test(s) failed"
                            );

        //  some performance metrics...
        System.err.println( String.format(
            "total parse time (for the %d correct sequences): %d msec",
            parsedCount, totalMsec ) );

        // Averages are skipped when nothing was parsed, to avoid a 0/0.
        if ( parsedCount > 0 )
        {
            System.err.println( String.format(
                "avg parse time / sequence: %.1f msec",
                (double) totalMsec / parsedCount ) );
        }

        if ( totalCharsParsed > 0 )
        {
            System.err.println( String.format(
                "avg parse time / sequence char: %.3f msec",
                (double) totalMsec / totalCharsParsed ) );
        }

        if ( ! failed.isEmpty() )
        {
            System.err.println();
            System.err.println( "The tests that failed were:");
            for ( Exception e : failed )
            {
                System.err.println();
                e.printStackTrace();
            }
        }
    }
}