/* * EuroCarbDB, a framework for carbohydrate bioinformatics * * Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as * indicated by the @author tags or express copyright attribution * statements applied by the authors. * * This copyrighted material is made available to anyone wishing to use, modify, * copy, or redistribute it subject to the terms and conditions of the GNU * Lesser General Public License, as published by the Free Software Foundation. * A copy of this license accompanies this distribution in the file LICENSE.txt. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * Last commit: $Rev: 1560 $ by $Author: glycoslave $ on $Date:: 2009-07-21 #$ */ package org.eurocarbdb.sugar.seq; import java.io.StringReader; import java.util.List; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import org.apache.log4j.Logger; import org.eurocarbdb.sugar.Sugar; import org.eurocarbdb.sugar.Residue; import org.eurocarbdb.sugar.Substituent; import org.eurocarbdb.sugar.Substituents; import org.eurocarbdb.sugar.Modification; import org.eurocarbdb.sugar.Monosaccharide; import org.eurocarbdb.sugar.SugarSequence; import org.eurocarbdb.sugar.SequenceFormat; import org.eurocarbdb.sugar.CommonSubstituent; import org.eurocarbdb.sugar.SequenceFormatException; import org.eurocarbdb.sugar.impl.ComplexMonosaccharide; import org.eurocarbdb.sugar.seq.grammar.GlycoctLexer; import org.eurocarbdb.sugar.seq.grammar.GlycoctParser; import org.eurocarbdb.sugar.seq.grammar.ParserAdaptor; import org.eurocarbdb.sugar.seq.grammar.GlycoctParserAdaptor; import static org.eurocarbdb.util.StringUtils.join; /* class GlycoctSequenceFormat *//********************************* * * Implements parsing and generation of carbohydrate sequences in * GlycoCT format. * * @author mjh * @see GlycoctParser * @see GlycoctLexer * @see GlycoctParserAdaptor * @see SugarSequence * @see Sugar */ public class GlycoctSequenceFormat implements SequenceFormat { //~~~~~~~~~~~~~~~~~~~~~~ STATIC FIELDS ~~~~~~~~~~~~~~~~~~~~~~~~~~ /** Logging handle. */ static final Logger log = Logger.getLogger( GlycoctSequenceFormat.class ); public static final String UNKNOWN_RING_POSITION = "x"; //~~~~~~~~~~~~~~~~~~~~~~~~~ FIELDS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ //~~~~~~~~~~~~~~~~~~~~~~ CONSTRUCTORS ~~~~~~~~~~~~~~~~~~~~~~~~~~~ //~~~~~~~~~~~~~~~~~~~~~ STATIC METHODS ~~~~~~~~~~~~~~~~~~~~~~~~~~ /** * Returns the GlycoCT sequence for the passed Monosaccharide. * @see #getSequence(Monosaccharide) * * @deprecated use {@link #getSequence(Monosaccharide)} */ @Deprecated public static String getGlycoCTName( Monosaccharide m ) { return new GlycoctSequenceFormat().getSequence( m ); } // (no constructors) //~~~~~~~~~~~~~~~~~~~~~~~~~ METHODS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /* getName *//************************************************* * * Returns "GlycoCT", the name of this format. * * @see SequenceFormat#getName() */ public String getName() { return "GlycoCT"; } /* getSugar *//******************************************* * * Parses a GlycoCT sequence string into a Sugar object. * * @see SequenceFormat.getSugar(String) */ public Sugar getSugar( String sequence ) throws SequenceFormatException { log.debug("note: replacing unknown terminii ('-1', 'x') with '?', and linearising sequence..."); String tmp = sequence; tmp = tmp.replace("x:x", "0:0"); tmp = tmp.replace('\n', ' '); tmp = tmp.replace("(-1", "(?"); tmp = tmp.replace("--1)", "-?)"); // assert sequence.length() == tmp.length(); sequence = tmp; GlycoctLexer lexer = new GlycoctLexer( new StringReader( sequence ) ); GlycoctParser parser = new GlycoctParser( lexer ); ParserAdaptor.performParse( parser, sequence ); return parser.getSugar(); } /** * Parses {@link Monosaccharide}s sequences in GlycoCT format, ie: * monosaccharide names of the form "a-dman", "b-dglc", "a-dgro-dgal". */ public Monosaccharide getMonosaccharide( String seq ) throws SequenceFormatException { if ( seq == null || seq.length() == 0 ) throw new IllegalArgumentException( "Monosaccharide name can't be null or zero-length"); log.warn("this method is currently returning null"); return null; } public Substituent getSubstituent( String seq ) throws SequenceFormatException { // throw new UnsupportedOperationException( "NOT YET IMPLEMENTED" ); // return CommonSubstituent.forName( seq ); return Substituents.getSubstituent( seq ); } /* getSequence *//***************************************** * * Produces a GlycoCT sequence for the passed Sugar. * @see SequenceFormat.getSugar(Sugar) */ public String getSequence( Sugar s ) { // TODO: Rene's exporter code would go here! throw new UnsupportedOperationException( "NOT YET IMPLEMENTED" ); } /* getSequence *//***************************************** * * Temporary 'bridging' method to support Rene's Sugar * */ public String getSequence( org.eurocarbdb.MolecularFramework.sugar.Sugar s ) { org.eurocarbdb.MolecularFramework.io.GlycoCT.SugarExporterGlycoCTCondensed objGlycoCTCon = new org.eurocarbdb.MolecularFramework.io.GlycoCT.SugarExporterGlycoCTCondensed(); String seq = null; try { objGlycoCTCon.start(s); seq = objGlycoCTCon.getHashCode(); } catch (org.eurocarbdb.MolecularFramework.util.visitor.GlycoVisitorException e ) { log.warn( "failed to calculate condensed glyco ct: " + e.getMessage() , e ); seq = null; } return seq; } public String getSequence( Monosaccharide ms ) { ComplexMonosaccharide m = (ms instanceof ComplexMonosaccharide) ? (ComplexMonosaccharide) ms : new ComplexMonosaccharide( ms ); String basetype = m.getAnomer().getSymbol() + "-" + m.getStereoConfig() + m.getBasetype().getName() ; String ringstart = ( m.getRingStart() == -1 ) ? UNKNOWN_RING_POSITION : String.valueOf( m.getRingStart() ) ; String ringend = ( m.getRingEnd() == -1 ) ? UNKNOWN_RING_POSITION : String.valueOf( m.getRingEnd() ) ; String modificationList = getSequence( m.getModifications() ); String name = join( "-", basetype, m.getSuperclass(), ringstart + ":" + ringend ); return ( modificationList.length() > 0 ) ? name + modificationList : name ; } /** * Returns a string representing the passed Modification in * GlycoCT format. */ public String getSequence( Modification m ) { if ( m.hasPositionTwo() ) { return m.getPositionOne() + "," + m.getPositionTwo() + ":" + m.getName() ; } else { return m.getPositionOne() + ":" + m.getName(); } } /** * Returns a string representing the passed list of * Substituents in GlycoCT format. */ String getSequence( List<Modification> modifications ) { if ( modifications == null || modifications.size() == 0 ) return ""; List<String> mod_name_list = new ArrayList<String>( modifications.size() ); for ( Modification m : modifications ) mod_name_list.add( getSequence( m ) ); // mjh: terminus position is the first character of the generated // mod name, therefore we can take advantage of the built-in // comparator for sorting strings instead of writing our own :-) Collections.sort( mod_name_list ); return "|" + join( "|", mod_name_list ); } /** {@inheritDoc} @see SequenceFormat#getSequence(Residue) */ public String getSequence( Residue r ) { return r.getName(); } /** * TODO for GlycoCT */ public String getSequence( Substituent s ) { return s.getName(); } } // end class