/* * EuroCarbDB, a framework for carbohydrate bioinformatics * * Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as * indicated by the @author tags or express copyright attribution * statements applied by the authors. * * This copyrighted material is made available to anyone wishing to use, modify, * copy, or redistribute it subject to the terms and conditions of the GNU * Lesser General Public License, as published by the Free Software Foundation. * A copy of this license accompanies this distribution in the file LICENSE.txt. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * Last commit: $Rev: 1210 $ by $Author: glycoslave $ on $Date:: 2009-06-12 #$ */ package org.eurocarbdb.application.glycanbuilder; import java.util.*; import java.util.regex.*; /** Read glycan structures from strings in LINUCS format. Writing is not supported. @author Alessio Ceroni (a.ceroni@imperial.ac.uk) */ public class LinucsParser implements GlycanParser { static Pattern linucs_type_pattern; static Pattern linucs_link_pattern; static { linucs_type_pattern = Pattern.compile("^(?:([abo\\?])-)?(?:([dl\\?])-)?((?:[1-9](?:\\,[1-9])*-?deoxy-)?(?:[1-9](?:\\,[1-9])*-?en-)?(?:[1-9](?:\\,[1-9])*-?anhydro-)?[a-z]{3}(?:hex)?)([fp])?([an]?)((?:[1-9n][a-z]{1,3})?)((?:[1-9n][a-z]{1,3})?)((?:[1-9n][a-z]{1,3})?)(-ol)?$"); linucs_link_pattern = Pattern.compile("^\\((.+)\\+(.+)\\)$"); } public void setTolerateUnknown(boolean f) { } public String writeGlycan(Glycan structure) { return null; } public Glycan readGlycan(String str, MassOptions default_mass_options) throws Exception { return new Glycan(readSubtree(str),true,default_mass_options); } static private Residue readSubtree(String str) throws Exception { // parse node int endlink = TextUtils.findEnclosed(str,0,'[',']'); if( endlink==-1 ) throw new Exception("Invalid input: " + str); int endtype = TextUtils.findEnclosed(str,endlink+1,'[',']'); if( endtype==-1 ) throw new Exception("Invalid input: " + str); // save node info String link = str.substring(1,endlink); String type = str.substring(endlink+2,endtype).toUpperCase(); // parse children int endchild = TextUtils.findEnclosed(str,endtype+1,'{','}'); if( endchild==-1 || endchild!=str.length()-1 ) throw new Exception("Invalid input: " + str); str = str.substring(endtype+2,endchild); Vector<Residue> parsed_children = new Vector<Residue>(); while( str.length()>0 ) { endlink = TextUtils.findEnclosed(str,0,'[',']'); if( endlink==-1 ) throw new Exception("Invalid child string: " + str); endtype = TextUtils.findEnclosed(str,endlink+1,'[',']'); if( endtype==-1 ) throw new Exception("Invalid child string: " + str); endchild = TextUtils.findEnclosed(str,endtype+1,'{','}'); if( endchild==-1 ) throw new Exception("Invalid child string: " + str); parsed_children.add(readSubtree(str.substring(0,endchild+1))); str = str.substring(endchild+1); } // create residue Residue parent = null; if( parsed_children.size()==1 ) { parent = createResidueFromLINUCS(link,type,true); if( parent==null ) return parsed_children.elementAt(0); } else parent = createResidueFromLINUCS(link,type,false); if( !parent.canHaveChildren() && parsed_children.size()>0) throw new Exception("Linking to non parentable"); // add children for( Residue child : parsed_children ) { if( child.getParentLinkage()!=null ) parent.addChild(child,child.getParentLinkage().getBonds()); else parent.addChild(child); } return parent; } static private Residue createResidueFromLINUCS(String _link, String _type, boolean skip_unknown) throws Exception { _type = TextUtils.squeezeAll(_type.toLowerCase(), ' ').replace(' ','_'); // parse type Matcher mt = linucs_type_pattern.matcher(_type); if( !mt.matches() ) { if( _type.equals("p") ) return ResidueDictionary.newResidue("P"); if( _type.equals("sulfate") ) return ResidueDictionary.newResidue("S"); if( _type.equals("methyl") ) return ResidueDictionary.newResidue("Me"); if( _type.equals("2-aminopyridine") ) return ResidueDictionary.newResidue("2AP"); if( skip_unknown ) return null; throw new Exception("Invalid type: " + _type); } // create residue Residue ret = new Residue(); ret.setAnomericState(getProperty(mt.group(1),"ab")); ret.setChirality(getProperty(mt.group(2), "DL")); String residue_type = mt.group(3) + mt.group(5); ret.setRingSize(getProperty(mt.group(4),"pf")); // parse modifications boolean alditol = false; int start_mod = 6; int end_mod = mt.groupCount(); if( mt.group(end_mod)!=null && mt.group(end_mod).equals("-ol") ) { alditol = true; end_mod--; } Vector<String> modifications = new Vector<String>(); for(int i=start_mod; i<=end_mod; i++ ) { if( mt.group(i)!=null && mt.group(i).length()>0 ) { if( mt.group(i).equals("nac") && (residue_type.equals("gal") || residue_type.equals("glc") || residue_type.equals("man")) ) residue_type = residue_type + mt.group(i); else if( residue_type.equals("neu") && (mt.group(i).equals("5ac") || mt.group(i).equals("5gc")) ) residue_type = residue_type + mt.group(i).substring(1); // remove 5 position else modifications.add(mt.group(i)); } } // set residue type if( skip_unknown && !ResidueDictionary.hasResidueType(residue_type) ) return null; ret.setType(ResidueDictionary.getResidueType(residue_type)); // add modifications for( String sub : modifications ) { ret.addChild( ResidueDictionary.newResidue(sub.substring(1)), Character.toUpperCase(sub.charAt(0)) ); } // parse link if( _link.length()>0 ) { Matcher ml = linucs_link_pattern.matcher(_link); if( !ml.matches() ) throw new Exception("Invalid link: " + _link); if( ml.group(1)!=null && ml.group(1).length()==1 && Character.isDigit(ml.group(1).charAt(0)) ) { if( ml.group(1).charAt(0)=='0' ) ret.setParentLinkage(new Linkage(null,ret)); else ret.setParentLinkage(new Linkage(null,ret, ml.group(1).charAt(0))); } if( ml.group(2)!=null && ml.group(2).length()==1 && Character.isDigit(ml.group(2).charAt(0)) && ml.group(2).charAt(0)!='0' ) ret.setAnomericCarbon(ml.group(2).charAt(0)); } return ret; } static private char getProperty(String value, String domain) { if( value==null || value.length()>1 ) return '?'; char v = value.toLowerCase().charAt(0); int ind = domain.toLowerCase().indexOf(v); if( ind==-1 ) return '?'; return domain.charAt(ind); } }