/*
 *   EuroCarbDB, a framework for carbohydrate bioinformatics
 *
 *   Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as
 *   indicated by the @author tags or express copyright attribution
 *   statements applied by the authors.
 *
 *   This copyrighted material is made available to anyone wishing to use, modify,
 *   copy, or redistribute it subject to the terms and conditions of the GNU
 *   Lesser General Public License, as published by the Free Software Foundation.
 *   A copy of this license accompanies this distribution in the file LICENSE.txt.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *   or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
 *   for more details.
 *
 *   Last commit: $Rev: 1210 $ by $Author: glycoslave $ on $Date:: 2009-06-12 #$
 */
/**
   @author Alessio Ceroni (a.ceroni@imperial.ac.uk)
*/

package org.eurocarbdb.application.glycoworkbench.plugin;

import org.eurocarbdb.application.glycoworkbench.*;
import org.eurocarbdb.application.glycanbuilder.*;

import java.util.*;
import java.util.regex.*;

/**
 * Description of one GAG type (presumably "glycosaminoglycan" -- confirm):
 * a repeating backbone motif together with the positions at which acetyl
 * and sulfate substituents may be attached, and the logic to enumerate the
 * structures that result from a given set of {@link GAGOptions}.
 *
 * <p>Invariant maintained by every mutator in this class:
 * {@code cooccurring_pos} is the intersection of {@code acetyl_pos} and
 * {@code sulfate_pos}.
 */
public class GAGType {

    protected String family;
    protected String motif;
    /** Offset added to residue ids for each repetition of the motif (see {@link #repeat}). */
    protected int unit_size;
    protected TreeSet<GAGPosition> acetyl_pos;
    protected TreeSet<GAGPosition> opt_acetyl_pos;
    protected TreeSet<GAGPosition> sulfate_pos;
    /** Positions present in both {@code acetyl_pos} and {@code sulfate_pos}. */
    protected TreeSet<GAGPosition> cooccurring_pos;
    protected String description;

    /** Creates an empty type: blank strings, zero unit size, empty position sets. */
    public GAGType() {
        family = "";
        motif = "";
        unit_size = 0;
        acetyl_pos = new TreeSet<GAGPosition>();
        opt_acetyl_pos = new TreeSet<GAGPosition>();
        sulfate_pos = new TreeSet<GAGPosition>();
        cooccurring_pos = new TreeSet<GAGPosition>();
        description = "";
    }

    /**
     * Parses a type from a tab-separated record with exactly seven fields:
     * family, motif, unit size, acetyl positions, optional acetyl positions,
     * sulfate positions, description. Position lists are comma-separated
     * {@link GAGPosition} strings, or {@code "-"} for an empty list.
     *
     * @param init the tab-separated record
     * @throws Exception if the record does not have seven fields, the unit
     *         size is not an integer, or a position is malformed
     */
    public GAGType(String init) throws Exception {
        Vector<String> tokens = TextUtils.tokenize(init, "\t");
        if (tokens.size() != 7) {
            throw new Exception("Invalid string format: " + init);
        }

        family = tokens.elementAt(0);
        motif = tokens.elementAt(1);
        unit_size = Integer.parseInt(tokens.elementAt(2));
        acetyl_pos = parsePositions(tokens.elementAt(3));
        opt_acetyl_pos = parsePositions(tokens.elementAt(4));
        sulfate_pos = parsePositions(tokens.elementAt(5));
        description = tokens.elementAt(6);

        cooccurring_pos = intersect(acetyl_pos, sulfate_pos);
    }

    /**
     * Returns a copy of this type whose position sets are independent
     * {@link TreeSet} instances (the {@link GAGPosition} elements themselves
     * are shared).
     */
    @Override
    public GAGType clone() {
        GAGType ret = new GAGType();
        ret.family = this.family;
        ret.motif = this.motif;
        ret.unit_size = this.unit_size;
        ret.acetyl_pos = new TreeSet<GAGPosition>(this.acetyl_pos);
        ret.opt_acetyl_pos = new TreeSet<GAGPosition>(this.opt_acetyl_pos);
        ret.sulfate_pos = new TreeSet<GAGPosition>(this.sulfate_pos);
        ret.cooccurring_pos = new TreeSet<GAGPosition>(this.cooccurring_pos);
        ret.description = this.description;
        return ret;
    }

    /**
     * Returns a copy of this type in which the optional acetyl positions
     * replace the regular acetyl positions and the optional set is emptied.
     * This instance is not modified.
     */
    public GAGType allowUnlikelyAcetylation() {
        GAGType ret = this.clone();
        ret.acetyl_pos = ret.opt_acetyl_pos;
        ret.opt_acetyl_pos = new TreeSet<GAGPosition>();
        // acetyl_pos changed, so the acetyl/sulfate overlap must be rebuilt;
        // getMaxNoSulfates() depends on it.
        ret.cooccurring_pos = intersect(ret.acetyl_pos, ret.sulfate_pos);
        return ret;
    }

    /**
     * Returns a copy of this type with the given modifications applied.
     * Recognised values are the {@code DE_2_SULFATION}, {@code DE_6_SULFATION},
     * {@code DE_N_SULFATION} and {@code RE_ACETYLATION} constants of
     * {@link GAGOptions}; any other value is ignored. This instance is not
     * modified.
     *
     * @param modifications the modification identifiers to apply, in order
     */
    public GAGType applyModifications(String[] modifications) {
        GAGType ret = this.clone();
        for (int i = 0; i < modifications.length; i++) {
            String modification = modifications[i];
            if (modification.equals(GAGOptions.DE_2_SULFATION)) {
                removePosition(ret.sulfate_pos, '2');
            } else if (modification.equals(GAGOptions.DE_6_SULFATION)) {
                removePosition(ret.sulfate_pos, '6');
            } else if (modification.equals(GAGOptions.DE_N_SULFATION)) {
                removePosition(ret.sulfate_pos, 'N');
            } else if (modification.equals(GAGOptions.RE_ACETYLATION)) {
                // N-sulfates are dropped and every optional acetyl site becomes available
                removePosition(ret.sulfate_pos, 'N');
                ret.acetyl_pos.addAll(ret.opt_acetyl_pos);
            }
        }

        ret.cooccurring_pos = intersect(ret.acetyl_pos, ret.sulfate_pos);
        return ret;
    }

    // member access

    /** Returns the family name of this type. */
    public String getFamily() {
        return family;
    }

    // operations

    /**
     * Builds the structure of a single motif unit. Acetyls are placed only
     * when {@code opt} contains the {@code RE_ACETYLATION} modification; the
     * sulfate positions are passed with a requested sulfate count of zero.
     *
     * @param opt generation options (reduced end, derivatization, unsaturation)
     * @return the generated structure, or an empty {@link Glycan} if
     *         generation throws (the exception is reported via {@link LogUtils})
     */
    public Glycan getMotifStructure(GAGOptions opt) {
        try {
            String backbone = generateBackbone(1, opt.IS_REDUCED);
            Collection<GAGPosition> s_pos = repeat(sulfate_pos, 1);
            Collection<GAGPosition> ac_pos = new Vector<GAGPosition>();
            if (opt.containsModification(GAGOptions.RE_ACETYLATION)) {
                ac_pos = repeat(acetyl_pos, 1);
            }

            return generateStructure(backbone, ac_pos, s_pos, 1, 0, opt.DERIVATIZATION, opt.IS_UNSATURATED);
        } catch (Exception e) {
            LogUtils.report(e);
            return new Glycan();
        }
    }

    /**
     * Returns the number of acetyl positions available in {@code no_units}
     * repetitions of the motif. Negative unit counts are treated as zero.
     */
    public int getMaxNoAcetyls(int no_units) {
        no_units = Math.max(0, no_units);
        return (no_units * acetyl_pos.size());
    }

    /**
     * Returns the number of sulfate positions still available once
     * {@code no_acetyls} acetyls have been placed. Acetyls first fill the
     * positions that cannot carry a sulfate; only the excess is subtracted
     * from the sulfate capacity.
     *
     * @param no_units   number of motif repetitions (negative treated as zero)
     * @param no_acetyls number of acetyls placed (capped at the acetyl capacity)
     */
    public int getMaxNoSulfates(int no_units, int no_acetyls) {
        no_units = Math.max(0, no_units);

        int max_no_acetyls = no_units * acetyl_pos.size();
        int max_no_sulfates = no_units * sulfate_pos.size();
        int no_cooccurring = no_units * cooccurring_pos.size();

        no_acetyls = Math.min(no_acetyls, max_no_acetyls);
        int acetyl_only_capacity = max_no_acetyls - no_cooccurring;
        return max_no_sulfates - Math.max(0, no_acetyls - acetyl_only_capacity);
    }

    /**
     * Generates all structures allowed by {@code opt} and returns them in a
     * new {@link FragmentCollection}.
     */
    public FragmentCollection generateStructures(GAGOptions opt) {
        FragmentCollection ret = new FragmentCollection();
        generateStructures(ret, null, opt);
        return ret;
    }

    /**
     * Generates all structures allowed by {@code opt}, passing each one to
     * {@code listener}.
     */
    public void generateStructures(GeneratorListener listener, GAGOptions opt) {
        generateStructures(null, listener, opt);
    }

    /**
     * Generates every structure allowed by {@code opt}: for each unit count
     * in {@code [MIN_NO_UNITS, MAX_NO_UNITS]} (non-positive counts skipped),
     * each permitted acetyl count and each permitted sulfate count.
     * Generation stops as soon as the listener callback returns {@code false}.
     *
     * @param buffer   collection receiving the structures, or {@code null}
     * @param listener callback receiving the structures, or {@code null}
     * @param opt      generation options
     */
    public void generateStructures(FragmentCollection buffer, GeneratorListener listener, GAGOptions opt) {
        for (int no_units = opt.MIN_NO_UNITS; no_units <= opt.MAX_NO_UNITS; no_units++) {
            if (no_units <= 0) {
                continue;
            }

            // generate backbone
            String backbone = generateBackbone(no_units, opt.IS_REDUCED);

            // establish limits
            int max_no_acetyls = 0;
            int min_no_acetyls = 0;
            if (opt.containsModification(GAGOptions.RE_ACETYLATION)) {
                // re-acetylation fills every acetyl position
                min_no_acetyls = max_no_acetyls = getMaxNoAcetyls(no_units);
            } else {
                max_no_acetyls = Math.min(opt.MAX_NO_ACETYLS, getMaxNoAcetyls(no_units));
                min_no_acetyls = Math.min(opt.MIN_NO_ACETYLS, max_no_acetyls);
            }

            // generate
            for (int no_acetyls = min_no_acetyls; no_acetyls <= max_no_acetyls; no_acetyls++) {

                // establish limits
                int max_no_sulfates = Math.min(opt.MAX_NO_SULFATES, getMaxNoSulfates(no_units, no_acetyls));
                int min_no_sulfates = Math.min(opt.MIN_NO_SULFATES, max_no_sulfates);

                // generate
                for (int no_sulfates = min_no_sulfates; no_sulfates <= max_no_sulfates; no_sulfates++) {
                    if (!generateStructures(buffer, listener, backbone, no_units, no_acetyls, no_sulfates,
                                            opt.DERIVATIZATION, opt.IS_UNSATURATED, opt.ALLOW_REDEND_LOSS)) {
                        return;
                    }
                }
            }
        }
    }

    /** Removes from {@code pos_coll} every position whose linkage position equals {@code link_pos}. */
    private void removePosition(Collection<GAGPosition> pos_coll, char link_pos) {
        // explicit iterator so elements can be removed while iterating
        for (Iterator<GAGPosition> i = pos_coll.iterator(); i.hasNext(); ) {
            GAGPosition pos = i.next();
            if (pos.linkage_pos == link_pos) {
                i.remove();
            }
        }
    }

    /**
     * Generates one structure per way of choosing {@code no_acetyl} acetyl
     * positions, each carrying {@code no_sulfates} sulfates, and delivers it
     * to {@code buffer} and/or {@code listener} under a name of the form
     * {@code "dp" + 2*no_units + pattern}. When {@code allow_redend_loss} is
     * set, a second variant with the reducing end removed is delivered too.
     *
     * @return {@code false} if the listener asked to stop, {@code true} otherwise
     */
    private boolean generateStructures(FragmentCollection buffer, GeneratorListener listener, String backbone,
                                       int no_units, int no_acetyl, int no_sulfates, String derivatization,
                                       boolean unsaturated, boolean allow_redend_loss) {

        // create positions
        Vector<Union<GAGPosition>> combinations = new Vector<Union<GAGPosition>>();
        enumerateCombinations(combinations, new Union<GAGPosition>(), repeat(acetyl_pos, no_units), 0, no_acetyl);
        Union<GAGPosition> all_s_pos = repeat(sulfate_pos, no_units);

        for (Union<GAGPosition> ac_pos : combinations) {

            // generate structure
            Glycan structure = generateStructure(backbone, ac_pos, all_s_pos, no_units, no_sulfates,
                                                 derivatization, unsaturated);

            // generate name
            String name = "dp" + (no_units * 2)
                + (new TypePattern().and("Ac", ac_pos.size()).and("S", no_sulfates).toString());

            // add to list
            if (buffer != null) {
                buffer.addFragment(structure, name);
            }
            if (listener != null) {
                if (!listener.generatorCallback(new FragmentEntry(structure, name))) {
                    return false;
                }
            }

            if (allow_redend_loss) {
                // remove redend
                structure = removeReducingEnd(structure);

                // count remaining labiles: acetyls on residue 0 went away with the reducing end
                int no_ac = 0;
                for (GAGPosition pos : ac_pos) {
                    if (pos.residue_id != 0) {
                        no_ac++;
                    }
                }
                int no_s = structure.countResidues("S");

                // generate name
                name = "dp" + (no_units * 2 - 1)
                    + (new TypePattern().and("Ac", no_ac).and("S", no_s).toString());

                // add to list
                if (buffer != null) {
                    buffer.addFragment(structure, name);
                }
                if (listener != null) {
                    if (!listener.generatorCallback(new FragmentEntry(structure, name))) {
                        return false;
                    }
                }
            }
        }

        return true;
    }

    /**
     * Returns a clone of {@code structure} with the first child of the root
     * (the reducing-end residue) removed together with its non-saccharide
     * children. If any removed child was a labile-cleavage placeholder and
     * the detached labiles pattern is non-empty and contains the labile
     * positions pattern, the detached labiles are reattached.
     */
    private Glycan removeReducingEnd(Glycan structure) {
        Glycan ret = structure.clone();
        Residue toremove = ret.getRoot().firstChild();

        // remove substituents and placeholders
        int removed_placeholders = 0;
        Vector<Residue> toremove_children = new Vector<Residue>();
        for (Linkage l : toremove.getChildrenLinkages()) {
            Residue child = l.getChildResidue();
            if (!child.isSaccharide()) {
                // collected first, removed after the loop: prevent concurrent exception
                toremove_children.add(child);
            }
            if (child.isLCleavage()) {
                removed_placeholders++;
            }
        }
        for (Residue r : toremove_children) {
            toremove.removeChild(r);
        }

        // remove reducing end
        ret.getRoot().removeChild(ret.getRoot().firstChild());

        // fix labiles
        if (removed_placeholders > 0) {
            TypePattern detached = ret.getDetachedLabilesPattern();
            if (detached.size() > 0 && detached.contains(ret.getLabilePositionsPattern())) {
                ret = ret.removeDetachedLabiles().reattachAllLabileResidues();
            }
        }

        return ret;
    }

    /**
     * Builds a structure from {@code backbone} and decorates it. Residues are
     * visited by following {@code firstSaccharideChild()} from the root and
     * numbered from 0. Acetyls are attached at every position in
     * {@code ac_pos}; an N-linked acetyl is expressed by converting the
     * residue type via {@link #toHexNAc} when possible. Sulfate positions not
     * taken by an acetyl receive a real sulfate when {@code no_sulfates}
     * equals the maximum possible, otherwise a labile-cleavage placeholder,
     * with {@code no_sulfates} sulfates added as antennae.
     *
     * @return the structure, or an empty {@link Glycan} if construction
     *         throws (the exception is reported via {@link LogUtils})
     */
    private Glycan generateStructure(String backbone, Collection<GAGPosition> ac_pos,
                                     Collection<GAGPosition> s_pos, int no_units, int no_sulfates,
                                     String derivatization, boolean unsaturated) {
        try {
            Glycan structure = Glycan.fromString(backbone, createMassOptions(derivatization));
            if (unsaturated) {
                makeUnsaturated(structure);
            }

            int id = 0;
            // sulfates are placed explicitly only when every free sulfate site is occupied
            boolean attach_sulfates = (no_sulfates == getMaxNoSulfates(no_units, ac_pos.size()));
            for (Residue nav = structure.getRoot().firstSaccharideChild(); nav != null;
                 nav = nav.firstSaccharideChild()) {

                // add acetyl
                for (GAGPosition pos : ac_pos) {
                    if (id == pos.residue_id) {
                        // toHexNAc is only attempted for N positions; fall back to an explicit Ac child
                        if (pos.linkage_pos != 'N' || !toHexNAc(nav)) {
                            nav.addChild(ResidueDictionary.newResidue("Ac"), pos.linkage_pos);
                        }
                    }
                }

                // add sulfates
                for (GAGPosition pos : s_pos) {
                    if (id == pos.residue_id && !ac_pos.contains(pos)) {
                        if (attach_sulfates) {
                            // add sulfate
                            nav.addChild(ResidueDictionary.newResidue("S"), pos.linkage_pos);
                        } else {
                            // add placeholder
                            Residue l_cleav = new Residue(ResidueType.createLCleavage());
                            l_cleav.setCleavedResidue(ResidueDictionary.newResidue("S"));
                            nav.addChild(l_cleav, pos.linkage_pos);
                        }
                    }
                }

                id++;
            }

            if (!attach_sulfates) {
                // add sulfates
                for (int i = 0; i < no_sulfates; i++) {
                    structure.addAntenna(ResidueDictionary.newResidue("S"));
                }
            }

            return structure;
        } catch (Exception e) {
            LogUtils.report(e);
            return new Glycan();
        }
    }

    /**
     * Returns the backbone string: {@code "redEnd"} or {@code "freeEnd"}
     * followed by {@code nounits} repetitions of {@code "--" + motif}.
     */
    public String generateBackbone(int nounits, boolean reduced) {
        StringBuilder backbone = new StringBuilder((reduced) ? "redEnd" : "freeEnd");
        for (int i = 0; i < nounits; i++) {
            backbone.append("--").append(motif);
        }
        return backbone.toString();
    }

    /**
     * Marks the last residue of the saccharide chain as unsaturated. The
     * chain is followed from the root through the first saccharide child at
     * each step. If the residue dictionary knows a type named
     * {@code pos + "u" + typeName} the residue type is replaced, otherwise an
     * {@code "un"} residue is attached at {@code pos}. {@code pos} is the
     * parent position of the root's first linkage, or {@code '?'} if there is
     * none. Does nothing for a {@code null} structure or root; exceptions are
     * reported via {@link LogUtils}.
     */
    public void makeUnsaturated(Glycan structure) {
        if (structure == null || structure.getRoot() == null) {
            return;
        }

        // find last residue
        Residue nav = structure.getRoot();
        while (true) {
            // search for a saccharide children
            Residue child = null;
            for (Linkage l : nav.getChildrenLinkages()) {
                if (l.getChildResidue().isSaccharide()) {
                    child = l.getChildResidue();
                    break;
                }
            }

            if (child == null) {
                break;
            }
            nav = child;
        }

        // make unsaturation
        char pos = '?';
        if (structure.getRoot().firstLinkage() != null) {
            pos = structure.getRoot().firstLinkage().getParentPositionsSingle();
        }

        try {
            if (ResidueDictionary.findResidueType(pos + "u" + nav.getTypeName()) != null) {
                nav.setType(ResidueDictionary.getResidueType(pos + "u" + nav.getTypeName()));
            } else {
                nav.addChild(ResidueDictionary.newResidue("un"), pos);
            }
        } catch (Exception e) {
            LogUtils.report(e);
        }
    }

    /**
     * Returns the given positions repeated {@code times} times, the i-th copy
     * translated by {@code i * unit_size} residues.
     */
    public Union<GAGPosition> repeat(Collection<GAGPosition> positions, int times) {
        Union<GAGPosition> ret = new Union<GAGPosition>();
        for (int i = 0; i < times; i++) {
            for (GAGPosition pos : positions) {
                ret.add(pos.translate(i * unit_size));
            }
        }
        return ret;
    }

    /**
     * Recursively collects into {@code buffer} every way of extending
     * {@code combination} to {@code combination_size} elements using the
     * elements of {@code collection} from index {@code ind} onward.
     * NOTE(review): relies on {@code Union.and} returning a new union rather
     * than mutating its receiver -- confirm against {@code Union}.
     *
     * <p>If the request cannot be satisfied (not enough elements left, or a
     * negative {@code combination_size}) nothing is added for that branch.
     *
     * @param buffer           receives the completed combinations
     * @param combination      the partial combination built so far
     * @param collection       the elements to choose from
     * @param ind              index of the next candidate element
     * @param combination_size target size of each combination
     */
    public void enumerateCombinations(Vector<Union<GAGPosition>> buffer, Union<GAGPosition> combination,
                                      Union<GAGPosition> collection, int ind, int combination_size) {
        if (combination.size() == combination_size) {
            buffer.add(combination);
            return;
        }

        // nothing left to pick from: avoid indexing past the end of the collection
        if (ind >= collection.size()) {
            return;
        }

        // add
        enumerateCombinations(buffer, combination.and(collection.elementAt(ind)), collection, ind + 1,
                              combination_size);

        // don't add (if possible)
        if ((collection.size() - ind) > (combination_size - combination.size())) {
            enumerateCombinations(buffer, combination, collection, ind + 1, combination_size);
        }
    }

    /**
     * Converts an amino sugar residue to its N-acetylated type in place:
     * HexN, GalN, GlcN and ManN become HexNAc, GalNAc, GlcNAc and ManNAc.
     *
     * @return {@code true} if the residue type was changed; {@code false} for
     *         {@code null}, any other type, or if the lookup throws (the
     *         exception is reported via {@link LogUtils})
     */
    public boolean toHexNAc(Residue r) {
        if (r == null) {
            return false;
        }

        try {
            if (r.getTypeName().equals("HexN")) {
                r.setType(ResidueDictionary.getResidueType("HexNAc"));
                return true;
            }
            if (r.getTypeName().equals("GalN")) {
                r.setType(ResidueDictionary.getResidueType("GalNAc"));
                return true;
            }
            if (r.getTypeName().equals("GlcN")) {
                r.setType(ResidueDictionary.getResidueType("GlcNAc"));
                return true;
            }
            if (r.getTypeName().equals("ManN")) {
                r.setType(ResidueDictionary.getResidueType("ManNAc"));
                return true;
            }
            return false;
        } catch (Exception e) {
            LogUtils.report(e);
            return false;
        }
    }

    /** Creates mass options with the given derivatization and a fresh, empty ion cloud. */
    private MassOptions createMassOptions(String derivatization) {
        MassOptions ret = new MassOptions();
        ret.DERIVATIZATION = derivatization;
        ret.ION_CLOUD = new IonCloud();
        return ret;
    }

    // serialization

    /** Returns a new set holding the positions present in both {@code s1} and {@code s2}. */
    private TreeSet<GAGPosition> intersect(TreeSet<GAGPosition> s1, TreeSet<GAGPosition> s2) {
        TreeSet<GAGPosition> ret = new TreeSet<GAGPosition>();
        for (GAGPosition p : s1) {
            if (s2.contains(p)) {
                ret.add(p);
            }
        }
        return ret;
    }

    /**
     * Parses a comma-separated list of positions; {@code "-"} denotes the
     * empty list.
     *
     * @throws Exception if any element is not a valid {@link GAGPosition} string
     */
    private TreeSet<GAGPosition> parsePositions(String str) throws Exception {
        TreeSet<GAGPosition> ret = new TreeSet<GAGPosition>();
        if (str.equals("-")) {
            return ret;
        }

        Vector<String> tokens = TextUtils.tokenize(str, ",");
        for (String token : tokens) {
            ret.add(GAGPosition.fromString(token));
        }
        return ret;
    }

    /**
     * Returns family, motif, acetyl positions and sulfate positions separated
     * by tabs. NOTE(review): this is not the seven-field format accepted by
     * {@link #GAGType(String)} (unit size, optional acetyl positions and
     * description are omitted), so the output cannot be parsed back.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append(family);
        sb.append('\t');
        sb.append(motif);
        sb.append('\t');
        sb.append(TextUtils.toString(acetyl_pos, ','));
        sb.append('\t');
        sb.append(TextUtils.toString(sulfate_pos, ','));
        return sb.toString();
    }
}

/**
 * A substituent position: the index of a residue along the backbone plus the
 * linkage position on that residue ({@code '1'}-{@code '9'} or {@code 'N'}).
 * Ordered by residue index, then by linkage position character.
 */
class GAGPosition implements Comparable<GAGPosition> {

    /** Textual form: decimal residue index, {@code '#'}, one linkage character. */
    static Pattern pattern = Pattern.compile("^([0-9]+)\\#([1-9N])$");

    public int residue_id = 0;
    public char linkage_pos = 'N';

    /** Creates the position {@code 0#N}. */
    public GAGPosition() {
    }

    /** Two positions are equal when both residue index and linkage position match. */
    @Override
    public boolean equals(Object obj) {
        if (!(obj instanceof GAGPosition)) {
            return false;
        }

        GAGPosition o = (GAGPosition) obj;
        return (residue_id == o.residue_id && linkage_pos == o.linkage_pos);
    }

    /** Hash code consistent with {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        return 31 * residue_id + linkage_pos;
    }

    /**
     * Orders by residue index, then by linkage position character. A
     * {@code null} argument sorts before every position.
     */
    public int compareTo(GAGPosition o) {
        if (o == null) {
            return 1;
        }
        if (residue_id != o.residue_id) {
            return (residue_id > o.residue_id) ? 1 : -1;
        }
        if (linkage_pos != o.linkage_pos) {
            return (linkage_pos > o.linkage_pos) ? 1 : -1;
        }
        return 0;
    }

    /** Returns a new position with the residue index shifted by {@code by}. */
    public GAGPosition translate(int by) {
        GAGPosition ret = new GAGPosition();
        ret.residue_id = this.residue_id + by;
        ret.linkage_pos = this.linkage_pos;
        return ret;
    }

    /**
     * Parses a position of the form {@code <residue index>#<linkage>}.
     *
     * @throws Exception if {@code str} does not match that form
     */
    static GAGPosition fromString(String str) throws Exception {
        GAGPosition ret = new GAGPosition();

        Matcher m = pattern.matcher(str);
        if (!m.matches()) {
            throw new Exception("Invalid format for position: " + str);
        }

        ret.residue_id = Integer.parseInt(m.group(1));
        ret.linkage_pos = m.group(2).charAt(0);
        return ret;
    }

    /** Returns the textual form accepted by {@link #fromString(String)}. */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append(residue_id);
        sb.append('#');
        sb.append(linkage_pos);
        return sb.toString();
    }
}