/* * EuroCarbDB, a framework for carbohydrate bioinformatics * * Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as * indicated by the @author tags or express copyright attribution * statements applied by the authors. * * This copyrighted material is made available to anyone wishing to use, modify, * copy, or redistribute it subject to the terms and conditions of the GNU * Lesser General Public License, as published by the Free Software Foundation. * A copy of this license accompanies this distribution in the file LICENSE.txt. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * Last commit: $Rev: 1210 $ by $Author: glycoslave $ on $Date:: 2009-06-12 #$ */ package org.eurocarbdb.application.glycanbuilder; import org.eurocarbdb.MolecularFramework.sugar.Sugar; import java.util.*; import java.util.regex.*; import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.Node; import org.xml.sax.Attributes; import org.xml.sax.SAXException; import org.xml.sax.helpers.AttributesImpl; import javax.xml.transform.sax.TransformerHandler; /** This class contains all information about an intact or fragmented glycan molecule. The glycan can be partially specified. Uncertain linkages, residue superclasses, uncertain connections are supported. All residues whose parent is not known are connected to a special <i>bracket</i> residue. A glycan molecule where all residues are connected to the bracket represents a glycan composition. @see Residue @see Linkage @author Alessio Ceroni (a.ceroni@imperial.ac.uk) */ public class Glycan implements Comparable, SAXUtils.SAXWriter { //---------------------- // members private Residue root = null; private Residue bracket = null; private MassOptions mass_options = new MassOptions(); // ------------------------ // construction /** Empty constructor. */ public Glycan() { } /** Create a new glycan structure from a specified tree of residues. @param _root the root of the tree representing the new structure @param add_redend if <code>true</code> a reducing end modifier will be added in case the current tree does not have one @param mass_opt mass settings for this glycan structure */ public Glycan(Residue _root, boolean add_redend, MassOptions mass_opt) { // set root if( _root==null ) root = null; else if( _root.isReducingEnd() ) { // all types of reducing ends even fragments if( _root.hasChildren() || _root.isRingFragment() ) root = _root; else root = null; } else if( !add_redend ) root = _root; else { root = ResidueDictionary.createReducingEnd(); root.addChild(_root); } // set bracket bracket = null; // set mass options if( mass_opt!=null ) mass_options.setValues(mass_opt); if( !mass_options.getReducingEndType().isFreeReducingEnd() ) setReducingEndType(mass_options.getReducingEndType()); else mass_options.synchronize(this); } /** Create a new glycan structure from a specified tree of residues. @param _root the root of the tree representing the new structure @param _bracket the container for all residues with undefined connectivity @param add_redend if <code>true</code> a reducing end modifier will be added in case the current tree does not have one @param mass_opt mass settings for this glycan structure */ public Glycan(Residue _root, Residue _bracket, boolean add_redend, MassOptions mass_opt) { // set root if( _root==null ) root = null; else if( _root.isReducingEnd() ) { // all types of reducing ends even fragments if( _bracket!=null || _root.hasChildren() || _root.isRingFragment() ) root = _root; else root = null; } else if( !add_redend ) root = _root; else { root = ResidueDictionary.createReducingEnd(); root.addChild(_root); } // set bracket if( root!=null ) bracket = _bracket; else bracket = null; // set mass options if( mass_opt!=null ) mass_options.setValues(mass_opt); if( !mass_options.getReducingEndType().isFreeReducingEnd() ) setReducingEndType(mass_options.getReducingEndType()); else mass_options.synchronize(this); } /** Create an empty glycan object representing a composition. @param mass_opt mass settings for this glycan structure */ static public Glycan createComposition(MassOptions mass_opt) { return new Glycan(ResidueDictionary.createReducingEnd(),ResidueDictionary.createBracket(),true,mass_opt); } /** Return the composition of this glycan structure. All residues are trasformed into their superclasses. @return a new glycan object representing the composition */ public Glycan getComposition() { return getComposition(true); } /** Retrieve the composition of this glycan structure. @param show_superclasses if <code>true</code> all residues are trasformed into their superclasses. @return a new glycan object representing the composition */ public Glycan getComposition(boolean show_superclasses) { try { Glycan ret = createComposition(this.mass_options); for( Residue r : getAllResidues() ) { if( r.getType().isAttachPoint() || r.getType().isBracket() ) continue; else if( r.getType().isCleavage() ) { if( r.getType().canBeReducingEnd() ) ret.setRoot(r.cloneResidue()); else ret.addAntenna(r.cloneResidue()); } else if( r.isReducingEnd() ) ret.setRoot(r.cloneResidue()); else { if( show_superclasses ) ret.addAntenna(ResidueDictionary.newResidue(r.getType().getCompositionClass())); else ret.addAntenna(ResidueDictionary.newResidue(r.getType().getName())); } } return ret; } catch(Exception e) { return createComposition(this.mass_options); } } /** Return a collection of all residues in the structure. */ public Collection<Residue> getAllResidues() { Vector<Residue> ret = new Vector<Residue>(); getAllResidues(ret,root); getAllResidues(ret,bracket); return ret; } private void getAllResidues(Vector<Residue> dest, Residue node) { if( node!=null ) { dest.add(node); for( Linkage l : node.getChildrenLinkages() ) getAllResidues(dest,l.getChildResidue()); } } /** Compare this glycan to another object @see Comparable#compareTo */ public int compareTo(Object o) { if( o==null || !(o instanceof Glycan)) return 1; String s1 = this.toStringOrdered(); String s2 = ((Glycan)o).toStringOrdered(); return s1.compareTo(s2); } /** Compare this glycan to another object ignoring the charge configuration. @see Comparable#compareTo */ public int compareToIgnoreCharges(Object o) { if( o==null || !(o instanceof Glycan)) return 1; String s1 = this.toStringOrdered(false); String s2 = ((Glycan)o).toStringOrdered(false); return s1.compareTo(s2); } /** Create a new glycan object that is a copy of the current one. */ public Glycan clone() { return clone(false); } /** Create a new glycan object that is a copy of the current one. @param add_redend if <code>true</code> a reducing end modifier will be added in case the current tree does not have one */ public Glycan clone(boolean add_redend) { Glycan ret = null; if( root==null ) ret = new Glycan(null,add_redend,this.mass_options); else ret = new Glycan(root.cloneSubtree(),(bracket!=null) ?this.bracket.cloneSubtree() :null,add_redend,this.mass_options); return ret; } /** Return <code>true</code> if the two glycan structures have the same structure. The <code>equals</code> method from the <code>Object</code> class is not redefined. */ public boolean equalsStructure(Glycan og) { if( og==null ) return false; if( root==null ) return (og.root==null); if( !root.subtreeEquals(og.root) ) return false; if( bracket==null ) return og.bracket==null; return bracket.subtreeEquals(og.bracket); } // Access members /** Return the mass settings for the glycan structure. */ public MassOptions getMassOptions() { return mass_options; } /** Set the mass settings for the glycan structure. */ public boolean setMassOptions(MassOptions mass_opt) { boolean changed = mass_options.setValues(mass_opt); changed |= setReducingEndType(mass_options.getReducingEndType()); return changed; } /** Set the reducing end marker for this glycan structure. The mass settings are updated. */ public boolean setReducingEndType(ResidueType new_type) { Residue redend = getRoot(); if( redend!=null && redend.isReducingEnd() && !redend.isCleavage() && !redend.getTypeName().equals(new_type.getName()) ) { redend.setType(new_type); return true; } return false; } /** Reset the reducing end marker to a free reducing end. */ public void removeReducingEndModification() { setReducingEndType(ResidueType.createFreeReducingEnd()); } /** Return a copy of the current glycan structure with a free reducing end. */ public Glycan withNoReducingEndModification() { Glycan ret = this.clone(); ret.removeReducingEndModification(); return ret; } /** Return the root residue. */ public Residue getRoot() { return root; } /** Return the root residue excluding free reducing end markers and attach points. */ public Residue getRoot(boolean ret_redend) { if( root==null ) return null; // if( ret_redend || !(root.getTypeName().equals("freeEnd") || root.getTypeName().equals("redEnd") || root.getTypeName().equals("#attach")) ) if( ret_redend || !(root.getTypeName().equals("freeEnd") || root.getTypeName().equals("redEnd") ||root.getTypeName().equals("#attach")) ) return root; return root.firstChild(); } /** Set the root residue. */ public void setRoot(Residue new_root) { if( new_root==null || new_root.isReducingEnd() ) root = new_root; } /** Return the bracket residue @see ResidueType#createBracket */ public Residue getBracket() { return bracket; } /** Return <code>true</code> if the structure contains no residues. */ public boolean isEmpty() { if( root==null ) return true; if( root.hasChildren() ) return false; if( bracket==null ) return true; if( bracket.hasChildren() ) return false; return true; } /** Return <code>true</code> if this object represents a glycan composition. */ public boolean isComposition() { return (bracket!=null && root!=null && !root.hasChildren()); } /** Return <code>true</code> if some residue have unspecified connectivity. */ public boolean isFuzzy() { return isFuzzy(false); } /** Return <code>true</code> if some residue have unspecified connectivity. If <code>tolerate_labiles</code> is <code>true</code> the labile residues with unspeficied connectivity will not be considered. @see #detachLabileResidues */ public boolean isFuzzy(boolean tolerate_labiles) { if( bracket==null ) return false; if( !tolerate_labiles ) return true; // check if all children of bracket are labiles for( Linkage l : bracket.getChildrenLinkages() ) if( !l.getChildResidue().isLabile() ) return true; // check if all labiles can be assigned to positions in the structure TypePattern lp = getDetachedLabilesPattern(); TypePattern lpp = getLabilePositionsPattern(); return !lpp.contains(lp); } /** Return <code>true</code> if this structure is a fragment of a glycan. */ public boolean isFragment() { return isFragmentSubtree(root); } private boolean isFragmentSubtree(Residue node) { if( node.isCleavage() ) return true; for( Linkage link : node.getChildrenLinkages() ) { if( isFragmentSubtree(link.getChildResidue()) ) return true; } return false; } /** Return <code>true</code> if this structure contains repeat blocks. */ public boolean hasRepetition() { return hasRepetition(root) || hasRepetition(bracket); } private boolean hasRepetition(Residue node) { if( node==null ) return false; if( node.isRepetition() ) return true; for( Linkage link : node.getChildrenLinkages() ) { if( hasRepetition(link.getChildResidue()) ) return true; } return false; } /** Return the number of uncertain antennae of this structure, i.e. the number of children of the bracket residue. */ public int getNoAntennae() { if( bracket==null ) return 0; return bracket.getChildrenLinkages().size(); } /** Return the {@link Linkage linkages} to the uncertain antennae. */ public Vector<Linkage> getAntennaeLinkages() { if( bracket!=null ) return bracket.getChildrenLinkages(); return new Vector<Linkage>(); } /** Return <code>true</code> if the structure contain a residue matching <code>node</code> @see Residue#subtreeContains */ public boolean contains(Residue node) { return ( (root!=null && root.subtreeContains(node)) || (bracket!=null && bracket.subtreeContains(node)) ); } /** Return <code>true</code> if all linkages are valid and specified. @see Residue#checkLinkagesSubtree */ public boolean checkLinkages() { if( root!=null && !root.checkLinkagesSubtree() ) return false; if( bracket!=null && !bracket.checkLinkagesSubtree() ) return false; return true; } /** Return <code>true</code> if all linkages are specified. @return <code>false</code> if some residues have unspecified connectivity @see Residue#isFullySpecifiedSubtree */ public boolean isFullySpecified() { if( isFuzzy() ) return false; return root.isFullySpecifiedSubtree(); } static protected Vector<Residue> getPath(Residue a, Residue b) { //if( isEmpty() ) //return new Vector<Residue>(); // get paths from A to root Residue nav; Stack<Residue> pra = new Stack<Residue>(); for(nav=a; nav!=null; nav = nav.getParent()) pra.push(nav); // get paths from B to root Stack<Residue> prb = new Stack<Residue>(); for(nav=b; nav!=null; nav = nav.getParent()) prb.push(nav); // check if root is the same if( pra.peek()!=prb.peek() ) return new Vector<Residue>(); // remove common steps Residue common = null; while( !pra.empty() && !prb.empty() && pra.peek()==prb.peek() ) { common = pra.pop(); prb.pop(); } pra.push(common); // create path Vector<Residue> path = new Vector<Residue>(); for(Iterator<Residue> i=pra.iterator(); i.hasNext(); ) path.add(i.next()); while(!prb.empty()) path.add(prb.pop()); return path; } /** Return the maximum distance between the root and a leaf. */ public int getDepth() { return getDepth(root) + (getDepth(bracket)-1); } private int getDepth(Residue current) { if( current==null ) return 0; int depth = 0; for( Linkage l : current.getChildrenLinkages() ) depth = Math.max(depth,getDepth(l.getChildResidue())); return depth+1; } /** Return the number of residues in the structure. */ public int getCount() { return getCount(root) + (getCount(bracket)-1); } private int getCount(Residue current) { if( current==null ) return 0; int tot_count = 1; for( Linkage l : current.getChildrenLinkages() ) tot_count += getCount(l.getChildResidue()); return tot_count; } /** Resete the preferred display placement for all residues in the structure. @see Residue#resetPreferredPlacement */ public void removePlacements() { removePlacements(root); removePlacements(bracket); } private void removePlacements(Residue current) { if( current==null ) return; current.resetPreferredPlacement(); for( Linkage l : current.getChildrenLinkages()) removePlacements(l.getChildResidue()); } /** Count the number of residues of a specific type */ public int countResidues(String typename) { return countResidues(root,typename) + countResidues(bracket,typename); } private int countResidues(Residue current, String typename) { int count = 0; if( current==null ) return count; if( current.getTypeName().equals(typename) ) count++; for( Linkage l : current.getChildrenLinkages()) count += countResidues(l.getChildResidue(),typename); return count; } /** Return <code>true</code> if this structure contain <code>other</code>. A full substructure search is performed, with fuzzy matching between residues. @param include_redend <code>true</code> if the matching part must begint from the reducing end. @param include_all_leafs <code>true</code> if the matching part must contain all the leaf of the other structure. @see Residue#fuzzyMatch */ public boolean contains(Glycan other, boolean include_redend, boolean include_all_leafs) { return (other==null || (countSubtree(this.getRoot(false),other.getRoot(false),include_redend,include_all_leafs,true)!=0 && contains(this.getBracket(),other.getBracket(),include_all_leafs)) || (other.getBracket()==null && (!include_redend || this.getRoot(false)==null) && countSubtree(this.getBracket(),other.getRoot(false),include_redend,include_all_leafs,true)!=0) ); } /** Return the number of times this structure contain <code>other</code>. A full substructure search is performed, with fuzzy matching between residues. @param include_redend <code>true</code> if the matching part must begint from the reducing end. @param include_all_leafs <code>true</code> if the matching part must contain all the leaf of the other structure. @see Residue#fuzzyMatch */ public int count(Glycan other, boolean include_redend, boolean include_all_leafs) { if( other==null ) return 1; int count = countSubtree(this.getRoot(false),other.getRoot(false),include_redend,include_all_leafs,false); if( count!=0 && contains(this.getBracket(),other.getBracket(),include_all_leafs) ) return count; else if( other.getBracket()==null && (!include_redend || this.getRoot(false)==null) ) return countSubtree(this.getBracket(),other.getRoot(false),include_redend,include_all_leafs,true); else return 0; } private int countSubtree(Residue container, Residue terminal, boolean include_redend, boolean include_all_leafs, boolean stop_at_first) { int count = 0; if( contains(container,terminal,include_all_leafs) ) { if( stop_at_first ) return 1; count = 1; } if( container==null ) { return count; } if( !include_redend ) { // explore the tree for( Linkage l : container.getChildrenLinkages() ) { count += countSubtree(l.getChildResidue(),terminal,false,include_all_leafs,stop_at_first); if( count!=0 && stop_at_first ) return 1; } } return count; } private boolean contains(Linkage container, Linkage other, boolean include_all_leafs) { if( !container.fuzzyMatch(other) ) { //System.out.println("link mismatch " + GWSParser.toStringLinkage(container) + " " + GWSParser.toStringLinkage(other) + " at " + GWSParser.writeResidueType(container.getParentResidue())); return false; } return contains(container.getChildResidue(), other.getChildResidue(), include_all_leafs); } private boolean contains(Residue container, Residue other, boolean include_all_leafs) { if( other==null ) { //System.out.println("other null: " + container); return (container==null || !include_all_leafs); } if( container==null ) { //System.out.println("empty container"); return false; } // match current nodes if( !container.fuzzyMatch(other) ) { //System.out.println("residue mismatch " + GWSParser.writeResidueType(container)); return false; } // match children if( (include_all_leafs && container.getNoChildren()!=other.getNoChildren()) || container.getNoChildren()<other.getNoChildren() ) { //System.out.println("number of children mismatch at " + GWSParser.writeResidueType(container) ); return false; } if( other.getNoChildren()==0 ) return true; // try all possible permutations with no repetitions PermutationGenerator cg = new PermutationGenerator(other.getNoChildren()); while( cg.hasMore() ) { int[] indices = cg.getNext(); //System.out.println(TextUtils.toString(indices,' ')); int matched = 0; for( int l=0,i=0; l<other.getNoChildren(); l++ ) { boolean contains = false; for( ; i<container.getNoChildren(); i++ ) { if( contains(container.getLinkageAt(i),other.getLinkageAt(indices[l]),include_all_leafs) ) { matched++; i++; break; } } } if( matched==other.getNoChildren() ) return true; // found all //else //System.out.println("matched " + matched + "/" + other.getNoChildren()); } //System.out.println("no permutations match at " + GWSParser.writeResidueType(container) ); return false; } // Modify structure /** Add a bracket residue to the structure. @see ResidueType#createBracket */ public Residue addBracket() { if( bracket==null ) { bracket = ResidueDictionary.createBracket(); return bracket; } return null; } /** Remove the bracket residue from the structure. @see ResidueType#createBracket */ public boolean removeBracket() { if( bracket==null ) return false; bracket = null; return true; } /** Add a uncertain antenna to the structure. @see ResidueType#createBracket @see Residue#addChild(Residue) */ public boolean addAntenna(Residue antenna) { return addAntenna(antenna,Bond.single()); } /** Add a uncertain antenna to the structure with a specific linkage position. @see ResidueType#createBracket @see Residue#addChild(Residue,char) */ public boolean addAntenna(Residue antenna, char parent_link_pos) { return addAntenna(antenna,Bond.single(parent_link_pos)); } /** Add a uncertain antenna to the structure with specific bonds. @see ResidueType#createBracket @see Residue#addChild(Residue,Collection) */ public boolean addAntenna(Residue antenna, Collection<Bond> bonds) { if( bracket==null ) addBracket(); return bracket.addChild(antenna,bonds); } /** Remove a residue from the structure. @return <code>true</code> if the operation was successful */ public boolean removeResidue(Residue toremove) { if( root==null ) return false; if( toremove==null || root==toremove ) return false; if( toremove==bracket ) { if( bracket.hasChildren() ) return false; return removeBracket(); } if( root.removeChild(toremove) ) { if( !root.hasChildren() ) root = null; return true; } if( bracket!=null && bracket.removeChild(toremove) ) return true; return false; } /** Remove a collection of residues from the structure. @return <code>true</code> if the operation was successful */ public boolean removeResidues(Collection<Residue> toremove) { boolean removed = false; boolean had_antennae = (bracket!=null && bracket.hasChildren()); for(Iterator<Residue> i=toremove.iterator(); i.hasNext(); ) { if( removeResidue(i.next()) ) removed = true; } if( !removed ) return false; if( had_antennae && bracket!=null && !bracket.hasChildren() ) removeResidue(bracket); return true; } protected void removeUnpairedRepetitions() { removeUnpairedRepetitions(root); removeUnpairedRepetitions(bracket); } private void removeUnpairedRepetitions(Residue cur) { if( cur==null ) return; if( (cur.isStartRepetition() && cur.findEndRepetition()==null) || (cur.isEndRepetition() && cur.findStartRepetition()==null) ) removeResidue(cur); for( int i=0; i<cur.getNoChildren(); i++ ) removeUnpairedRepetitions(cur.getChildAt(i)); } protected Vector<Glycan> splitMultipleRoots() { Vector<Glycan> new_structures = new Vector<Glycan>(); while(root.getNoChildren()>1) { // remove one child from the root Residue child = root.getChildAt(1); root.getChildrenLinkages().remove(1); child.setParentLinkage(null); // add new structure new_structures.add(new Glycan(child,true,mass_options)); } return new_structures; } // Functions /** Try attaching the uncertain antennae in all positions of the structure. @return all possible resulting configuration */ public Vector<Glycan> placeAntennae() { Vector<Glycan> ret = new Vector<Glycan>(); this.placeAntennae(ret); return ret; } /** Try attaching the uncertain antennae in all positions of the structure. @param structures all possible resulting configuration */ public void placeAntennae(Vector<Glycan> structures) { structures.clear(); if( bracket==null ) structures.add(this.clone()); else if( root!=null ) placeAntennae(root,root,new LinkedList<Linkage>(bracket.getChildrenLinkages()),structures); } private void placeAntennae(Residue root, Residue current, LinkedList<Linkage> antennae, Vector<Glycan> structures) { if( current==null ) return; if( antennae.size()==0 ) { structures.add(new Glycan(root,false,mass_options)); return; } // place antenna Linkage link = antennae.getFirst(); Residue antenna = link.getChildResidue(); Collection<Bond> ant_pos = link.getBonds(); if( current.isSaccharide() && current.canAddChild(antenna,ant_pos) ) { antennae.removeFirst(); Residue new_root = root.cloneSubtreeAdd(current,antenna.cloneSubtree(),ant_pos); placeAntennae(new_root,new_root,antennae,structures); antennae.addFirst(link); } // recursive traversal for( Linkage l : current.getChildrenLinkages() ) placeAntennae(root,l.getChildResidue(),antennae,structures); } /** Return <code>true</code> if this structure represent a fragment with no intact saccharides. */ public boolean isSmallRingFragment() { if( bracket!=null || root==null ) return false; if( root.isRingFragment() ) return !root.hasSaccharideChildren(); if( root.getNoChildren()==1 && root.firstChild().isRingFragment() ) return !root.firstChild().hasSaccharideChildren(); return false; } /** Return all cleavage markers. */ public Vector<Residue> getCleavages() { Vector<Residue> ret = new Vector<Residue>(); getCleavages(ret,root); return ret; } static private void getCleavages(Vector<Residue> buffer, Residue node) { if( node==null || buffer==null ) return; if( node.isCleavage() ) buffer.add(node); for( Linkage l : node.getChildrenLinkages() ) getCleavages(buffer,l.getChildResidue()); } // ---------- // mass computation /** Return the derivatization. @see MassOptions#DERIVATIZATION */ public String getDerivatization() { return mass_options.DERIVATIZATION; } /** Return the associated charges. @see MassOptions#ION_CLOUD */ public IonCloud getCharges() { return mass_options.ION_CLOUD; } /** Set the associated charges. @see MassOptions#ION_CLOUD */ public void setCharges(IonCloud ic) { if( ic!=null ) mass_options.ION_CLOUD = ic; else mass_options.ION_CLOUD = new IonCloud(); } /** Return the associated neutral exchanges. @see MassOptions#NEUTRAL_EXCHANGES */ public IonCloud getNeutralExchanges() { return mass_options.NEUTRAL_EXCHANGES; } /** Set the associated neutral exchanges. @see MassOptions#NEUTRAL_EXCHANGES */ public void setNeutralExchanges(IonCloud ne) { if( ne!=null ) mass_options.NEUTRAL_EXCHANGES = ne; else mass_options.NEUTRAL_EXCHANGES = new IonCloud(); } /** Count the number of charges associated to the structure */ public int countCharges() { return countChargesSubtree(root,false) + countChargesSubtree(bracket,false); } /** Count the number of charges associated to the structure @param allow_virtual_charges if <code>true</code> include in the count the charges associated with acidic groups */ public int countCharges(boolean allow_virtual_charges) { return countChargesSubtree(root,allow_virtual_charges) + countChargesSubtree(bracket,allow_virtual_charges); } static private int countChargesSubtree(Residue node, boolean allow_virtual_charges) { if( node==null ) return 0; int no_charges = node.getType().getNoCharges(); if( no_charges==0 && allow_virtual_charges && node.isLCleavage() ) no_charges = node.getCleavedResidue().getType().getNoCharges(); for( Linkage l : node.getChildrenLinkages() ) no_charges += countChargesSubtree(l.getChildResidue(),allow_virtual_charges); return no_charges; } /** Compute the mass of the molecule given the current mass settings. */ public double computeMass() { if( hasRepetition() ) return -1.; return computeMass(root) + computeMass(bracket); } /** Return the number of positions available for methylation. */ public int computeNoMethylPositions() { return computeNoMethylPositions(root) + computeNoMethylPositions(bracket); } /** Return the number of positions available for acetylation. */ public int computeNoAcetylPositions() { return computeNoAcetylPositions(root) + computeNoAcetylPositions(bracket); } /** Compute the mass-to-charge ratio given the current mass settings. */ public double computeMZ() { double mass = computeMass(); return mass_options.ION_CLOUD.and(mass_options.NEUTRAL_EXCHANGES).computeMZ(mass); } /** Compute the chemical formula for this structure. @see Molecule */ public Molecule computeMolecule() throws Exception { Molecule ret = new Molecule(); Molecule subst_mol = substitutionMolecule(); computeMolecule(ret,root,subst_mol); computeMolecule(ret,bracket,subst_mol); return ret; } /** Compute the chemical formula for this structure comprising associated charges and neutral exchanges. @see Molecule */ public Molecule computeIon() throws Exception { Molecule ret = computeMolecule(); ret.add(mass_options.ION_CLOUD.getMolecule()); ret.add(mass_options.NEUTRAL_EXCHANGES.getMolecule()); return ret; } /** Return all associated charges and neutral exchanges */ public IonCloud getChargesAndExchanges() { return mass_options.ION_CLOUD.and(mass_options.NEUTRAL_EXCHANGES); } private boolean isDropped(ResidueType type) { if( type.isDroppedWithMethylation() && (mass_options.DERIVATIZATION.equals(MassOptions.PERMETHYLATED) || mass_options.DERIVATIZATION.equals(MassOptions.PERDMETHYLATED)) ) return true; if( type.isDroppedWithAcetylation() && (mass_options.DERIVATIZATION.equals(MassOptions.PERACETYLATED) || mass_options.DERIVATIZATION.equals(MassOptions.PERACETYLATED)) ) return true; return false; } private int noSubstitutions(ResidueType type) { if( mass_options.DERIVATIZATION.equals(MassOptions.PERMETHYLATED) || mass_options.DERIVATIZATION.equals(MassOptions.PERDMETHYLATED) ) return type.getNoMethyls(); if( mass_options.DERIVATIZATION.equals(MassOptions.PERACETYLATED) || mass_options.DERIVATIZATION.equals(MassOptions.PERDACETYLATED) ) return type.getNoAcetyls(); return 0; } private double substitutionMass() { if( mass_options.DERIVATIZATION.equals(MassOptions.PERMETHYLATED) ) return (MassUtils.methyl.getMass() - MassUtils.hydrogen.getMass()); if( mass_options.DERIVATIZATION.equals(MassOptions.PERDMETHYLATED) ) return (MassUtils.dmethyl.getMass() - MassUtils.hydrogen.getMass()); if( mass_options.DERIVATIZATION.equals(MassOptions.PERACETYLATED) ) return (MassUtils.acetyl.getMass() - MassUtils.hydrogen.getMass()); if( mass_options.DERIVATIZATION.equals(MassOptions.PERDACETYLATED) ) return (MassUtils.dacetyl.getMass() - MassUtils.hydrogen.getMass()); return 0.; } private Molecule substitutionMolecule() throws Exception { if( mass_options.DERIVATIZATION.equals(MassOptions.PERMETHYLATED) ) return MassUtils.methyl.and(MassUtils.hydrogen,-1); if( mass_options.DERIVATIZATION.equals(MassOptions.PERDMETHYLATED) ) return MassUtils.dmethyl.and(MassUtils.hydrogen,-1); if( mass_options.DERIVATIZATION.equals(MassOptions.PERACETYLATED) ) return MassUtils.acetyl.and(MassUtils.hydrogen,-1); if( mass_options.DERIVATIZATION.equals(MassOptions.PERDACETYLATED) ) return MassUtils.dacetyl.and(MassUtils.hydrogen,-1); return new Molecule(); } private double computeMass(Residue node) { if( node==null ) return 0.; ResidueType type = node.getType(); int no_bonds = node.getNoBonds(); // add mass of the saccharide double mass = type.getMass(); // modify for alditol if( node.isReducingEnd() && node.getType().makesAlditol() ) mass += 2*MassUtils.hydrogen.getMass(); if( node.isBracket() ) { int no_linked_labiles = Math.min(countLabilePositions(),countDetachedLabiles()); mass -= (no_bonds-no_linked_labiles)*substitutionMass(); } else if( node.isCleavage() && !node.isRingFragment() ) { // cleavages have no derivatization if( node.isReducingEnd() && !node.hasChildren() ) { // fix for composition //mass += MassOptions.H2O; mass += substitutionMass(); } } else { // add groups if( isDropped(type) ) mass -= (type.getMass() - MassUtils.water.getMass() - substitutionMass()); else mass += (noSubstitutions(type)-no_bonds)*substitutionMass(); } // add children for( Linkage l : node.getChildrenLinkages() ) { mass -= MassUtils.water.getMass()*l.getNoBonds(); // remove a water molecule for each bond mass += computeMass(l.getChildResidue()); } return mass; } private void computeMolecule(Molecule ret, Residue node, Molecule substitution_molecule) throws Exception { if( node==null ) return; ResidueType type = node.getType(); int no_bonds = node.getNoBonds(); // add residue if( type.getMolecule()==null ) throw new Exception("Cannot compute molecule for residue: " + node.getTypeName()); ret.add(type.getMolecule()); // modify for alditol if( node.isReducingEnd() && node.getType().makesAlditol() ) ret.add(MassUtils.hydrogen,2); // fix for persubstitutions if( node.isBracket() ) { // modify for labiles int no_linked_labiles = Math.min(countLabilePositions(),countDetachedLabiles()); ret.remove(substitution_molecule,no_bonds-no_linked_labiles); } else if( node.isCleavage() && !node.isRingFragment() ) { // cleavages have no derivatization if( node.isReducingEnd() && !node.hasChildren() ) { // modify for composition ret.add(substitution_molecule); } } else { if( isDropped(type) ) { ret.remove(type.getMolecule()); ret.add(MassUtils.water); ret.add(substitution_molecule); } else ret.add(substitution_molecule,noSubstitutions(type)-no_bonds); } // add children for( Linkage l : node.getChildrenLinkages() ) { ret.remove(MassUtils.water,l.getNoBonds()); // remove a water molecule for each bond computeMolecule(ret,l.getChildResidue(),substitution_molecule); } } private int computeNoMethylPositions(Residue node) { if( node==null ) return 0; int ret = 0; int no_bonds = node.getNoBonds(); if( node.isBracket() ) { int no_linked_labiles = Math.min(countLabilePositions(),countDetachedLabiles()); ret -= (no_bonds-no_linked_labiles); } else { if( node.getType().isDroppedWithMethylation() ) ret -= 1; else ret += node.getType().getNoMethyls()-no_bonds; } // add children for( Linkage l : node.getChildrenLinkages() ) ret += computeNoMethylPositions(l.getChildResidue()); return ret; } private int computeNoAcetylPositions(Residue node) { if( node==null ) return 0; int ret = 0; int no_bonds = node.getNoBonds(); if( node.isBracket() ) { int no_linked_labiles = Math.min(countLabilePositions(),countDetachedLabiles()); ret -= (no_bonds-no_linked_labiles); } else { if( node.getType().isDroppedWithAcetylation() ) ret -= 1; else ret += node.getType().getNoAcetyls()-no_bonds; } // add children for( Linkage l : node.getChildrenLinkages() ) ret += computeNoMethylPositions(l.getChildResidue()); return ret; } //-------------------- // handling of labile residues /** Return true if any of the residue can drop during fragmentation. */ public boolean hasLabileResidues() { return hasLabileResidues(root) || hasLabileResidues(bracket); } private boolean hasLabileResidues(Residue current) { if( current==null ) return false; if( current.isLabile() ) return true; for( Linkage l : current.getChildrenLinkages() ) if( hasLabileResidues(l.getChildResidue()) ) return true; return false; } /** Detach all labile residues in the structure, move the residues to the bracket and put labile cleavage markers in their place. */ public Glycan detachLabileResidues() { Glycan ret = this.clone(); ret.detachLabileResidues(ret.getRoot()); return ret; } private void detachLabileResidues(Residue current) { if( current==null ) return; if( current.isLabile() ) { // create cleavage Residue l_leaf = new Residue(ResidueType.createLCleavage()); l_leaf.setCleavedResidue(current); // attach cleavage to structure current.getParentLinkage().setChildResidue(l_leaf); l_leaf.setParentLinkage(current.getParentLinkage()); current.setParentLinkage(null); // add to labiles set addAntenna(current.cloneResidue()); } else { for( Linkage l : current.getChildrenLinkages() ) detachLabileResidues(l.getChildResidue()); } } /** Return the configuration of detached labile residues. @see #detachLabileResidues */ public TypePattern getDetachedLabilesPattern() { TypePattern conf = new TypePattern(); if( bracket==null ) return conf; for( Linkage l : bracket.getChildrenLinkages() ) if( l.getChildResidue().isLabile() ) conf.add(l.getChildResidue().getTypeName()); return conf; } /** Return the configuration of labile residues contained in the structure. @see #detachLabileResidues */ public TypePattern getAllLabilesPattern() { TypePattern conf = new TypePattern(); getAllLabilesPattern(root,conf); getAllLabilesPattern(bracket,conf); return conf; } private void getAllLabilesPattern(Residue current, TypePattern conf) { if( current==null ) return; if( current.isLabile() ) conf.add(current.getTypeName()); for( Linkage l : current.getChildrenLinkages() ) getAllLabilesPattern(l.getChildResidue(),conf); } /** Return the configuration of empty labile positions as marked by labile cleavages. @see #detachLabileResidues */ public TypePattern getLabilePositionsPattern() { TypePattern conf = new TypePattern(); getLabilePositionsPattern(root,conf); return conf; } private void getLabilePositionsPattern(Residue current, TypePattern conf) { if( current==null ) return; if( current.isLCleavage() ) conf.add(current.getCleavedResidue().getTypeName()); for( Linkage l : current.getChildrenLinkages() ) getLabilePositionsPattern(l.getChildResidue(),conf); } /** Return the number of detached labile residues. @see #detachLabileResidues */ public int countDetachedLabiles() { int count = 0; if( bracket!=null ) { for( Linkage l : bracket.getChildrenLinkages() ) if( l.getChildResidue().isLabile() ) count++; } return count; } /** Count the number empty labile positions as marked by labile cleavages. @see #detachLabileResidues */ public int countLabilePositions() { return countLabilePositions(root) + countLabilePositions(bracket); } private int countLabilePositions(Residue current) { if( current==null ) return 0; if( current.isLCleavage() ) return 1; int count = 0; for( Linkage l : current.getChildrenLinkages() ) count += countLabilePositions(l.getChildResidue()); return count; } /** Return a copy of the current structure where all detached labile residues are removed from the bracket. @see #detachLabileResidues */ public Glycan removeDetachedLabiles() { Glycan ret = this.clone(); if( ret.bracket==null ) return ret; for( Iterator<Linkage> link_enum = ret.bracket.getChildrenLinkages().iterator(); link_enum.hasNext(); ) { Linkage l = link_enum.next(); if( l.getChildResidue().isLabile() ) { l.setParentResidue(null); link_enum.remove(); } } if( ret.bracket.getChildrenLinkages().size()==0 ) ret.bracket = null; return ret; } private void removeDetachedLabile(String typename) { if( bracket==null ) return; for(Linkage l : bracket.getChildrenLinkages() ) { if( l.getChildResidue().getTypeName().equals(typename) ) { bracket.removeChild(l.getChildResidue()); break; } } if( !bracket.hasChildren() ) bracket = null; } /** Return all possible structure resulting from the placement of a combination of the labile residues contained in <code>avail_labiles</code> on <code>structure</code> */ static public Collection<Glycan> getAllLabilesConfigurations(Glycan structure, TypePattern avail_labiles) { if( structure==null ) return new Vector<Glycan>(); return structure.getAllLabilesConfigurations(avail_labiles); } /** Return all possible structures resulting from the placement of a combination of the labile residues contained in this structure. */ public Collection<Glycan> getAllLabilesConfigurations() { Glycan dest = this.detachLabileResidues(); return dest.getAllLabilesConfigurations(dest.getDetachedLabilesPattern()); } protected Collection<Glycan> getAllLabilesConfigurations(TypePattern avail_labiles) { Vector<Glycan> ret = new Vector<Glycan>(); // init Glycan dest = this.removeDetachedLabiles(); int no_labiles = avail_labiles.size(); // simplest case, no labiles ret.add(dest); // general case all permutations test if( no_labiles>0 ) { TypePattern labile_pos = dest.getLabilePositionsPattern(); for( int i=1; i<=no_labiles && i<=labile_pos.size(); i++ ) { if( i==labile_pos.size() ) ret.add(dest.reattachAllLabileResidues()); else { for( TypePattern conf : labile_pos.subPatterns(i) ) ret.add(dest.addLabileResidues(conf)); } } } return ret; } private Glycan addLabileResidues(TypePattern conf) { Glycan ret = this.clone(); try { if( conf!=null ) { for( String typename : conf.getTypes() ) ret.addAntenna(ResidueDictionary.newResidue(typename)); } } catch(Exception e) { LogUtils.report(e); } return ret; } public Glycan reattachAllLabileResidues() { Glycan ret = this.clone(); ret.reattachAllLabileResidues(ret.getRoot()); return ret; } private void reattachAllLabileResidues(Residue current) { if( current==null ) return; if( current.isLCleavage() ) { // attach labile residue current.getParentLinkage().setChildResidue(current.getCleavedResidue()); current.getCleavedResidue().setParentLinkage(current.getParentLinkage()); // detach cleavage current.setParentLinkage(null); // remove a labile residue of the same type from bracket removeDetachedLabile(current.getCleavedResidue().getTypeName()); } else { for( Linkage l : current.getChildrenLinkages() ) reattachAllLabileResidues(l.getChildResidue()); } } //------------------------ // serialization /** Create a new glycan structure from a string. @see GWSParser#fromString */ static public Glycan fromString(String str) { try { return GWSParser.fromString(str,new MassOptions()); } catch(Exception e) { LogUtils.report(e); return null; } } /** Create a new glycan structure from a string with the specific mass settings. @see GWSParser#fromString */ static public Glycan fromString(String str, MassOptions default_mass_options) { try { return GWSParser.fromString(str,default_mass_options); } catch(Exception e) { LogUtils.report(e); return null; } } /** Return a string representation of the structure. @see GWSParser#toString */ public String toString() { return GWSParser.toString(this,false); } /** Return a string representation of the structure where each subtree is ordered. @see GWSParser#toString */ public String toStringOrdered() { return GWSParser.toString(this,true); } /** Return a string representation of the structure where each subtree is ordered. @param add_massopt if <code>false</code> do not add the mass settings to the string representation @see GWSParser#toString */ public String toStringOrdered(boolean add_massopt) { return GWSParser.toString(this,true,add_massopt); } /** Return a string representation of the structure in the GlycoCT format @see GlycoCTParser#toGlycoCT */ public String toGlycoCT() { return new GlycoCTParser(false).toGlycoCT(this); } /** Return a string representation of the structure in the GlycoCT condensed format @see GlycoCTCondensedParser#toGlycoCTCondensed */ public String toGlycoCTCondensed() { return new GlycoCTCondensedParser(false).toGlycoCTCondensed(this); } /** Return a {@link Sugar Sugar} object representing this structure. @see GlycoCTParser#toSugar */ public Sugar toSugar() throws Exception { return new GlycoCTParser(false).toSugar(this); } /** Create a new glycan structure from a string in GlycoCT format. @see GlycoCTParser#fromGlycoCT */ static public Glycan fromGlycoCT(String str) { try { return new GlycoCTParser(false).fromGlycoCT(str,new MassOptions()); } catch(Exception e) { e.printStackTrace(); LogUtils.report(e); return null; } } /** Create a new glycan structure from a string in GlycoCT format with the specific mass settings. @see GlycoCTParser#fromGlycoCT */ static public Glycan fromGlycoCT(String str, MassOptions default_mass_options) { try { return new GlycoCTParser(false).fromGlycoCT(str,default_mass_options); } catch(Exception e) { LogUtils.report(e); return null; } } /** Create a new glycan structure from a string in GlycoCT condensed format. @see GlycoCTCondensedParser#fromGlycoCTCondensed */ static public Glycan fromGlycoCTCondensed(String str) { try { return new GlycoCTCondensedParser(false).fromGlycoCTCondensed(str,new MassOptions()); } catch(Exception e) { e.printStackTrace(); LogUtils.report(e); return null; } } /** Create a new glycan structure from a string in GlycoCT condensed format. @param tolerate_unknown if <code>true</code> tolerate residues of a type that is not specified in the dictionary @see ResidueDictionary @see GlycoCTCondensedParser#fromGlycoCTCondensed */ static public Glycan fromGlycoCTCondensed(String str, boolean tolerate_unknown) { try { return new GlycoCTCondensedParser(tolerate_unknown).fromGlycoCTCondensed(str,new MassOptions()); } catch(Exception e) { e.printStackTrace(); LogUtils.report(e); return null; } } /** Create a new glycan structure from a string in GlycoCT condensed format with the specific mass settings. @see GlycoCTCondensedParser#fromGlycoCTCondensed */ static public Glycan fromGlycoCTCondensed(String str, MassOptions default_mass_options) { try { return new GlycoCTCondensedParser(false).fromGlycoCTCondensed(str,default_mass_options); } catch(Exception e) { LogUtils.report(e); return null; } } /** Create a new glycan structure from its XML representation as part of a DOM tree. */ static public Glycan fromXML(Node s_node, MassOptions default_mass_options) throws Exception { Glycan ret = fromString(XMLUtils.getAttribute(s_node,"structure"),default_mass_options); return ret; } /** Create an XML representation of the current glycan structure to be part of a DOM tree. */ public Element toXML(Document document) { if( document==null ) return null; Element s_node = document.createElement("Glycan"); if( s_node==null ) return null; s_node.setAttribute("structure", toString()); return s_node; } /** Default SAX handler to read a representation of this object from an XML stream. */ public static class SAXHandler extends SAXUtils.ObjectTreeHandler { private MassOptions default_mass_options; /** Default constructor. */ public SAXHandler(MassOptions _default_mass_options) { default_mass_options = _default_mass_options; } protected boolean isElement(String namespaceURI, String localName, String qName) { return qName.equals(getNodeElementName()); } public static String getNodeElementName() { return "Glycan"; } protected void initContent(String namespaceURI, String localName, String qName, Attributes atts) throws SAXException { super.initContent(namespaceURI,localName,qName,atts); object = Glycan.fromString(atts.getValue("structure"),default_mass_options); } } /** Write a representation of this object into an XML stream using a SAX handler. */ public void write(TransformerHandler th) throws SAXException { AttributesImpl atts = new AttributesImpl(); atts.addAttribute("","","structure","CDATA",this.toString()); th.startElement("","","Glycan",atts); th.endElement("","","Glycan"); } }