/* * EuroCarbDB, a framework for carbohydrate bioinformatics * * Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as * indicated by the @author tags or express copyright attribution * statements applied by the authors. * * This copyrighted material is made available to anyone wishing to use, modify, * copy, or redistribute it subject to the terms and conditions of the GNU * Lesser General Public License, as published by the Free Software Foundation. * A copy of this license accompanies this distribution in the file LICENSE.txt. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * Last commit: $Rev: 1210 $ by $Author: glycoslave $ on $Date:: 2009-06-12 #$ */ package org.eurocarbdb.resourcesdb.monosaccharide; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import org.eurocarbdb.resourcesdb.Config; import org.eurocarbdb.resourcesdb.ResourcesDbException; import org.eurocarbdb.resourcesdb.atom.Atom; import org.eurocarbdb.resourcesdb.glycoconjugate_derived.LinkageType; import org.eurocarbdb.resourcesdb.template.BasetypeTemplate; import org.eurocarbdb.resourcesdb.template.BasetypeTemplateContainer; import org.eurocarbdb.resourcesdb.template.TemplateContainer; /** * Methods to validate / check Monosaccharides * @author Thomas Luetteke * */ public class MonosaccharideValidation { public static void checkMonosaccharideConsistency(Monosaccharide ms, TemplateContainer container) throws ResourcesDbException { MonosaccharideValidation.checkMonosaccharideConsistency(ms, container, true); } public static void checkMonosaccharideConsistency(Monosaccharide ms, TemplateContainer container, Config conf) throws ResourcesDbException { MonosaccharideValidation.checkMonosaccharideConsistency(ms, container, !conf.isPreserveAlditolOrientation()); } /** * Do a number of checks on a monosaccharide to test if it is well-formed. * These checks include tests, if given positions are within the allowed ranges, * if the anomeric state is consistent with the ring type, * if given substitutions and / or core modifications exclude each other or are in conflict with the ring closure positions. * Besides, it is tested if the stereocode is in accordance with modifications that cause a loss of stereochemistry, * if alditols are given in the correct orientation, * and if the deoxygenation pattern of "enx" core modifications is indeed unknown. * The enx check result in automatic adjustment of the monosaccharide if the data is inappropriate. * The same applies to the alditol orientation check, if the checkOrientation parameter is true. * @param ms the monosaccharide to be checked * @param container a TemplateContainer to get Templates needed for the checks * @param checkOrientation a flag to indicate whether alditol orientation shall be checked or not * @throws MonosaccharideException in case any of the checks fails. * @throws ResourcesDbException e.g. in case templates are not set */ public static void checkMonosaccharideConsistency(Monosaccharide ms, TemplateContainer container, boolean checkOrientation) throws ResourcesDbException { //System.out.println("check ms: " + ms.toString()); //*** check, if carbonyl position and ring oxygen are within allowed range: *** if(ms.getRingStart() > ms.getSize() || ms.getRingStart() < -1) { throw new MonosaccharideException("Carbonyl position out of range: " + ms.getRingStart()); } if(ms.getRingEnd() > ms.getSize() || ms.getRingEnd() < -1) { throw new MonosaccharideException("Ring oxygen position out of range: " + ms.getRingEnd()); } //*** check, if anomer and ringtype / core modifications are consistent: *** if(ms.getRingtype().equals(Ringtype.OPEN)) { if(ms.getAnomer().equals(Anomer.ALPHA) || ms.getAnomer().equals(Anomer.BETA)) { throw new MonosaccharideException("Anomer in open chain residue set."); } } else if(ms.isStereolossPositionWithIgnoreType(ms.getRingStart(), CoreModificationTemplate.KETO)) { if(ms.getAnomer().equals(Anomer.ALPHA) || ms.getAnomer().equals(Anomer.BETA)) { throw new MonosaccharideException("Anomer set in residue with non-chiral anomeric center."); } if(ms.getAnomer().equals(Anomer.UNKNOWN)) { ms.setAnomer(Anomer.NONE); ms.setStereoStr(Stereocode.setPositionInStereoString(ms.getStereoStr(), StereoConfiguration.Nonchiral, ms.getRingStart())); } } else { if(ms.getAnomer().equals(Anomer.NONE)) { throw new MonosaccharideException("Anomer 'none' in residue with chiral anomeric center."); } } //*** check modifications: *** //*** check, if modification positions are possible, *** //*** and if present combinations of modifications are allowed *** //TODO: take uncertain positions into account for(int pos = 1; pos <= ms.getSize(); pos++) { //*** get lists of coreModifications / substitutions at current position: *** ArrayList<CoreModification> coreModList = ms.getCoreModificationsByPosition(pos); ArrayList<Substitution> substList = ms.getSubstitutionsByPosition(pos); if(coreModList.size() == 0 && substList.size() == 0) { //*** no modifications present at current position *** continue; } //*** check core modifications: *** //*** rules for ring oxygen position: *** if(pos == ms.getRingEnd()) { if(substList.size() > 0) { for(Substitution roSubst : substList) { if(!(roSubst.getTemplate().isCanReplaceRingOxygen() || roSubst.getLinkagetypeByPosition(pos).equals(LinkageType.H_LOSE))) { throw new MonosaccharideException("Substitution at ring oxygen."); } } } int enCount = 0; for(CoreModification mod : coreModList) { if(mod.getTemplate().equals(CoreModificationTemplate.EN) || mod.getTemplate().equals(CoreModificationTemplate.ENX)) { enCount ++; } else { throw new MonosaccharideException("Disallowed modification at ring oxygen: " + mod.getName()); } } if(enCount > 1) { throw new MonosaccharideException("Multiple 'en' modifications at ring oxygen."); } } //*** rules for carbonyl position: *** if(pos == ms.getRingStart()) { if(substList.size() > 0 && ms.getRingEnd() == Basetype.OPEN_CHAIN && !ms.isAlditol()) { throw new MonosaccharideException("Substitution at carbonyl position of an open chain monosaccharide."); } if(pos > 1) { if(ms.isAlditol()) { throw new MonosaccharideException("Alditols are not defined for ketoses"); } if(ms.getCoreModification(CoreModificationTemplate.EN.getName(), pos) != null || ms.getCoreModification(CoreModificationTemplate.ENX.getName(), pos) != null) { //*** anomeric center of a ketose is involved in a double bond: must be deoxy *** if(ms.getCoreModification(CoreModificationTemplate.DEOXY.getName(), pos) == null) { ms.addCoreModification(new CoreModification(CoreModificationTemplate.DEOXY, pos)); } } } if(ms.getRingEnd() == Basetype.OPEN_CHAIN) { //*** open chain: only acid, en/x and sp2 core modifications allowed at carbonyl position *** for(CoreModification mod : coreModList) { if(mod.getTemplate().equals(CoreModificationTemplate.ACID)) { continue; } if(mod.getTemplate().equals(CoreModificationTemplate.SP2)) { continue; } if(mod.getTemplate().equals(CoreModificationTemplate.EN)) { continue; } if(mod.getTemplate().equals(CoreModificationTemplate.ENX)) { continue; } if(mod.getTemplate().equals(CoreModificationTemplate.DEOXY)) { if(ms.getRingEnd() > 1 || !ms.isAlditol()) { throw new MonosaccharideException("Deoxy modification at carbonyl position of open chain residue."); } } if(mod.getTemplate().equals(CoreModificationTemplate.KETO)) { continue; } throw new MonosaccharideException("Modification " + mod.getName() + " not allowed at carbonyl group of open chain residue."); } } } //*** rules by modification type: *** int bondSum = 2; if(pos == 1 && ms.getRingStart() > 1) { bondSum--; } if(pos == ms.getSize() && !(pos == ms.getRingEnd())) { bondSum--; } for(CoreModification mod : coreModList) { if(mod.getTemplate().equals(CoreModificationTemplate.ACID)) { if(pos > 1 && pos < ms.getSize()) { throw new MonosaccharideException("Acid modification within backbone (pos. " + pos + ")"); } if(pos == 1 && ms.getRingStart() < 2 && ms.getRingEnd() > 0) { //TODO: check, if lactone modification is present, add it if not throw new MonosaccharideException("Acid modification at ring closure position (" + pos + ")."); } if(pos == ms.getRingEnd()) { throw new MonosaccharideException("Acid modification at ring closure position (" + pos + ")."); } if(coreModList.size() > 1) { //*** acid modification must not occurr together with other core modifications at one position *** throw new MonosaccharideException("Acid modification at otherwise modified position (" + pos + ")."); } if(pos == 1 && ms.isAlditol()) { throw new MonosaccharideException("Excluding modifications: Alditol and 1-Acid"); } } else if(mod.getTemplate().equals(CoreModificationTemplate.KETO)) { if(substList.size() > 0) { throw new MonosaccharideException("Substitution at keto position (" + pos + ")."); } if(CoreModificationTemplate.modListContainsModType(coreModList, CoreModificationTemplate.DEOXY) && pos != ms.getRingStart()) { throw new MonosaccharideException("Deoxygenation at keto position (" + pos + ")."); } if(CoreModificationTemplate.modListContainsModType(coreModList, CoreModificationTemplate.SP2)) { if(pos != ms.getRingStart()) { throw new MonosaccharideException("Sp2 hybride at keto position (" + pos + ")."); } } if(ms.isAlditol()) { throw new MonosaccharideException("Core modifications ALDITOL and KETO must not occurr together in one monosaccharide."); } } else if(mod.getTemplate().equals(CoreModificationTemplate.DEOXY)) { for(Substitution subst : substList) { if(!subst.getLinkagetypeByPosition(pos).equals(LinkageType.H_LOSE)) { throw new MonosaccharideException("Substitution at deoxy position (" + pos + ")."); } } } else if(mod.getTemplate().equals(CoreModificationTemplate.SP2)) { //*** sp2 hybride requires substituent linked via double bond: *** boolean hasDoublebondSubst = false; for(Substitution subst : substList) { if(subst.getBondOrder1() == 2) { hasDoublebondSubst = true; break; } if(subst.getBondOrder2() == 2) { hasDoublebondSubst = true; break; } } if(!hasDoublebondSubst) { throw new MonosaccharideException("Sp2 hybride without substitution of bond order 2"); } } else if(mod.getTemplate().equals(CoreModificationTemplate.ANHYDRO)) { for(Substitution subst : substList) { if(!subst.getLinkagetypeByPosition(pos).equals(LinkageType.H_LOSE)) { throw new MonosaccharideException("Substitution at anhydro position (" + pos + ")."); } } if(CoreModificationTemplate.modListContainsModType(coreModList, CoreModificationTemplate.DEOXY)) { throw new MonosaccharideException("Deoxygenation at anhydro position (" + pos + ")."); } if(mod.getIntValuePosition2() == pos + 1) { //*** set modification type to "epoxy" if anhydro modification occurs between neighbouring carbons *** mod.setTemplate(CoreModificationTemplate.EPOXY); } } else if(mod.getTemplate().equals(CoreModificationTemplate.EPOXY)) { for(Substitution subst : substList) { if(!subst.getLinkagetypeByPosition(pos).equals(LinkageType.H_LOSE)) { throw new MonosaccharideException("Substitution at epoxy position (" + pos + ")."); } } if(CoreModificationTemplate.modListContainsModType(coreModList, CoreModificationTemplate.DEOXY)) { throw new MonosaccharideException("Deoxygenation at epoxy position (" + pos + ")."); } if(mod.getIntValuePosition2() != mod.getIntValuePosition1() + 1) { throw new MonosaccharideException("Epoxy modification must be at neighbouring carbons (positions present: " + mod.getIntValuePosition1() + "-" + mod.getIntValuePosition2() + ")"); } } } //*** check substitutions: *** HashMap<LinkageType, Integer> linktypeCountMap = new HashMap<LinkageType, Integer>(); for(Substitution subst : substList) { if(subst.containsPosition1(pos)) { bondSum += subst.getBondOrder1(); if(subst.getLinkagetype1() != null) { Integer linkcount = linktypeCountMap.get(subst.getLinkagetype1()); if(linkcount == null) { linkcount = new Integer(0); } linkcount ++; linktypeCountMap.put(subst.getLinkagetype1(), linkcount); } } if(subst.containsPosition2(pos)) { bondSum += subst.getBondOrder2(); if(subst.getLinkagetype2() != null) { Integer linkcount = linktypeCountMap.get(subst.getLinkagetype2()); if(linkcount == null) { linkcount = new Integer(0); } linkcount ++; linktypeCountMap.put(subst.getLinkagetype2(), linkcount); } } } if(bondSum > 4) { throw new MonosaccharideException("Modifications result in " + bondSum + " bonds for C" + pos); } for(LinkageType linktype : linktypeCountMap.keySet()) { Integer linkcount = linktypeCountMap.get(linktype); if(linkcount.intValue() > 1) { throw new MonosaccharideException(linkcount.intValue() + " substituents of LinkageType " + linktype + " at position " + pos); } if(linktype.equals(LinkageType.H_AT_OH) && linkcount.intValue() > 0) { Integer deoxyCount = linktypeCountMap.get(LinkageType.DEOXY); if(deoxyCount != null && deoxyCount.intValue() > 0) { throw new MonosaccharideException("Substituents of LinkageTypes DEOXY and H_AT_OH at position " + pos); } } if(linktype.equals(LinkageType.H_LOSE) && linkcount.intValue() > 0) { Integer rsCount = linktypeCountMap.get(LinkageType.R_CONFIG); if(rsCount != null && rsCount.intValue() > 0) { throw new MonosaccharideException("Substituents of LinkageTypes H_LOSE and R_CONFIG at position " + pos); } rsCount = linktypeCountMap.get(LinkageType.S_CONFIG); if(rsCount != null && rsCount.intValue() > 0) { throw new MonosaccharideException("Substituents of LinkageTypes H_LOSE and S_CONFIG at position " + pos); } } } } //*** other checks: *** MonosaccharideValidation.checkStereocodeConsistency(ms); MonosaccharideValidation.checkEnxDeoxygenationStates(ms); MonosaccharideValidation.checkModifications(ms); if(checkOrientation) { MonosaccharideValidation.checkAlditolOrientation(ms, container.getBasetypeTemplateContainer()); } } /** * Test, if the stereocode of a monosaccharide is in accordance with the residue size and the core modifications that cause a loss of stereochemistry * @param ms the monosaccharide to be checked * @throws MonosaccharideException in case the test fails */ public static void checkStereocodeConsistency(Monosaccharide ms) throws MonosaccharideException { String stereo = ms.getStereoStr(); if(stereo.length() != ms.getSize()) { throw new MonosaccharideException("Stereocode / residue size mismatch (" + stereo.length() + "/" + ms.getSize() + ")"); } if(!ms.isSuperclass()) { ArrayList<Integer> stereoloss = ms.getStereolossPositions(); for(Integer posInt : stereoloss) { int pos = posInt.intValue(); if(pos == 0) { continue; } if(pos == ms.getRingStart() && !ms.getAnomer().equals(Anomer.NONE) && ! ms.getAnomer().equals(Anomer.OPEN_CHAIN)) { continue; } if(stereo.charAt(pos - 1) != StereoConfiguration.Nonchiral.getStereosymbol()) { throw new MonosaccharideException("Stereocode error: position " + pos + " should be nonchiral (" + stereo + ")"); } } } } /** * Test, if modifications are properly set. * This includes a check, if the template is set and if the given position(s) match the valence given in the template. * @param ms the Monosaccharide to check * @throws ResourcesDbException */ public static void checkModifications(Monosaccharide ms) throws ResourcesDbException { for(CoreModification mod : ms.getCoreModifications()) { if(mod.getTemplate() == null) { throw new MonosaccharideException("missing core modification template: " + mod); } for(Integer pos : mod.getPositions()) { if(pos < -1 || (pos > ms.getSize() && ms.getSize() > 0)) { throw new MonosaccharideException("Core modification (" + mod.getName() + ") position out of range: " + pos); } } if(mod.getTemplate().getValence() == 1) { if(mod.hasPosition2()) { throw new MonosaccharideException("position 2 set in monovalent modification: " + mod); } } else if(mod.getTemplate().getValence() == 2) { if(!mod.hasPosition2()) { throw new MonosaccharideException("missing position 2 of divalent modification: " + mod); } } } for(Substitution subst : ms.getSubstitutions()) { if(subst.getTemplate() == null) { throw new MonosaccharideException("missing substituent template: " + subst); } for(Integer pos : subst.getPositions()) { if(pos < -1 || (pos > ms.getSize() && ms.getSize() > 0)) { throw new MonosaccharideException("Substitution (" + subst.getName() + ") position out of range: " + pos); } } if(subst.getTemplate().getMaxValence() == 1) { if(subst.hasPosition2()) { throw new MonosaccharideException("position 2 set in monovalent substitution: " + subst); } } else if(subst.getTemplate().getMinValence() == 2) { if(!subst.hasPosition2()) { throw new MonosaccharideException("missing position 2 of divalent substitution: " + subst); } } if(LinkageType.H_LOSE.equals(subst.getLinkagetype1()) || LinkageType.R_CONFIG.equals(subst.getLinkagetype1()) || LinkageType.S_CONFIG.equals(subst.getLinkagetype1())) { if(subst.getIntValuePosition1() == 1 || subst.getIntValuePosition1() == ms.getSize()) { Atom linkatom = subst.getLinkingAtom1(); if(linkatom.getElementSymbol().equalsIgnoreCase("C")) { throw new MonosaccharideException("C-linked substituent at terminal carbon."); } } } if(subst.hasPosition2()) { if(LinkageType.H_LOSE.equals(subst.getLinkagetype2()) || LinkageType.R_CONFIG.equals(subst.getLinkagetype2()) || LinkageType.S_CONFIG.equals(subst.getLinkagetype2())) { if(subst.getIntValuePosition2() == 1 || subst.getIntValuePosition2() == ms.getSize()) { try { if(subst.getLinkingAtom2().getElementSymbol().equalsIgnoreCase("C")) { throw new MonosaccharideException("C-linked substituent at terminal carbon."); } } catch(ResourcesDbException rEx) { throw new ResourcesDbException("Cannot check H_LOSE linked subst. at terminal carbon: no linking atom defined", rEx); } } } } } } /** * Check the deoxygenation patterns of "enx" core modifications present in a monosaccharide. * If an "enx" modification has a defined deoxygenation pattern, the modification type is changed to "en". * @param ms the monosaccharide to be checked */ public static void checkEnxDeoxygenationStates(Monosaccharide ms) { for(CoreModification mod : ms.getCoreModifications(CoreModificationTemplate.ENX.getName())) { if(enDeoxypatternConfident(ms, mod)) { try { mod.changeType(CoreModificationTemplate.EN); } catch (MonosaccharideException e) { } } } } /** * Check, if the deoxygenation pattern of an "en"/"enx" core modification is certain * If this is the case, an "enx" modification can be changed to "en". * @param ms the monosaccharide to be checked * @param mod: the "en" or "enx" core modification * @return true, if the deoxy pattern is certain, otherwise false */ public static boolean enDeoxypatternConfident(Monosaccharide ms, CoreModification mod) { int position1 = mod.getIntValuePosition1(); if(position1 == 0) { return(false); } int position2 = position1 + 1; return(enDeoxyStatusPositionConfident(ms, position1) && enDeoxyStatusPositionConfident(ms, position2)); } /** * Check, if the deoxygenation status of a single position is certain * @param ms the monosaccharide to be checked * @param position: the position of the "en" modification to be checked * @return true, if the deoxy pattern is certain, otherwise false */ public static boolean enDeoxyStatusPositionConfident(Monosaccharide ms, int position) { //*** the deoxygenation status is confident if: *** //*** a) the position comprises the ring oxygen or *** if(position == ms.getRingEnd()) { return true; } //*** b) the position is explicitely marked as deoxy or *** if(ms.getCoreModification("deoxy", position) != null) { return true; } //*** c) the position carries a substituent or *** if(ms.getSubstitutionsByPosition(position).size() > 0) { return true; } //*** d) the position is the ring start and is > 1 (must be deoxy in that case) *** if(ms.getRingStart() == position && position > 1) { try { ms.addCoreModification(new CoreModification(CoreModificationTemplate.DEOXY, position)); } catch(MonosaccharideException me) { if(Config.getGlobalConfig().isPrintErrorMsgs(1)) { System.err.println("Exception: " + me); me.printStackTrace(); } } return true; } return false; } /** * Check, if an alditol (or open chain aldaric acid) residue is given in the correct orientation according to IUPAC rules * If this is not the case, the orientation of the residue is changed, i.e. the residue is rotated by 180°. * The stereocode as well as modification / substitutent positions are adjusted as a result of this rotation. * @param ms the monosaccharide to be checked * @param container a BasetypeTemplateContainer to get Templates needed for the check * @return false, if orientation of residue was changed, otherwise true * @throws ResourcesDbException */ public static boolean checkAlditolOrientation(Monosaccharide ms, BasetypeTemplateContainer container) throws ResourcesDbException { if(!hasCorrectAlditolOrientation(ms, container)) { if(ms.isAlditol()) { if(ms.isUronic()) { //*** change combination of alditol + uronic acid to ulonic acid + no alditol (adjustment of acid position is done in rotateAlditol() below, so here only adjustment of alditol necessary) *** ms.setAlditol(false); } } ms.rotateAlditol(); return false; } else { return true; } } public static boolean hasCorrectAlditolOrientation(Monosaccharide ms, BasetypeTemplateContainer container) throws ResourcesDbException { //*** (1) check, if the residue can be rotated, i.e. if there is no defined direction in the backbone *** //*** this is the case if the residue is an alditol or an open chain aldaric acid or an open chain residue with keto function *** boolean isRotatable = false; if(ms.isAlditol()) { isRotatable = true; if(ms.isUronic()) { //*** combination of alditol + uronic acid has to be ulonic acid + no alditol *** /*ms.rotateAlditol(); ms.setAlditol(false);*/ return false; } } else if(ms.isAldaric()) { if(ms.getRingEnd() == -1) { isRotatable = true; } } else if(ms.getRingtype().equals(Ringtype.OPEN)) { if(ms.hasCoreModification(CoreModificationTemplate.KETO)) { isRotatable = true; } } if(!isRotatable) { return true; } //*** (2) residue can be rotated, so check, if correct orientation is given: *** //TODO: check, if open chain residue has keto function, and if that is at lowest possible position String stereo = ms.getStereoStr().replaceAll("" + StereoConfiguration.Nonchiral.getStereosymbol(), ""); if(stereo.contains("" + StereoConfiguration.Unknown.getStereosymbol())) { //TODO: Implement checks based just on modifications positions in case the stereochemistry is entirely undefined (e.g. in Hex5N-ol) return true; //*** no checks possible *** } if(Stereocode.stereoStringContainsAbsoluteAndRelative(stereo)) { return true; //*** no checks possible *** } //*** check, if basetype resulting from current orientation is alphabetically lower than the one resulting from the rotated orientation: *** String stereo1; String stereo2; if(stereo.length() > 4) { stereo1 = stereo.substring(stereo.length() - 4, stereo.length()); stereo2 = Stereocode.rotateStereoString(stereo.substring(0, 4)); } else { stereo1 = stereo; stereo2 = Stereocode.rotateStereoString(stereo); } BasetypeTemplate basetype1 = container.getBasetypeTemplateByStereoString(stereo1); BasetypeTemplate basetype2 = container.getBasetypeTemplateByStereoString(stereo2); if(!basetype1.equals(basetype2)) { //*** basetypes resulting from the two orientations differ; check, if current one is the lexicographically lower one: *** if(basetype1.getBaseName().compareTo(basetype2.getBaseName()) > 0) { //ms.rotateAlditol(); return false; } } else { //*** basetypes resulting from the two orientations are the same, check configurations: *** StereoConfiguration config1 = Stereocode.getConfigurationFromStereoString(stereo1); StereoConfiguration config2 = Stereocode.getConfigurationFromStereoString(stereo2); if(!config1.equals(config2)) { //*** configurations differ; check, if current config is D *** if(config1.equals(StereoConfiguration.Laevus)) { //ms.rotateAlditol(); return false; } } else { //*** residue is symmetric; check, if modifications are at lowest possible positions: *** //TODO: check, if all modifications or only substitutions have to be counted here, and how to handle second position of "en" int modSum1 = 0; int modSum2 = 0; int sizePlusOne = ms.getSize() + 1; List<Substitution> modList = ms.getSubstitutions(); for(int m = 0; m < modList.size(); m++) { ArrayList<Integer> positions = modList.get(m).getPositions(); for(int i = 0; i < positions.size(); i++) { modSum1 += positions.get(i).intValue(); modSum2 += sizePlusOne - positions.get(i).intValue(); } } if(modSum1 > modSum2) { //ms.rotateAlditol(); return false; } else if(modSum1 == modSum2) { //*** check for lexicographic order of lowest different modifications for both orientations: *** for(int m = 1; m < sizePlusOne; m++) { ArrayList<Substitution> mod1 = ms.getSubstitutionsByPosition(m); ArrayList<Substitution> mod2 = ms.getSubstitutionsByPosition(sizePlusOne - m); if(mod1.size() > 0 && mod2.size() > 0) { //*** modification present at position m in both orientations; check lexicographic order: *** ArrayList<String> modNames1 = new ArrayList<String>(); ArrayList<String> modNames2 = new ArrayList<String>(); for(int i = 0; i < mod1.size(); i ++) { modNames1.add(mod1.get(i).getName()); } Collections.sort(modNames1); for(int i = 0; i < mod2.size(); i ++) { modNames2.add(mod2.get(i).getName()); } Collections.sort(modNames2); for(int i = 0; i < Math.min(modNames1.size(), modNames2.size()); i++) { if(modNames1.get(i).compareTo(modNames2.get(i)) > 0) { //ms.rotateAlditol(); return false; } } if(modNames1.size() < modNames2.size()) { //ms.rotateAlditol(); return false; } else if (modNames1.size() > modNames2.size()) { return true; } } else if(mod1.size() == 0 && mod2.size() > 0) { //ms.rotateAlditol(); return false; } else if(mod1.size() > 0 && mod2.size() == 0) { return true; } } } } } return true; } /** * Check, if a monosaccharide is fuzzily defined * @param ms the monosaccharide to be checked * @return true, if any uncertain property is found */ public static boolean checkFuzziness(Monosaccharide ms) { if(ms.getStereocode().hasUncertainPosition()) { return true; } if(ms.getRingEnd() == Basetype.UNKNOWN_RING) { return true; } if(ms.getRingStart() == Basetype.UNKNOWN_RING) { return true; } for(CoreModification mod : ms.getCoreModifications()) { if(mod.getIntValuePosition1() == 0) { return true; } if(mod.hasPosition2() && mod.getIntValuePosition2() == 0) { return true; } } for(Substitution subst : ms.getSubstitutions()) { if(subst.getIntValuePosition1() == 0) { return true; } if(subst.getIntValueSubstituentPosition1() == 0) { return true; } if(subst.hasPosition2()) { if(subst.getIntValuePosition2() == 0) { return true; } if(subst.getIntValueSubstituentPosition2() == 0) { return true; } } if(subst.getTemplate() == null) { return true; } if(subst.getTemplate().isFuzzy()) { return true; } } return false; } /** * A quick check to determine if a substituent can be added at a given position of a monosaccharide. * For the check, the already existing core modifications and substitutions are taken into account, unless they contain fuzzy positions. * @param ms: The monosaccharide to be checked * @param position: The position to be checked * @param linktype: The LinkageType by which a substituent shall be linked. * @return true, if a substituent can be attached to the given position, otherwise false */ public static boolean isSubstitutable(Monosaccharide ms, int position, LinkageType linktype) { if(position == 0) { //*** unknown position, no checks possible *** return true; } if((position < 0) || (position > ms.getSize())) { //*** position out of range *** return false; } if((position == ms.getRingStart()) || (position == ms.getRingEnd())) { return false; } //*** check core modifications: *** for(CoreModification mod: ms.getCoreModifications()) { if(linktype.equals(LinkageType.H_LOSE)) { if(mod.getTemplate().equals(CoreModificationTemplate.EN) || mod.getTemplate().equals(CoreModificationTemplate.ENX)) { if(mod.position1equals(position) || mod.position2equals(position)) { return false; //*** H_LOSE in combination with double bonds is not possible *** } } if(mod.getTemplate().equals(CoreModificationTemplate.DEOXY)) { continue; //*** H_LOSE modification can be added at a deoxy position *** } } if(mod.isSubstitutable()) { continue; } if(mod.position1equals(position)) { return false; } if(mod.position2equals(position)) { return false; } } //*** check substitutions: *** for(Substitution subst: ms.getSubstitutions()) { if(subst.hasPosition2()) { if(subst.position1equals(position) && subst.position2equals(position)) { return false; } if(linktype.equals(LinkageType.H_LOSE)) { if(subst.getLinkagetype1().equals(LinkageType.H_LOSE) && subst.position1equals(position)) { return false; } if(subst.getLinkagetype2().equals(LinkageType.H_LOSE) && subst.position2equals(position)) { return false; } } else { if(!subst.getLinkagetype1().equals(LinkageType.H_LOSE) && subst.position1equals(position)) { return false; } if(!subst.getLinkagetype2().equals(LinkageType.H_LOSE) && subst.position2equals(position)) { return false; } } } else { if(linktype.equals(LinkageType.H_LOSE)) { if(!subst.getLinkagetype1().equals(LinkageType.H_LOSE)) { continue; } } else { if(subst.getLinkagetype1().equals(LinkageType.H_LOSE)) { continue; } } if(subst.position1equals(position)) { return false; } } } return true; } /** * Check, if the core modifications of a monosaccharide imply that it is open chain. * Otherwise, the open chain form has to be explicitely stressed in CarbBank style notations. * @param ms the Monosaccharide to be checked * @return */ public static boolean impliesOpenChain(Monosaccharide ms) { if(ms.getRingtype().equals(Ringtype.OPEN)) { if(ms.isAlditol()) { return true; } if(ms.isAldaric() && !ms.hasCoreModification(CoreModificationTemplate.KETO)) { return true; } if(ms.isAldonic() && !ms.hasCoreModification(CoreModificationTemplate.KETO)) { return true; } } return false; } }