/*
*   EuroCarbDB, a framework for carbohydrate bioinformatics
*
*   Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as
*   indicated by the @author tags or express copyright attribution
*   statements applied by the authors.
*
*   This copyrighted material is made available to anyone wishing to use, modify,
*   copy, or redistribute it subject to the terms and conditions of the GNU
*   Lesser General Public License, as published by the Free Software Foundation.
*   A copy of this license accompanies this distribution in the file LICENSE.txt.
*
*   This program is distributed in the hope that it will be useful,
*   but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
*   or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
*   for more details.
*
*   Last commit: $Rev: 1210 $ by $Author: glycoslave $ on $Date:: 2009-06-12 #$
*/
package org.eurocarbdb.resourcesdb.monosaccharide;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;

import org.eurocarbdb.resourcesdb.Config;
import org.eurocarbdb.resourcesdb.ResourcesDbException;
import org.eurocarbdb.resourcesdb.glycoconjugate_derived.*;
import org.eurocarbdb.resourcesdb.template.TemplateContainer;

/**
* Routines to convert monosaccharide basetypes of the EUROCarbDB sugar object model
* to ones of the MonoSaccharideDB object model and vice versa.
*
* @author Thomas Luetteke
*/
public class BasetypeConversion {

    /** Number of stereo symbols consumed per basetype when a stereocode is split into basetypes. */
    private static final int STEREO_CHUNK_LENGTH = 4;

    /**
     * Lookup table from stereocode string to EuroCarbDB basetype.
     * Filled lazily by {@link #getEurocarbdbMsBasetypesFromMsdbMonosaccharide(Monosaccharide)}.
     */
    private static HashMap<String, EcdbBaseType> eurocarbBasetypesByStereocodeMap = null;

    //*****************************************************************************
    //*** monosaccharide conversion: **********************************************
    //*****************************************************************************

    /**
     * Create a new MonosaccharideDB monosaccharide and fill it from a EuroCarbDB monosaccharide.
     * (The method name is misspelled; it is kept as is for compatibility with existing callers.)
     *
     * @param eurocarbdbMs the EuroCarbDB monosaccharide to convert
     * @param conf the configuration handed to the new {@link Monosaccharide}
     * @param container the template container handed to the new {@link Monosaccharide}
     * @return the newly created and filled MonosaccharideDB monosaccharide
     * @throws ResourcesDbException if the conversion fails
     */
    public static Monosaccharide eurcarbdbToMsdb(EcdbMonosaccharide eurocarbdbMs, Config conf, TemplateContainer container) throws ResourcesDbException {
        Monosaccharide msdbMs = new Monosaccharide(conf, container);
        eurocarbdbToMsdb(eurocarbdbMs, msdbMs);
        return msdbMs;
    }

    /**
     * Copy size, ring closure, anomer, core modifications and stereocode of a
     * EuroCarbDB monosaccharide into an existing MonosaccharideDB monosaccharide.
     *
     * @param eurocarbdbMs the source EuroCarbDB monosaccharide
     * @param msdbMs the MonosaccharideDB monosaccharide that receives the values
     * @throws ResourcesDbException if a modification or the basetype list cannot be converted
     */
    public static void eurocarbdbToMsdb(EcdbMonosaccharide eurocarbdbMs, Monosaccharide msdbMs) throws ResourcesDbException {
        msdbMs.setSize(eurocarbdbMs.getSuperclass().getNumberOfC());

        //*** ring closure: the ring start also serves as default carbonyl position ***
        int ringStart = ringPositionEurocarbdbToMsdb(eurocarbdbMs.getRingStart());
        msdbMs.setRingStartNoAdjustment(ringStart);
        msdbMs.setDefaultCarbonylPosition(ringStart);
        msdbMs.setRingEnd(ringPositionEurocarbdbToMsdb(eurocarbdbMs.getRingEnd()));

        msdbMs.setAnomer(anomerEurocarbdbToMsdb(eurocarbdbMs.getAnomer()));

        //*** convert modifications: ***
        copyCoreModificationsFromEurocarbdbToMsdb(eurocarbdbMs, msdbMs);

        //*** get Stereocode from basetypes + modifications: ***
        String stereo = getStereocodeFromBasetypeList(eurocarbdbMs.getBaseTypeList());
        if(stereo.length() > 0) {
            stereo = Stereocode.expandChiralonlyStereoString(stereo, msdbMs);
        } else {
            //*** residue is superclass only ***
            stereo = Stereocode.getSuperclassStereostring(msdbMs.getSize());
            stereo = Stereocode.markNonchiralPositionsInStereoString(stereo, msdbMs);
        }
        msdbMs.setStereoStr(stereo);
        msdbMs.setAnomerInStereocode();
    }

    /**
     * Build a EuroCarbDB monosaccharide from a MonosaccharideDB monosaccharide.
     * Anhydro, lactone and epoxy core modifications are not copied, because they are
     * treated as substituents in EuroCarbDB.
     *
     * @param msdbMs the MonosaccharideDB monosaccharide to convert
     * @return the corresponding EuroCarbDB monosaccharide
     * @throws GlycoconjugateException if the basetypes or a core modification cannot be translated
     */
    public static org.eurocarbdb.resourcesdb.glycoconjugate_derived.EcdbMonosaccharide msdbToEurocarbdb(Monosaccharide msdbMs) throws GlycoconjugateException {
        //*** create new object and set anomeric and superclass: ***
        org.eurocarbdb.resourcesdb.glycoconjugate_derived.EcdbMonosaccharide eurocarbdbMs =
                new org.eurocarbdb.resourcesdb.glycoconjugate_derived.EcdbMonosaccharide(
                        anomerMsdbToEurocarbdb(msdbMs.getAnomer()),
                        org.eurocarbdb.resourcesdb.glycoconjugate_derived.EcdbSuperclass.forCAtoms(msdbMs.getSize()));

        //*** set ring closure; an unknown or open ring end overrides the ring start: ***
        int ringStart = ringPositionMsdbToEurocarbdb(msdbMs.getRingStart());
        int ringEnd = ringPositionMsdbToEurocarbdb(msdbMs.getRingEnd());
        if(ringEnd == EcdbMonosaccharide.UNKNOWN_RING) {
            ringStart = EcdbMonosaccharide.UNKNOWN_RING;
        }
        if(ringEnd == EcdbMonosaccharide.OPEN_CHAIN) {
            ringStart = EcdbMonosaccharide.OPEN_CHAIN;
        }
        eurocarbdbMs.setRing(ringStart, ringEnd);

        //*** set basetypes: ***
        for(EcdbBaseType basetype : getEurocarbdbMsBasetypesFromMsdbMonosaccharide(msdbMs)) {
            eurocarbdbMs.addBaseType(basetype);
        }

        //*** convert modifications: ***
        Iterator<CoreModification> modIter = msdbMs.getCoreModifications().iterator();
        while(modIter.hasNext()) {
            CoreModification msdbMod = modIter.next();
            //*** skip modifications that are substituents in eurocarbdb: ***
            //*** (conversion is handled in MonosaccharideConverter.convertMonosaccharide()) ***
            if(msdbMod.getTemplate().equals(CoreModificationTemplate.ANHYDRO)
                    || msdbMod.getTemplate().equals(CoreModificationTemplate.LACTONE)
                    || msdbMod.getTemplate().equals(CoreModificationTemplate.EPOXY)) {
                continue;
            }
            eurocarbdbMs.addModification(ModificationMsdbToEurocarbdb(msdbMod));
        }
        return eurocarbdbMs;
    }

    /**
     * Translate the EuroCarbDB open-chain / unknown-ring markers to the MonosaccharideDB ones.
     * Any other value is returned unchanged.
     */
    private static int ringPositionEurocarbdbToMsdb(int position) {
        if(position == EcdbMonosaccharide.OPEN_CHAIN) {
            return Basetype.OPEN_CHAIN;
        } else if(position == EcdbMonosaccharide.UNKNOWN_RING) {
            return Basetype.UNKNOWN_RING;
        }
        return position;
    }

    /**
     * Translate the MonosaccharideDB open-chain / unknown-ring markers to the EuroCarbDB ones.
     * Any other value is returned unchanged.
     */
    private static int ringPositionMsdbToEurocarbdb(int position) {
        if(position == Basetype.OPEN_CHAIN) {
            return EcdbMonosaccharide.OPEN_CHAIN;
        } else if(position == Basetype.UNKNOWN_RING) {
            return EcdbMonosaccharide.UNKNOWN_RING;
        }
        return position;
    }

    //*****************************************************************************
    //*** anomer conversion: ******************************************************
    //*****************************************************************************

    /**
     * Convert a MonosaccharideDB anomer to a EuroCarbDB anomer.
     * Both {@code Anomer.OPEN_CHAIN} and {@code Anomer.NONE} map to {@code OpenChain};
     * every other value (including {@code null}) maps to {@code Unknown}.
     *
     * @param anom the MonosaccharideDB anomer
     * @return the corresponding EuroCarbDB anomer
     */
    public static org.eurocarbdb.resourcesdb.glycoconjugate_derived.EcdbAnomer anomerMsdbToEurocarbdb(Anomer anom) {
        if(Anomer.ALPHA.equals(anom)) {
            return org.eurocarbdb.resourcesdb.glycoconjugate_derived.EcdbAnomer.Alpha;
        }
        if(Anomer.BETA.equals(anom)) {
            return org.eurocarbdb.resourcesdb.glycoconjugate_derived.EcdbAnomer.Beta;
        }
        if(Anomer.OPEN_CHAIN.equals(anom) || Anomer.NONE.equals(anom)) {
            return org.eurocarbdb.resourcesdb.glycoconjugate_derived.EcdbAnomer.OpenChain;
        }
        return org.eurocarbdb.resourcesdb.glycoconjugate_derived.EcdbAnomer.Unknown;
    }

    /**
     * Convert a EuroCarbDB anomer to a MonosaccharideDB anomer.
     * Values other than Alpha, Beta and OpenChain (including {@code null}) map to
     * {@code Anomer.UNKNOWN}, mirroring {@link #anomerMsdbToEurocarbdb(Anomer)}.
     *
     * @param anom the EuroCarbDB anomer
     * @return the corresponding MonosaccharideDB anomer
     */
    public static Anomer anomerEurocarbdbToMsdb(org.eurocarbdb.resourcesdb.glycoconjugate_derived.EcdbAnomer anom) {
        //*** constants are used as receivers so that a null argument does not raise a NullPointerException ***
        if(org.eurocarbdb.resourcesdb.glycoconjugate_derived.EcdbAnomer.Alpha.equals(anom)) {
            return Anomer.ALPHA;
        }
        if(org.eurocarbdb.resourcesdb.glycoconjugate_derived.EcdbAnomer.Beta.equals(anom)) {
            return Anomer.BETA;
        }
        if(org.eurocarbdb.resourcesdb.glycoconjugate_derived.EcdbAnomer.OpenChain.equals(anom)) {
            //TODO: distinguish between openChain and no anomer in ring (would need entire monosaccharide, not just anomer)
            return Anomer.OPEN_CHAIN;
        }
        return Anomer.UNKNOWN;
    }

    //*****************************************************************************
    //*** basetype / stereocode conversion: ***************************************
    //*****************************************************************************

    /**
     * Fill the stereocode-to-basetype lookup table from all {@link EcdbBaseType} values.
     * The table is built locally and assigned at the end, so the static field never
     * refers to a partially filled map if filling is aborted by an exception.
     */
    private static void fillEurocarbBasetypeByStereocodeMap() {
        HashMap<String, EcdbBaseType> map = new HashMap<String, EcdbBaseType>();
        for(EcdbBaseType bt : EcdbBaseType.values()) {
            map.put(bt.getStereo(), bt);
        }
        BasetypeConversion.eurocarbBasetypesByStereocodeMap = map;
    }

    /**
     * Look up the EuroCarbDB basetype for a stereocode string.
     * Strings with relative configurations are delegated to
     * {@link #getEuroCarbBasetypeByRelativeStereostring(String)}; all others are
     * looked up in the stereocode map, which must have been filled before.
     *
     * @param stereo the stereocode string of a single basetype
     * @return the matching basetype (never null)
     * @throws ResourcesDbException if the string mixes absolute and relative configurations
     *         or no basetype is registered for it
     */
    private static EcdbBaseType getEurocarbBasetypeByStereoString(String stereo) throws ResourcesDbException {
        if(Stereocode.stereoStringHasRelativePosition(stereo)) {
            if(Stereocode.stereoStringContainsAbsoluteAndRelative(stereo)) {
                throw new MonosaccharideException("Cannot get EurocarbDB basetype from a stereocode string that contains both absolute and relative configurations: " + stereo);
            }
            return getEuroCarbBasetypeByRelativeStereostring(stereo);
        }
        EcdbBaseType base = BasetypeConversion.eurocarbBasetypesByStereocodeMap.get(stereo);
        if(base == null) {
            throw new ResourcesDbException("Cannot get EurocarbDB basetype from stereocode string " + stereo);
        }
        return base;
    }

    /**
     * Look up the EuroCarbDB basetype for a stereocode string with relative configurations.
     * The string is converted to its absolute form, the absolute basetype is looked up,
     * and the first character of that basetype's name is replaced by "x".
     *
     * @param rStereo the relative stereocode string
     * @return the basetype registered under the "x"-prefixed name
     * @throws ResourcesDbException if no such basetype can be found
     */
    private static EcdbBaseType getEuroCarbBasetypeByRelativeStereostring(String rStereo) throws ResourcesDbException {
        String aStereo = Stereocode.relativeToAbsolute(rStereo);
        EcdbBaseType aBase = getEurocarbBasetypeByStereoString(aStereo);
        try {
            return EcdbBaseType.forName("x" + aBase.getName().substring(1));
        } catch(GlycoconjugateException ge) {
            ResourcesDbException me = new ResourcesDbException("Cannot get EurocarbDB basetype from stereocode string " + rStereo);
            me.initCause(ge);
            throw me;
        }
    }

    /**
     * Derive the list of EuroCarbDB basetypes from the stereocode of a MonosaccharideDB monosaccharide.
     * Position 1 and all nonchiral positions are dropped from the stereocode; the remaining
     * string is cut from the front into pieces of four symbols (the last piece takes whatever
     * is left, up to four symbols), and the basetype of each piece is inserted at the head of the result list.
     * The list is empty if the remaining stereocode consists of unknown configurations only.
     *
     * @param msdbMs the monosaccharide whose stereocode is translated
     * @return the basetype list (possibly empty, never null)
     * @throws GlycoconjugateException if the stereocode mixes known and unknown configurations
     *         or a piece cannot be mapped to a basetype; the underlying
     *         {@link ResourcesDbException} is attached as cause
     */
    public static ArrayList<EcdbBaseType> getEurocarbdbMsBasetypesFromMsdbMonosaccharide(Monosaccharide msdbMs) throws GlycoconjugateException {
        if(BasetypeConversion.eurocarbBasetypesByStereocodeMap == null) {
            fillEurocarbBasetypeByStereocodeMap();
        }
        ArrayList<EcdbBaseType> basetypeList = new ArrayList<EcdbBaseType>();
        String nonchiralSymbol = "" + StereoConfiguration.Nonchiral.getStereosymbol();
        String unknownSymbol = "" + StereoConfiguration.Unknown.getStereosymbol();
        try {
            String stereo = msdbMs.getStereoStr();
            if(msdbMs.getRingStart() > 1) {
                //*** anomeric center not at position1 => mask potential anomeric stereochemistry ***
                stereo = Stereocode.setPositionInStereoString(stereo, StereoConfiguration.Nonchiral.getStereosymbol(), msdbMs.getRingStart());
            }
            stereo = stereo.substring(1); //*** remove position1 (always nonchiral or anomeric) ***
            //*** literal replacement: the symbols are plain characters, not regular expressions ***
            stereo = stereo.replace(nonchiralSymbol, "");
            if(!stereo.replace(unknownSymbol, "").equals("")) {
                //*** residue is not just a superclass ***
                if(stereo.contains(unknownSymbol)) {
                    throw new ResourcesDbException("MonosaccharideDB stereocode contains unknown configurations - cannot generate basetype list for EuroCarbDB monosaccharide from that.");
                }
                //*** translate stereocenters into basetype(s): ***
                while(stereo.length() > 0) {
                    if(stereo.length() > STEREO_CHUNK_LENGTH) {
                        basetypeList.add(0, getEurocarbBasetypeByStereoString(stereo.substring(0, STEREO_CHUNK_LENGTH)));
                        stereo = stereo.substring(STEREO_CHUNK_LENGTH);
                    } else {
                        basetypeList.add(0, getEurocarbBasetypeByStereoString(stereo));
                        stereo = "";
                    }
                }
            }
        } catch(ResourcesDbException me) {
            //*** keep the original exception as cause so that its stack trace is not lost ***
            GlycoconjugateException ge = new GlycoconjugateException("Error in translating stereocode to basetype list: " + me.getMessage());
            ge.initCause(me);
            throw ge;
        }
        return basetypeList;
    }

    /**
     * Assemble a stereocode string from a list of EuroCarbDB basetypes.
     * The stereo string of each basetype is put in front of the string collected so far,
     * so the last list entry ends up first. For a basetype whose stereo string contains "*",
     * the basetype named "d" + (name without its first character) is looked up and its
     * stereo string is converted with {@code Stereocode.absoluteToRelative}.
     *
     * @param basetypeList the basetypes of a EuroCarbDB monosaccharide
     * @return the concatenated stereocode (empty string for an empty list)
     * @throws ResourcesDbException if the "d" counterpart of a "*" basetype cannot be found
     */
    public static String getStereocodeFromBasetypeList(ArrayList<EcdbBaseType> basetypeList) throws ResourcesDbException {
        String stereo = "";
        for(EcdbBaseType basetype : basetypeList) {
            String tmpStereo = basetype.getStereo();
            if(tmpStereo.contains("*")) {
                String basename = basetype.getName();
                try {
                    EcdbBaseType absoluteBasetype = EcdbBaseType.forName("d" + basename.substring(1));
                    tmpStereo = Stereocode.absoluteToRelative(absoluteBasetype.getStereo());
                } catch(GlycoconjugateException ge) {
                    ResourcesDbException me = new ResourcesDbException("GetStereocodeFromBasetypeList: Cannot get absolute equivalent for relative basetype " + basename + " (d" + basename.substring(1) + ")");
                    me.initCause(ge);
                    throw me;
                }
            }
            stereo = tmpStereo + stereo;
        }
        return stereo;
    }

    //*****************************************************************************
    //*** modification conversion: ************************************************
    //*****************************************************************************

    /** Check whether the name of a EuroCarbDB modification equals the name of the given modification type. */
    private static boolean isType(org.eurocarbdb.resourcesdb.glycoconjugate_derived.EcdbModification mod, EcdbModificationType type) {
        return mod.getName().equals(type.getName());
    }

    /**
     * Copy the modifications of a EuroCarbDB monosaccharide to a MonosaccharideDB monosaccharide.
     * An alditol modification is not added as core modification but sets the alditol flag
     * of the target. A keto modification sets the default carbonyl position of the target
     * if that position is still {@code Basetype.UNKNOWN_RING}.
     *
     * @param eurocarbdbMs the source EuroCarbDB monosaccharide
     * @param msdbMs the target MonosaccharideDB monosaccharide
     * @throws ResourcesDbException if an alditol modification is not at position one
     *         or a modification cannot be converted
     */
    public static void copyCoreModificationsFromEurocarbdbToMsdb(org.eurocarbdb.resourcesdb.glycoconjugate_derived.EcdbMonosaccharide eurocarbdbMs, Monosaccharide msdbMs) throws ResourcesDbException {
        ArrayList<org.eurocarbdb.resourcesdb.glycoconjugate_derived.EcdbModification> eurocarbdbModificationList = eurocarbdbMs.getModificationList();
        for(org.eurocarbdb.resourcesdb.glycoconjugate_derived.EcdbModification eurocarbMod : eurocarbdbModificationList) {
            if(isType(eurocarbMod, EcdbModificationType.ALDI)) {
                if(eurocarbMod.getPositionOne() != 1) {
                    throw new ResourcesDbException("Alditol position other than one in EuroCarbDB monosaccharide");
                }
                msdbMs.setAlditol(true);
                continue;
            }
            CoreModification msdbCoremod = ModificationEurocarbdbToMsdb(eurocarbMod);
            //TODO: take into account that multiple keto modifications might be given in a residue without a defined ring start
            if(msdbCoremod.getTemplate().equals(CoreModificationTemplate.KETO) && (msdbMs.getDefaultCarbonylPosition() == Basetype.UNKNOWN_RING)) {
                msdbMs.setDefaultCarbonylPosition(msdbCoremod.getPosition1().get(0));
            }
            msdbMs.addCoreModification(msdbCoremod);
        }
    }

    /**
     * Convert a EuroCarbDB modification to a MonosaccharideDB core modification.
     * Double bonds are converted to a divalent modification spanning position one and
     * position one + 1; anhydro uses position one and position two of the source.
     *
     * @param eurocarbdbMod the EuroCarbDB modification
     * @return the corresponding core modification
     * @throws MonosaccharideException for geminal modifications (not yet supported)
     *         and for modification names not handled here
     */
    public static CoreModification ModificationEurocarbdbToMsdb(org.eurocarbdb.resourcesdb.glycoconjugate_derived.EcdbModification eurocarbdbMod) throws MonosaccharideException {
        CoreModification msdbMod = new CoreModification();
        if(isType(eurocarbdbMod, EcdbModificationType.DEOXY)) {
            msdbMod.setModification(CoreModificationTemplate.DEOXY, eurocarbdbMod.getPositionOne());
        } else if(isType(eurocarbdbMod, EcdbModificationType.ACID)) {
            msdbMod.setModification(CoreModificationTemplate.ACID, eurocarbdbMod.getPositionOne());
        } else if(isType(eurocarbdbMod, EcdbModificationType.KETO)) {
            msdbMod.setModification(CoreModificationTemplate.KETO, eurocarbdbMod.getPositionOne());
        } else if(isType(eurocarbdbMod, EcdbModificationType.DOUBLEBOND)) {
            msdbMod.setDivalentModification(CoreModificationTemplate.EN, eurocarbdbMod.getPositionOne(), eurocarbdbMod.getPositionOne() + 1);
        } else if(isType(eurocarbdbMod, EcdbModificationType.UNKNOWN_BOUBLEBOND)) {
            msdbMod.setDivalentModification(CoreModificationTemplate.ENX, eurocarbdbMod.getPositionOne(), eurocarbdbMod.getPositionOne() + 1);
        } else if(isType(eurocarbdbMod, EcdbModificationType.SP2_HYBRID)) {
            msdbMod.setModification(CoreModificationTemplate.SP2, eurocarbdbMod.getPositionOne());
        } else if(isType(eurocarbdbMod, EcdbModificationType.GEMINAL)) {
            throw new MonosaccharideException("Geminal residues not yet supported."); //TODO: implement geminal in msdb
        } else if(isType(eurocarbdbMod, EcdbModificationType.ANHYDRO)) {
            msdbMod.setDivalentModification(CoreModificationTemplate.ANHYDRO, eurocarbdbMod.getPositionOne(), eurocarbdbMod.getPositionTwo());
        } else {
            throw new MonosaccharideException("cannot convert eurocarbdb core modification " + eurocarbdbMod.getName());
        }
        return msdbMod;
    }

    /**
     * Convert a MonosaccharideDB core modification to a EuroCarbDB modification.
     * Handles the deoxy, en, enx, acid, keto, sp2 and alditol templates.
     *
     * @param msdbMod the MonosaccharideDB core modification
     * @return the corresponding EuroCarbDB modification
     * @throws GlycoconjugateException for the 'Yn' template, which has no EuroCarbDB
     *         counterpart, and for any other template not handled here
     */
    public static org.eurocarbdb.resourcesdb.glycoconjugate_derived.EcdbModification ModificationMsdbToEurocarbdb(CoreModification msdbMod) throws GlycoconjugateException {
        if(msdbMod.getTemplate().equals(CoreModificationTemplate.DEOXY)) {
            return new org.eurocarbdb.resourcesdb.glycoconjugate_derived.EcdbModification(EcdbModificationType.DEOXY.getName(), msdbMod.getIntValuePosition1());
        }
        if(msdbMod.getTemplate().equals(CoreModificationTemplate.EN)) {
            return new org.eurocarbdb.resourcesdb.glycoconjugate_derived.EcdbModification(EcdbModificationType.DOUBLEBOND.getName(), msdbMod.getIntValuePosition1(), msdbMod.getIntValuePosition2());
        }
        if(msdbMod.getTemplate().equals(CoreModificationTemplate.ENX)) {
            return new org.eurocarbdb.resourcesdb.glycoconjugate_derived.EcdbModification(EcdbModificationType.UNKNOWN_BOUBLEBOND.getName(), msdbMod.getIntValuePosition1(), msdbMod.getIntValuePosition2());
        }
        if(msdbMod.getTemplate().equals(CoreModificationTemplate.ACID)) {
            return new org.eurocarbdb.resourcesdb.glycoconjugate_derived.EcdbModification(EcdbModificationType.ACID.getName(), msdbMod.getIntValuePosition1());
        }
        if(msdbMod.getTemplate().equals(CoreModificationTemplate.KETO)) {
            return new org.eurocarbdb.resourcesdb.glycoconjugate_derived.EcdbModification(EcdbModificationType.KETO.getName(), msdbMod.getIntValuePosition1());
        }
        if(msdbMod.getTemplate().equals(CoreModificationTemplate.SP2)) {
            return new org.eurocarbdb.resourcesdb.glycoconjugate_derived.EcdbModification(EcdbModificationType.SP2_HYBRID.getName(), msdbMod.getIntValuePosition1());
        }
        if(msdbMod.getTemplate().equals(CoreModificationTemplate.ALDITOL)) {
            return new org.eurocarbdb.resourcesdb.glycoconjugate_derived.EcdbModification(EcdbModificationType.ALDI.getName(), msdbMod.getIntValuePosition1());
        }
        if(msdbMod.getTemplate().equals(CoreModificationTemplate.YN)) {
            throw new GlycoconjugateException("Core modification 'Yn' not defined for EuroCarbDB monosaccharides.");
        }
        throw new GlycoconjugateException("Unknown msdb core modification: " + msdbMod.getName());
    }
}