/*
*   EuroCarbDB, a framework for carbohydrate bioinformatics
*
*   Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as
*   indicated by the @author tags or express copyright attribution
*   statements applied by the authors.
*
*   This copyrighted material is made available to anyone wishing to use, modify,
*   copy, or redistribute it subject to the terms and conditions of the GNU
*   Lesser General Public License, as published by the Free Software Foundation.
*   A copy of this license accompanies this distribution in the file LICENSE.txt.
*
*   This program is distributed in the hope that it will be useful,
*   but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
*   or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
*   for more details.
*
*   Last commit: $Rev: 1210 $ by $Author: glycoslave $ on $Date:: 2009-06-12 #$
*/
package org.eurocarbdb.resourcesdb.io;

import java.util.ArrayList;

import org.eurocarbdb.resourcesdb.*;
import org.eurocarbdb.resourcesdb.monosaccharide.*;
import org.eurocarbdb.resourcesdb.template.BasetypeTemplate;
import org.eurocarbdb.resourcesdb.template.TemplateContainer;
import org.eurocarbdb.resourcesdb.template.TrivialnameTemplate;
import org.eurocarbdb.resourcesdb.util.StringUtils;

/**
 * Importer that parses monosaccharide names of the {@link GlycanNamescheme#BCSDB} namescheme
 * into {@link Monosaccharide} objects.
 *
 * <p>A name is read from left to right in this order: an optional anomer symbol, absolute
 * configuration symbol(s), optional "pre-basetype" core modifications (a position list followed
 * by the deoxy or anhydro name), a trivial name or one or more three-letter basetype names,
 * an optional ring type symbol and finally "post-basetype" modifications and substituents.
 * The collected information is then turned into the properties of the monosaccharide.</p>
 *
 * <p>The importer keeps intermediate parsing results (configuration symbols, basetype list,
 * superclass, ring type) in instance fields; {@link #init()} resets them.</p>
 */
public class BcsdbImporter extends StandardImporter implements MonosaccharideImporter {

    /** Superclass basetype (e.g. the "Hex" in "4dxylHex") found in the name, or null. */
    private BasetypeTemplate detectedSuperclass = null;

    /** Non-superclass basetypes found in the name, in the order they were read. */
    private ArrayList<BasetypeTemplate> btList = new ArrayList<BasetypeTemplate>();

    /** Ring type given by the ring type symbol of the name, or null if there was none. */
    private Ringtype detectedRingtype = null;

    /** Absolute configuration symbols in the order they were read from the name. */
    private String configurationSymbolsStr = "";

    //*****************************************************************************
    //*** constructors: ***********************************************************
    //*****************************************************************************

    /**
     * Create an importer that uses the given configuration and no explicit template container.
     * @param confObj the configuration to use
     */
    public BcsdbImporter(Config confObj) {
        // Pass the configuration on; it used to be dropped by delegating with null here.
        this(confObj, null);
    }

    /**
     * Create an importer that uses the given configuration and template container.
     * @param confObj the configuration to use
     * @param container the template container to use
     */
    public BcsdbImporter(Config confObj, TemplateContainer container) {
        super(GlycanNamescheme.BCSDB, confObj, container);
    }

    /**
     * Create an importer without configuration and template container.
     */
    public BcsdbImporter() {
        this(null, null);
    }

    //*****************************************************************************
    //*** getters/setters: ********************************************************
    //*****************************************************************************

    /**
     * @return the detectedSuperclass
     */
    public BasetypeTemplate getDetectedSuperclass() {
        return this.detectedSuperclass;
    }

    /**
     * @param detectedSuperclass the detectedSuperclass to set
     */
    public void setDetectedSuperclass(BasetypeTemplate detectedSuperclass) {
        this.detectedSuperclass = detectedSuperclass;
    }

    /**
     * @return the list of (non-superclass) basetypes collected so far
     */
    public ArrayList<BasetypeTemplate> getBtList() {
        return this.btList;
    }

    /**
     * Append a basetype to the list of collected basetypes.
     * @param tmpl the basetype template to add
     */
    public void addBasetypeToBtList(BasetypeTemplate tmpl) {
        this.btList.add(tmpl);
    }

    /**
     * @return the absolute configuration symbols collected so far
     */
    public String getConfigurationSymbolsStr() {
        return this.configurationSymbolsStr;
    }

    private void setConfigurationSymbolsStr(String confSymbolsStr) {
        this.configurationSymbolsStr = confSymbolsStr;
    }

    private void addConfigurationSymbol(char confSym) {
        this.configurationSymbolsStr += confSym;
    }

    /**
     * @return the detectedRingtype
     */
    public Ringtype getDetectedRingtype() {
        return detectedRingtype;
    }

    /**
     * @param detectedRingtype the detectedRingtype to set
     */
    public void setDetectedRingtype(Ringtype detectedRingtype) {
        this.detectedRingtype = detectedRingtype;
    }

    //*****************************************************************************
    //*** parsing methods: ********************************************************
    //*****************************************************************************

    /**
     * Parse a monosaccharide name into a newly created monosaccharide that uses the
     * configuration and template container of this importer.
     * @param name the name to parse
     * @return the monosaccharide built from the name
     * @throws ResourcesDbException in case the name cannot be parsed
     */
    public Monosaccharide parseMsString(String name) throws ResourcesDbException {
        Monosaccharide ms = new Monosaccharide(this.getConfig(), this.getTemplateContainer());
        this.parseMsString(name, ms);
        return ms;
    }

    /**
     * Parse a monosaccharide name and store the result in the given monosaccharide,
     * which is (re-)initialised first.
     *
     * <p>NOTE(review): the parsing state held in this importer (configuration symbols,
     * basetype list, superclass, ring type) is not reset by this method; presumably callers
     * invoke {@link #init()} between two parses - confirm.</p>
     *
     * @param name the name to parse, must be neither null nor empty
     * @param ms the monosaccharide to fill, must not be null
     * @throws ResourcesDbException in case an argument is invalid or the name cannot be parsed
     */
    public void parseMsString(String name, Monosaccharide ms) throws ResourcesDbException {
        if(ms == null) {
            throw new NameParsingException("BcsdbImporter.parseMsString(String, Monosaccharide): Monosaccharide must not be null.");
        }
        if(name == null) {
            throw new NameParsingException("BcsdbImporter.parseMsString(String, Monosaccharide): Name must not be null.");
        }
        if(name.length()==0) {
            throw new NameParsingException("BcsdbImporter.parseMsString(String, Monosaccharide): Name must not be empty.");
        }
        ms.init();
        ms.setCheckPositionsOnTheFly(false);
        this.setInputName(name);
        this.setFoundMs(false);
        this.setParsingPosition(0);
        ArrayList<CoreModification> coreModList = new ArrayList<CoreModification>();
        TrivialnameTemplate trivTmpl = null;

        //*** get anomer: ***
        Anomer anomer = Anomer.forBcsdbSymbol(this.getCurrentToken());
        if(anomer == null) {
            //*** a missing anomer is tolerated only if the name directly starts with a configuration symbol ***
            if(!isBcsdbConfigSymbol(this.getCurrentToken())) {
                throw new NameParsingException("illegal anomer symbol: '" + this.getCurrentToken() + "'", name, 0);
            }
        } else {
            ms.setAnomer(anomer);
            this.increaseParsingPosition();
        }

        //*** get absolute configuration symbol(s): ***
        while(isBcsdbConfigSymbol(this.getCurrentToken())) {
            //*** check, if substring starting at current position is a basetype name or trivialname, the first letter of which could be misinterpreted as a configuration symbol (as e.g. in DLyx, which has to be read as D + Lyx and not DL + yx) ***
            if(name.length() - this.getParsingPosition() >= 3) {
                if(this.getTemplateContainer().getBasetypeTemplateContainer().getBasetypeTemplateByName(name.substring(this.getParsingPosition(), this.getParsingPosition() + 3)) != null) {
                    break;
                }
            }
            String trivName = this.checkForTrivialname();
            if(trivName != null) {
                trivTmpl = this.getTemplateContainer().getTrivialnameTemplateContainer().forBasetypeName(this.getNamescheme(), trivName);
                this.increaseParsingPosition(trivName.length());
                break;
            }
            this.addConfigurationSymbol(this.getCurrentToken());
            this.increaseParsingPosition();
        }

        if(trivTmpl == null) {
            //*** check for "pre-basetype" core modifications (deoxy-/anhydro): ***
            while(Character.isDigit(this.getCurrentToken())) {
                //*** check first, if residue contains a trivial name starting with a digit (such as 4eLeg) at current position: ***
                String trivName = this.checkForTrivialname();
                if(trivName != null) {
                    trivTmpl = this.getTemplateContainer().getTrivialnameTemplateContainer().forBasetypeName(this.getNamescheme(), trivName);
                    this.increaseParsingPosition(trivName.length());
                    break;
                }

                //*** now check for the core modifications: ***
                ArrayList<Integer> modPosList = new ArrayList<Integer>();
                int pos = this.parseIntNumber();
                modPosList.add(pos);
                while(this.getCurrentToken() == ',') {
                    this.increaseParsingPosition();
                    pos = this.parseIntNumber();
                    modPosList.add(pos);
                }
                String deoxyName = CoreModificationTemplate.DEOXY.getBcsdbName();
                String anhydroName = CoreModificationTemplate.ANHYDRO.getBcsdbName();
                if(this.getCurrentSubstring(deoxyName.length()).equals(deoxyName)) {
                    for(Integer dPos : modPosList) {
                        CoreModification mod = new CoreModification(CoreModificationTemplate.DEOXY, dPos);
                        coreModList.add(mod);
                    }
                    //*** skip exactly the modification name that was matched above ***
                    this.increaseParsingPosition(deoxyName.length());
                } else if(this.getCurrentSubstring(anhydroName.length()).equals(anhydroName)) {
                    if(modPosList.size() != 2) {
                        throw new NameParsingException("anhydro modification requires two positions", this.getInputName(), this.getParsingPosition());
                    }
                    CoreModification mod = new CoreModification(CoreModificationTemplate.ANHYDRO, modPosList.get(0), modPosList.get(1));
                    coreModList.add(mod);
                    //*** skip exactly the modification name that was matched above ***
                    this.increaseParsingPosition(anhydroName.length());
                } else {
                    throw new NameParsingException("invalid modification name", this.getInputName(), this.getParsingPosition());
                }
            }

            //*** check again for trivial name: ***
            String trivName = this.checkForTrivialname();
            if(trivName != null) {
                trivTmpl = this.getTemplateContainer().getTrivialnameTemplateContainer().forBasetypeName(this.getNamescheme(), trivName);
                this.increaseParsingPosition(trivName.length());
            }
        }

        if(trivTmpl == null) {
            //*** get basetype: ***
            BasetypeTemplate btTmpl = this.lookupBcsdbBasetypeAtCursor();
            if(btTmpl == null) {
                throw new NameParsingException("basetype name expected", this.getInputName(), this.getParsingPosition());
            }
            this.registerParsedBasetype(btTmpl);
            this.increaseParsingPosition(3);

            //*** check for further basetype definitions, as in 4dxylHex: ***
            while(this.countRemainingTokens() >= 3) {
                btTmpl = this.lookupBcsdbBasetypeAtCursor();
                if(btTmpl == null) {
                    break;
                }
                this.registerParsedBasetype(btTmpl);
                this.increaseParsingPosition(3);
            }
            if(this.getDetectedSuperclass() != null) {
                ms.setSize(this.getDetectedSuperclass().getSize());
            } else {
                if(this.getBtList().size() == 1) {
                    ms.setSize(this.getBtList().get(0).getSize());
                }
            }
        } else {
            ms.setSize(trivTmpl.getSize());
            //*** the trivial name is only recorded here; the monosaccharide properties defined by it are set in processParsedData() ***
            this.setDetectedTrivialname(trivTmpl);
        }

        //*** get ring type: ***
        if(this.hasCurrentToken()) {
            if(isBcsdbRingtypeSymbol(this.getCurrentToken())) {
                this.setDetectedRingtype(Ringtype.forBcsdbSymbol(String.valueOf(this.getCurrentToken())));
                this.increaseParsingPosition();
            }
        }

        //*** add previously detected core modifications: ***
        for(CoreModification mod : coreModList) {
            ms.addCoreModification(mod);
        }

        //*** check for "post-basetype" core modifications / substitutions (including alditol or acid): ***
        this.parseModifications(ms);

        //*** build the monosaccharide from the parsed information: ***
        this.processParsedData(ms);
    }

    /**
     * Look up the basetype template named by the three characters at the current parsing position.
     * The name "thr" (in any case) is looked up as "tro". The parsing position is not changed.
     * @return the matching basetype template, or null if the lookup yields none
     */
    private BasetypeTemplate lookupBcsdbBasetypeAtCursor() throws ResourcesDbException {
        String btName = this.getCurrentSubstring(3);
        if(btName.equalsIgnoreCase("thr")) {
            btName = "tro";
        }
        return this.getTemplateContainer().getBasetypeTemplateContainer().getBasetypeTemplateByName(btName);
    }

    /**
     * Record a basetype read from the name: a superclass is stored as the detected superclass
     * (only one is allowed per name), any other basetype is appended to the basetype list.
     * @param btTmpl the basetype template to record
     * @throws ResourcesDbException in case a second superclass is encountered
     */
    private void registerParsedBasetype(BasetypeTemplate btTmpl) throws ResourcesDbException {
        if(btTmpl.isSuperclass()) {
            if(this.getDetectedSuperclass() != null) {
                throw new NameParsingException("multiple basetype superclasses found", this.getInputName(), this.getParsingPosition());
            }
            this.setDetectedSuperclass(btTmpl);
        } else {
            this.addBasetypeToBtList(btTmpl);
        }
    }

    /**
     * Parse the part of the name that follows the basetype / ring type, i.e. the
     * "post-basetype" core modifications (en, ulo, onic, aric, A, ol) and substituents,
     * and add them to the monosaccharide. Elements may be separated by '-' and may be
     * preceded by a list of positions.
     * @param ms the monosaccharide to add the modifications to
     * @throws ResourcesDbException in case the remaining name cannot be parsed
     */
    private void parseModifications(Monosaccharide ms) throws ResourcesDbException {
        while(this.hasCurrentToken()) {
            if(this.getCurrentToken() == '-') {
                this.increaseParsingPosition();
            }
            ArrayList<Integer> digitList = null;
            if(Character.isDigit(this.getCurrentToken())) {
                digitList = this.parseIntNumberList();
                if(this.getCurrentToken() == '-') {
                    this.increaseParsingPosition();
                }
            }

            //*** double bond, spanning the given position and the following one: ***
            if(this.hasCurrentSubstring("en")) {
                if(digitList == null) {
                    throw new NameParsingException("number expected", this.getInputName(), this.getParsingPosition());
                }
                for(Integer pos1 : digitList) {
                    CoreModification mod = new CoreModification(CoreModificationTemplate.EN, pos1, pos1 + 1);
                    ms.addCoreModification(mod);
                }
                this.increaseParsingPosition(2);
                continue;
            }

            //*** keto modification(s): ***
            if(this.hasCurrentSubstring("ulo")) {
                if(digitList == null) {
                    digitList = new ArrayList<Integer>();
                    digitList.add(2); //*** default keto position ***
                }
                for(Integer pos1 : digitList) {
                    CoreModification mod = new CoreModification(CoreModificationTemplate.KETO, pos1);
                    ms.addCoreModification(mod);
                }
                this.increaseParsingPosition(3);
                //*** check, if residue is ulosonic or ulosaric: ***
                if(this.hasCurrentSubstring("sonic")) {
                    ms.setAldonic();
                    this.increaseParsingPosition(5);
                } else if(this.hasCurrentSubstring("saric")) {
                    ms.setAldaric();
                    this.increaseParsingPosition(5);
                }
                continue;
            }

            //*** acids that must not carry a position: ***
            if(this.hasCurrentSubstring("onic")) {
                if(digitList != null) {
                    throw new NameParsingException("Cannot assign given position to 'onic' modification", this.getInputName(), this.getParsingPosition());
                }
                ms.setAldonic();
                this.increaseParsingPosition(4);
                continue;
            } else if(this.hasCurrentSubstring("aric")) {
                if(digitList != null) {
                    throw new NameParsingException("Cannot assign given position to 'aric' modification", this.getInputName(), this.getParsingPosition());
                }
                ms.setAldaric();
                this.increaseParsingPosition(4);
                continue;
            }

            //*** substituents: use the longest substituent name that matches at the current position ***
            String matchedSubstName = "";
            for(String substName : this.getTemplateContainer().getSubstituentTemplateContainer().getResidueIncludedNameList(this.getNamescheme())) {
                if(this.hasCurrentSubstring(substName)) {
                    if(substName.length() > matchedSubstName.length()) {
                        matchedSubstName = substName;
                    }
                }
            }
            if(matchedSubstName.length() > 0) { //*** substitution found ***
                if(digitList == null) {
                    //*** no explicit position given: position 2 is used ***
                    digitList = new ArrayList<Integer>();
                    digitList.add(2);
                }
                this.addParsedSubstitution(ms, matchedSubstName, digitList);
                this.increaseParsingPosition(matchedSubstName.length());
                continue;
            }

            //*** uronic acid: ***
            if(this.getCurrentToken() == 'A') {
                if(digitList != null) {
                    throw new NameParsingException("Cannot assign given position to 'A' modification", this.getInputName(), this.getParsingPosition());
                }
                ms.setUronic();
                this.increaseParsingPosition();
                continue;
            }

            //*** alditol: ***
            if(this.hasCurrentSubstring("ol")) {
                if(digitList != null) {
                    throw new NameParsingException("Cannot assign given position to 'ol' modification", this.getInputName(), this.getParsingPosition());
                }
                ms.setAlditol(true);
                this.increaseParsingPosition(2);
                continue;
            }

            throw new NameParsingException("cannot parse substring '" + this.getInputName().substring(this.getParsingPosition()) + "'", this.getInputName(), this.getParsingPosition());
        }
    }

    /**
     * Build the monosaccharide from the information collected while parsing the name:
     * size, stereocode, default carbonyl position, ring, anomer. Finally the monosaccharide
     * is checked for consistency and its name is built.
     * @param ms the monosaccharide to complete
     * @throws ResourcesDbException in case the parsed information is inconsistent
     */
    private void processParsedData(Monosaccharide ms) throws ResourcesDbException {
        //*** true, if the temporary stereocode only covers the chiral positions and still has to be expanded ***
        boolean expandChiralonly = false;
        if(this.getDetectedTrivialname() != null) {
            //*** set monosaccharide properties defined by the trivialname: ***
            ms.init(this.getDetectedTrivialname());

            //*** prepare stereocode: ***
            String stereo = this.getDetectedTrivialname().getStereocode();
            if(this.getDetectedTrivialname().isDefaultConfigIsCompulsory()) {
                //*** accepted are the nonchiral symbol, the default configuration of the trivial name or no symbol at all ***
                if(!this.getConfigurationSymbolsStr().equals(String.valueOf(StereoConfiguration.Nonchiral.getBcsdbSymbol()))
                        && !this.getConfigurationSymbolsStr().equals(String.valueOf(this.getDetectedTrivialname().getDefaultConfiguration().getBcsdbSymbol()))
                        && !this.getConfigurationSymbolsStr().equals("")) {
                    //*** report the configuration that actually is required by the trivial name ***
                    throw new NameParsingException("trivial name " + this.getDetectedTrivialname().getLongName() + " requires absolute configuration " + this.getDetectedTrivialname().getDefaultConfiguration().getBcsdbSymbol());
                }
                if(this.getDetectedTrivialname().getDefaultConfiguration().equals(StereoConfiguration.Laevus)) {
                    stereo = Stereocode.changeDLinStereoString(stereo);
                }
            } else {
                if(this.getConfigurationSymbolsStr().equals(String.valueOf(StereoConfiguration.Unknown.getBcsdbSymbol()))) {
                    stereo = Stereocode.absoluteToRelative(stereo);
                } else if(this.getConfigurationSymbolsStr().equals(String.valueOf(StereoConfiguration.Laevus.getBcsdbSymbol()))) {
                    stereo = Stereocode.changeDLinStereoString(stereo);
                }
            }
            ms.setDefaultCarbonylPosition(this.getDetectedTrivialname().getCarbonylPosition());
            this.setTmpStereocode(stereo);
        } else { //*** no trivial name ***
            //*** get monosaccharide size and stereocode from basetypelist: ***
            if(this.getDetectedSuperclass() == null) {
                if(this.getBtList().size() > 1) {
                    throw new NameParsingException("multiple basetypes but no superclass found", this.getInputName(), 0);
                }
                if(this.getBtList().size() == 0) {
                    throw new NameParsingException("no monosaccharide basetype detected", this.getInputName(), 0);
                }
                ms.setSize(this.getBtList().get(0).getSize());

                //*** prepare stereocode: ***
                String stereo = this.getBtList().get(0).getStereocode();
                if(this.getConfigurationSymbolsStr().length() == 0 || this.getConfigurationSymbolsStr().equals(String.valueOf(StereoConfiguration.Unknown.getBcsdbSymbol()))) {
                    stereo = Stereocode.absoluteToRelative(stereo);
                } else if(this.getConfigurationSymbolsStr().equals(String.valueOf(StereoConfiguration.Laevus.getBcsdbSymbol()))) {
                    stereo = Stereocode.changeDLinStereoString(stereo);
                } else if(!this.getConfigurationSymbolsStr().equals(String.valueOf(StereoConfiguration.Dexter.getBcsdbSymbol()))) {
                    throw new NameParsingException("cannot apply stereoconfiguration " + this.getConfigurationSymbolsStr() + " to basetype " + this.getBtList().get(0).getBaseName());
                }
                this.setTmpStereocode(stereo);
            } else { //*** main basetype is superclass (as in 4dxylHex) ***
                expandChiralonly = true;
                ms.setSize(this.getDetectedSuperclass().getSize());

                //*** prepare stereocode: ***
                String stereo = "";
                if(this.getConfigurationSymbolsStr().length() > 2) {
                    if(this.getConfigurationSymbolsStr().charAt(0) == StereoConfiguration.Nonchiral.getBcsdbSymbol()) {
                        //*** delete leading X in config string of a residue like aXDDmanHep: ***
                        this.setConfigurationSymbolsStr(this.getConfigurationSymbolsStr().substring(1));
                    }
                }
                if(this.getBtList().size() == 0) { //*** residue is superclass without stereo information ***
                    stereo = StringUtils.multiplyChar(StereoConfiguration.Unknown.getStereosymbol(), ms.getSize() - 2);
                    expandChiralonly = false;
                } else {
                    if(this.getConfigurationSymbolsStr().length() == this.getBtList().size() + 1) {
                        stereo += StereoConfiguration.forBcsdbSymbol(this.getConfigurationSymbolsStr().charAt(0)).getStereosymbol();
                        this.setConfigurationSymbolsStr(this.getConfigurationSymbolsStr().substring(1));
                    }
                    if(this.getConfigurationSymbolsStr().length() != this.getBtList().size()) {
                        throw new NameParsingException("number of configuration symbols does not match number of basetypes", this.getInputName(), 0);
                    }
                    //*** the stereocode of each further basetype is put in front of what was assembled so far ***
                    for(int i = 0; i < this.getBtList().size(); i++) {
                        String tmpStereo = this.getBtList().get(i).getStereocode();
                        if(this.getConfigurationSymbolsStr().charAt(i) == StereoConfiguration.Laevus.getBcsdbSymbol()) {
                            tmpStereo = Stereocode.changeDLinStereoString(tmpStereo);
                        } else if(this.getConfigurationSymbolsStr().charAt(i) == StereoConfiguration.Unknown.getBcsdbSymbol()) {
                            tmpStereo = Stereocode.absoluteToRelative(tmpStereo);
                        }
                        stereo = tmpStereo + stereo;
                    }
                }
                //*** store the stereocode for both cases above (it used to be lost for a superclass without basetypes) ***
                this.setTmpStereocode(stereo);
            }

            //*** set default ring start: ***
            ArrayList<CoreModification> ketoList = ms.getCoreModifications(CoreModificationTemplate.KETO);
            if(ketoList != null && ketoList.size() > 0) {
                //*** use the lowest positive keto position as default carbonyl position ***
                for(CoreModification mod : ketoList) {
                    if(mod.getIntValuePosition1() > 0) {
                        if(ms.getDefaultCarbonylPosition() == 0) {
                            ms.setDefaultCarbonylPosition(mod.getIntValuePosition1());
                        } else if(mod.getIntValuePosition1() < ms.getDefaultCarbonylPosition()) {
                            ms.setDefaultCarbonylPosition(mod.getIntValuePosition1());
                        }
                    }
                }
            } else {
                ms.setDefaultCarbonylPosition(1);
            }
        }

        //*** set ring: ***
        if(ms.getRingStart() == Basetype.UNKNOWN_RING) {
            if(ms.isAlditol()) {
                ms.setRingStart(Basetype.OPEN_CHAIN);
            } else {
                ms.setRingStart(ms.getDefaultCarbonylPosition());
            }
        }
        if(this.getDetectedRingtype() != null) {
            if(ms.isAlditol()) {
                if(this.getDetectedRingtype().equals(Ringtype.PYRANOSE) || this.getDetectedRingtype().equals(Ringtype.FURANOSE)) {
                    throw new MonosaccharideException("Ringtype " + this.getDetectedRingtype().getName() + " is not allowed together with alditol modification.");
                }
            } else {
                ms.setRingtype(this.getDetectedRingtype());
            }
        }

        //*** set correct anomer: ***
        if(!(Anomer.ALPHA.equals(ms.getAnomer()) || Anomer.BETA.equals(ms.getAnomer()))) {
            if(ms.getRingStart() > 0) {
                if(ms.isStereolossPositionWithIgnoreType(ms.getRingStart(), CoreModificationTemplate.KETO)) {
                    ms.setAnomer(Anomer.NONE);
                } else {
                    ms.setAnomer(Anomer.UNKNOWN);
                }
            } else if(ms.getRingStart() == Basetype.OPEN_CHAIN) {
                ms.setAnomer(Anomer.OPEN_CHAIN);
            } else {
                ms.setAnomer(Anomer.UNKNOWN);
            }
        }

        //*** finish stereocode: ***
        String stereo = this.getTmpStereocode();
        if(expandChiralonly) {
            stereo = Stereocode.expandChiralonlyStereoString(stereo, ms);
        } else {
            stereo = StereoConfiguration.Nonchiral.getStereosymbol() + this.getTmpStereocode() + StereoConfiguration.Nonchiral.getStereosymbol();
            stereo = Stereocode.markNonchiralPositionsInStereoString(stereo, ms);
        }
        ms.setStereoStr(stereo);
        ms.setAnomerInStereocode();

        //*** check correctness and build ms name: ***
        MonosaccharideValidation.checkMonosaccharideConsistency(ms, this.getTemplateContainer(), this.getConfig());
        ms.buildName();
    }

    //*****************************************************************************
    //*** other methods: **********************************************************
    //*****************************************************************************

    /**
     * Check, if a given character is a valid StereoConfiguration symbol in bcsdb notation
     * @param symbol the character to check
     * @return true, if a StereoConfiguration is assigned to the symbol
     */
    public static boolean isBcsdbConfigSymbol(char symbol) {
        return StereoConfiguration.forBcsdbSymbol(symbol) != null;
    }

    /**
     * Check, if a given character is a valid Ringtype symbol in bcsdb notation
     * @param symbol the character to check
     * @return true, if a Ringtype is assigned to the symbol
     */
    public static boolean isBcsdbRingtypeSymbol(char symbol) {
        return Ringtype.forBcsdbSymbol(String.valueOf(symbol)) != null;
    }

    /**
     * Reset the importer: in addition to the initialisation done by the superclass, the
     * detected ring type and superclass, the configuration symbols and the basetype list
     * are cleared.
     */
    @Override
    public void init() {
        super.init();
        this.setDetectedRingtype(null);
        this.setDetectedSuperclass(null);
        this.setConfigurationSymbolsStr("");
        this.btList.clear();
    }
}