/*
* EuroCarbDB, a framework for carbohydrate bioinformatics
*
* Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as
* indicated by the @author tags or express copyright attribution
* statements applied by the authors.
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
* A copy of this license accompanies this distribution in the file LICENSE.txt.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* Last commit: $Rev: 1210 $ by $Author: glycoslave $ on $Date:: 2009-06-12 #$
*/
package org.eurocarbdb.resourcesdb.io;
import java.util.ArrayList;
import org.eurocarbdb.resourcesdb.*;
import org.eurocarbdb.resourcesdb.glycoconjugate_derived.LinkageType;
import org.eurocarbdb.resourcesdb.monosaccharide.*;
import org.eurocarbdb.resourcesdb.template.BasetypeTemplate;
import org.eurocarbdb.resourcesdb.template.SubstituentTemplate;
import org.eurocarbdb.resourcesdb.template.TemplateContainer;
import org.eurocarbdb.resourcesdb.template.TrivialnameTemplate;
import org.eurocarbdb.resourcesdb.util.NumberPrefix;
import org.eurocarbdb.resourcesdb.util.StringUtils;
/**
* Importer class for monosaccharides encoded in CarbBank style formats (CarbBank, Glycosciences.de, IUPAC).
* @author Thomas Luetteke
*
*/
public class CarbbankImporter extends StandardImporter implements MonosaccharideImporter {
private String prePosStr;
private String postPosStr;
private BasetypeTemplate detectedBasetype;
private BasetypeTemplate detectedSubtype;
private Ringtype detectedRingtype;
private int fuzzy;
private int preposWildcards;
private boolean useDefaultValues = false;
//*****************************************************************************
//*** constructors: ***********************************************************
//*****************************************************************************
/**
* Creates an importer for the CarbBank namescheme.
* Delegates to the three-argument constructor with neither a config nor a template container (both null).
*/
public CarbbankImporter() {
this(GlycanNamescheme.CARBBANK, null, null);
}
/**
* Creates an importer for the given namescheme and config.
* Delegates to the three-argument constructor, passing null as template container.
* @param scheme the namescheme of the names to be parsed
* @param confObj the config object handed on to the superclass constructor
*/
public CarbbankImporter(GlycanNamescheme scheme, Config confObj) {
this(scheme, confObj, null);
}
/**
* Creates an importer for the given namescheme, config and template container.
* All three arguments are handed on to the superclass constructor; afterwards init() is called
* to put the parsing fields of this class into their initial state.
* @param scheme the namescheme of the names to be parsed
* @param confObj the config object
* @param container the template container used for template lookups
*/
public CarbbankImporter(GlycanNamescheme scheme, Config confObj, TemplateContainer container) {
super(scheme, confObj, container);
this.init(); //*** NOTE(review): init() is public and therefore overridable; it runs before a subclass constructor body ***
}
//*****************************************************************************
//*** getters/setters: ********************************************************
//*****************************************************************************
/**
 * Returns the part of the input name that follows the detected basetype.
 *
 * @return the postPosStr
 */
private String getPostPosStr() {
    return postPosStr;
}
/**
* Stores the part of the input name that follows the detected basetype.
* @param postPosStr the postPosStr to set
*/
private void setPostPosStr(String postPosStr) {
this.postPosStr = postPosStr;
}
/**
 * Returns the part of the input name that precedes the detected basetype
 * (and the detected subtype, if one was found).
 *
 * @return the prePosStr
 */
private String getPrePosStr() {
    return prePosStr;
}
/**
* Stores the part of the input name that precedes the detected basetype.
* @param prePosStr the prePosStr to set
*/
private void setPrePosStr(String prePosStr) {
this.prePosStr = prePosStr;
}
/**
 * Returns the template of the basetype that was found in the parsed name.
 *
 * @return the detectedBasetype, or null if none has been detected
 */
public BasetypeTemplate getDetectedBasetype() {
    return detectedBasetype;
}
/**
* Stores the template of the basetype that was found in the parsed name.
* @param detectedBasetype the detectedBasetype to set (null to clear it)
*/
public void setDetectedBasetype(BasetypeTemplate detectedBasetype) {
this.detectedBasetype = detectedBasetype;
}
/**
 * Returns the template of the subtype that was found directly in front of a
 * superclass basetype (like the xyl in xylHex).
 *
 * @return the detectedSubtype, or null if none has been detected
 */
public BasetypeTemplate getDetectedSubtype() {
    return detectedSubtype;
}
/**
* Stores the template of the subtype that was found in front of a superclass basetype.
* @param detectedSubtype the detectedSubtype to set (null to clear it)
*/
public void setDetectedSubtype(BasetypeTemplate detectedSubtype) {
this.detectedSubtype = detectedSubtype;
}
/**
 * Tells whether values that are missing in a parsed name (ring end, configuration,
 * substituent position of neu) are filled in from template defaults.
 *
 * @return the useDefaultValues
 */
public boolean isUseDefaultValues() {
    return useDefaultValues;
}
/**
* Switches the use of template default values for information missing in a parsed name on or off.
* Note that init() sets this flag back to false.
* @param useDefaultValues the useDefaultValues to set
*/
public void setUseDefaultValues(boolean useDefaultValues) {
this.useDefaultValues = useDefaultValues;
}
/**
 * Returns the counter of "?-" wildcards read from the text in front of the basetype.
 *
 * @return the preposWildcards
 */
private int getPreposWildcards() {
    return preposWildcards;
}
/**
* Sets the counter of "?-" wildcards read from the text in front of the basetype.
* @param preposWildcards the preposWildcards to set
*/
private void setPreposWildcards(int preposWildcards) {
this.preposWildcards = preposWildcards;
}
/**
 * Increments the wildcard counter by one.
 */
private void increasePreposWildcards() {
    this.preposWildcards++;
}
/**
 * Decrements the wildcard counter by one.
 */
private void decreasePreposWildcards() {
    this.preposWildcards--;
}
/**
 * Returns the fuzziness counter (see {@link #isFuzzy()}).
 *
 * @return the fuzzy
 */
private int getFuzzy() {
    return fuzzy;
}
/**
* Sets the fuzziness counter; any value greater than zero makes isFuzzy() return true.
* @param fuzzy the fuzzy to set
*/
private void setFuzzy(int fuzzy) {
this.fuzzy = fuzzy;
}
/**
 * Increments the fuzziness counter by one.
 */
private void addFuzzy() {
    this.fuzzy++;
}
/**
 * Tells whether the fuzziness counter has been raised at least once.
 *
 * @return true if the fuzziness counter is greater than zero
 */
public boolean isFuzzy() {
    return this.fuzzy > 0;
}
//*****************************************************************************
//*** parsing methods: ********************************************************
//*****************************************************************************
/**
 * Parses a monosaccharide name into a newly created monosaccharide.
 * The new object is built from the config and the template container of this importer and is
 * then filled by {@link #parseMsString(String, Monosaccharide)}.
 *
 * @param name the monosaccharide name to parse
 * @return the monosaccharide built from the name
 * @throws ResourcesDbException if the name cannot be parsed
 */
public Monosaccharide parseMsString(String name) throws ResourcesDbException {
    Monosaccharide parsedMs = new Monosaccharide(getConfig(), getTemplateContainer());
    parseMsString(name, parsedMs);
    return parsedMs;
}
/**
 * Parses a CarbBank style monosaccharide name and stores the result in the given monosaccharide.
 * <p>
 * The basetype is searched in three steps: superclass names (like hex, hep, pen), specific
 * basetype names (like glc, gal, man) and finally trivial names. The text in front of the
 * basetype and the text behind it are then parsed separately, the collected data is applied
 * to the monosaccharide and the result is checked for consistency.
 * <p>
 * The detection state of this importer is cleared at the start of every call, so that results
 * of an earlier parse (detected subtype, ring type, trivial name, ...) cannot leak into the
 * current one when the importer is reused. The useDefaultValues flag is left untouched.
 *
 * @param name the monosaccharide name to parse
 * @param ms the monosaccharide to fill; it is re-initialized before parsing
 * @throws ResourcesDbException if ms or name is null, if no basetype can be found in the name,
 *         if a part of the name cannot be parsed, or if the resulting monosaccharide is inconsistent
 */
public void parseMsString(String name, Monosaccharide ms) throws ResourcesDbException {
    if(ms == null) {
        throw new NameParsingException("CarbbankImporter.parseMsString(String, Monosaccharide): Monosaccharide must not be null.");
    }
    if(name == null) {
        throw new NameParsingException("CarbbankImporter.parseMsString(String, Monosaccharide): name must not be null.");
    }
    ms.init();
    ms.setCheckPositionsOnTheFly(false);
    this.resetDetectionState();
    this.setInputName(name);
    this.setFoundMs(false);
    String nameLowercase = name.toLowerCase();

    //*** search for superclass basetype (like hex, hep, pen, ...): ***
    this.detectSuperclassBasetype(name, nameLowercase, ms);
    //*** if no ms basetype was found yet, search for specific type (like glc, gal, man, ...) ***
    if(!isFoundMs()) {
        this.detectSpecificBasetype(name, nameLowercase, ms);
    }
    //*** if still no ms basetype was found, try trivial names: ***
    if(!isFoundMs()) {
        this.detectTrivialnameBasetype(name, nameLowercase, ms);
    }
    if(!isFoundMs()) {
        throw new NameParsingException("Could not find ms basetype in " + name);
    }

    this.parsePreposStr(getPrePosStr(), ms);
    this.parsePostposStr(getPostPosStr(), ms);
    this.processParsedData(ms);
    this.checkCompulsoryTrivialnameConfiguration(ms);
    MonosaccharideValidation.checkMonosaccharideConsistency(ms, this.getTemplateContainer(), this.getConfig());
}

/**
 * Clears everything that a previous parse may have detected.
 * Unlike init(), this does not touch the useDefaultValues flag.
 */
private void resetDetectionState() {
    this.setPrePosStr("");
    this.setPostPosStr("");
    this.setDetectedBasetype(null);
    this.setDetectedSubtype(null);
    this.setDetectedTrivialname((TrivialnameTemplate) null);
    this.setTmpStereocode("");
    this.setFuzzy(0);
    this.setPreposWildcards(0);
    this.detectedRingtype = null;
}

/**
 * Looks for a superclass basetype name in the input and, if one is found, also checks whether the
 * three characters in front of it name a subtype (like xyl in xylHex).
 */
private void detectSuperclassBasetype(String name, String nameLowercase, Monosaccharide ms) throws ResourcesDbException {
    ArrayList<String> basetypesSuperclassList = this.getTemplateContainer().getBasetypeTemplateContainer().getBasetypeListSuperclass();
    for(String superclassName : basetypesSuperclassList) {
        int basepos = nameLowercase.indexOf(superclassName);
        if(basepos == -1) {
            continue;
        }
        String prefix = name.substring(0, basepos);
        this.setPrePosStr(prefix);
        this.setPostPosStr(name.substring(basepos + superclassName.length()));
        BasetypeTemplate basetype = this.getTemplateContainer().getBasetypeTemplateContainer().getBasetypeTemplateByName(superclassName);
        this.setDetectedBasetype(basetype);
        ms.setSize(basetype.getSize());
        ms.setDefaultCarbonylPosition(basetype.getCarbonylPosition());
        if(prefix.length() >= 3) {
            String subtypeStr = prefix.substring(prefix.length() - 3).toLowerCase();
            if(subtypeStr.equals("thr")) { //*** for threose subtype usually "thr" is used instead of "tro" ***
                subtypeStr = "tro";
            }
            BasetypeTemplate subtype = this.getTemplateContainer().getBasetypeTemplateContainer().getBasetypeTemplateByName(subtypeStr);
            //TODO: check for ketose subtype in case subtype is null
            if(subtype != null) {
                this.setDetectedSubtype(subtype);
                this.setTmpStereocode(subtype.getStereocode());
                ms.setDefaultCarbonylPosition(subtype.getCarbonylPosition());
                this.setPrePosStr(prefix.substring(0, prefix.length() - 3));
            }
        }
        this.setFoundMs(true);
        return;
    }
}

/**
 * Looks for a specific basetype name (like glc, gal, man) in the input.
 */
private void detectSpecificBasetype(String name, String nameLowercase, Monosaccharide ms) throws ResourcesDbException {
    ArrayList<String> basetypesSpecificList = this.getTemplateContainer().getBasetypeTemplateContainer().getBasetypeListSpecific();
    for(String specificName : basetypesSpecificList) {
        int basepos = nameLowercase.indexOf(specificName);
        if(basepos == -1) {
            continue;
        }
        this.setPrePosStr(name.substring(0, basepos));
        this.setPostPosStr(name.substring(basepos + specificName.length()));
        BasetypeTemplate basetype = this.getTemplateContainer().getBasetypeTemplateContainer().getBasetypeTemplateByName(specificName);
        this.setDetectedBasetype(basetype);
        ms.setSize(basetype.getSize());
        ms.setDefaultCarbonylPosition(basetype.getCarbonylPosition());
        this.setTmpStereocode(basetype.getStereocode());
        this.setFoundMs(true);
        return;
    }
}

/**
 * Looks for a trivial name in the input and, if one is found, initializes the monosaccharide from
 * the trivial name template.
 */
private void detectTrivialnameBasetype(String name, String nameLowercase, Monosaccharide ms) throws ResourcesDbException {
    ArrayList<String> trivialNamesList = this.getTemplateContainer().getTrivialnameTemplateContainer().getTrivialnameBasetypeList(GlycanNamescheme.CARBBANK);
    for(String basename : trivialNamesList) {
        int basepos = nameLowercase.indexOf(basename);
        if(basepos == -1) {
            continue;
        }
        TrivialnameTemplate template = this.getTemplateContainer().getTrivialnameTemplateContainer().forBasetypeName(this.getNamescheme(), basename);
        if(template == null) {
            //*** the candidate list is taken from the CarbBank scheme, the template lookup uses the scheme of this importer, ***
            //*** so a candidate may have no template here; skip it instead of failing with a NullPointerException ***
            continue;
        }
        this.setPrePosStr(name.substring(0, basepos));
        this.setPostPosStr(name.substring(basepos + basename.length()));
        this.setDetectedBasetype(template);
        this.setDetectedTrivialname(template);
        this.setTmpStereocode(template.getStereocode());
        this.setFoundMs(true);
        ms.init(template);
        return;
    }
}

/**
 * Enforces the default configuration of a detected trivial name if the template marks it as compulsory.
 */
private void checkCompulsoryTrivialnameConfiguration(Monosaccharide ms) throws ResourcesDbException {
    TrivialnameTemplate trivTmpl = this.getDetectedTrivialname();
    if(trivTmpl == null || !trivTmpl.isDefaultConfigIsCompulsory()) {
        return;
    }
    if(ms.getConfiguration() == null) {
        ms.setConfiguration(trivTmpl.getDefaultConfiguration());
    }
    if(!trivTmpl.getDefaultConfiguration().equals(ms.getConfiguration())) {
        //TODO: store actually detected name and use this instead of primary name (in case more than one name is defined for a trivialname template in one namescheme)
        throw new NameParsingException("Trivialname '" + trivTmpl.getPrimaryName(this.getNamescheme()) + "' is not defined for configuration " + ms.getConfiguration().getSymbol() + ".");
    }
}
/**
 * Parses the text in front of the basetype piece by piece until nothing is left.
 * <p>
 * Recognized elements are dashes, the open chain markers "aldehydo-" and "keto-", the wildcard
 * "?-", anomers ("a-", "b-", "alpha", "beta"), configurations ("d-", "l-", optionally followed by
 * a three letter subtype), modifications and three letter subtypes without configuration.
 * Anomers, configurations and subtype names are matched case insensitively and are handed on in
 * lower case; the open chain markers are matched case sensitively.
 *
 * @param preposStr the text in front of the basetype
 * @param ms the monosaccharide to store the parsed information in
 * @throws ResourcesDbException if anomer or configuration is defined more than once, or if a part
 *         of the text cannot be parsed
 */
private void parsePreposStr(String preposStr, Monosaccharide ms) throws ResourcesDbException {
    while(preposStr.length() > 0) {
        if(preposStr.startsWith("-")) {
            preposStr = preposStr.substring(1); //*** make sure no dash is left at the beginning of parsed string ***
            this.increaseParsingPosition();
            continue;
        }
        //*** check for open chain marker: ***
        //TODO: add checks, if marker type matches ms type (aldose / ketose)
        if(preposStr.startsWith("aldehydo-") || preposStr.startsWith("keto-")) {
            int markerLength = preposStr.startsWith("aldehydo-") ? 9 : 5;
            ms.setRingEnd(Basetype.OPEN_CHAIN);
            ms.setRingStart(Basetype.OPEN_CHAIN);
            preposStr = preposStr.substring(markerLength);
            this.increaseParsingPosition(markerLength);
            this.detectedRingtype = Ringtype.OPEN;
            continue;
        }
        //*** check for wildcard: ***
        if(preposStr.startsWith("?-")) {
            this.increasePreposWildcards();
            preposStr = preposStr.substring(2);
            this.increaseParsingPosition(2);
            //*** check, if a monosaccharide basetype name follows the wildcard (something like ?-gro) ***
            BasetypeTemplate wildcardSubtype = this.lookupLeadingSubtype(preposStr);
            if(wildcardSubtype != null) {
                preposStr = preposStr.substring(3);
                this.increaseParsingPosition(3);
                String subStereoStr = Stereocode.absoluteToRelative(wildcardSubtype.getStereocode());
                ms.setStereoStr(subStereoStr + ms.getStereoStr());
                addFuzzy();
                this.decreasePreposWildcards();
            }
            continue;
        }
        String preposLower = preposStr.toLowerCase();
        //*** check for anomer: ***
        if(preposLower.startsWith("a-") || preposLower.startsWith("b-")) {
            //*** use the lower case symbol, so that "A-" / "B-" result in the same anomer as "a-" / "b-" ***
            this.assignAnomer(preposLower.substring(0, 1), ms);
            preposStr = preposStr.substring(2);
            this.increaseParsingPosition(2);
            continue;
        }
        if(preposLower.startsWith("alpha")) {
            this.assignAnomer("a", ms);
            preposStr = preposStr.substring(5);
            this.increaseParsingPosition(5);
            continue;
        }
        if(preposLower.startsWith("beta")) {
            this.assignAnomer("b", ms);
            preposStr = preposStr.substring(4);
            this.increaseParsingPosition(4);
            continue;
        }
        //*** check for configuration: ***
        if(preposLower.startsWith("d-") || preposLower.startsWith("l-")) {
            int configParsingPosition = this.getParsingPosition();
            String configStr = preposLower.substring(0, 1);
            preposStr = preposStr.substring(2);
            this.increaseParsingPosition(2);
            //*** check, if a monosaccharide basetype name follows the configuration (something like "d-gro" in d-gro-a-d-manhepp) ***
            BasetypeTemplate configuredSubtype = this.lookupLeadingSubtype(preposStr);
            if(configuredSubtype != null) {
                //*** the configuration belongs to the subtype, not to the monosaccharide as a whole ***
                String subStereo = configuredSubtype.getStereocode();
                if(configStr.equals("l")) {
                    subStereo = Stereocode.changeDLinStereoString(subStereo);
                }
                ms.setStereoStr(subStereo + ms.getStereoStr());
                preposStr = preposStr.substring(3);
                this.increaseParsingPosition(3);
                continue;
            }
            if(ms.getConfiguration() != null) {
                throw new NameParsingException("Found multiple configuration definitions.", this.getInputName(), configParsingPosition);
            }
            ms.setConfiguration(configStr);
            continue;
        }
        //*** check for modifications: ***
        String beforeModifications = preposStr;
        preposStr = parseModifications(preposStr, ms);
        if(!beforeModifications.equals(preposStr)) { //*** modifications were found ***
            continue;
        }
        //*** check for additional subtype (as it might occur in residues with more than 6 backbone carbon atoms): ***
        //*** it will only be found here if no configuration symbol is assigned to this subtype - otherwise it will have been parsed above already ***
        //*** therefore, the stereocode has to be set to a relative definition if a subtype is found here: ***
        BasetypeTemplate plainSubtype = this.lookupLeadingSubtype(preposStr);
        if(plainSubtype != null) {
            String subStereo = Stereocode.absoluteToRelative(plainSubtype.getStereocode());
            ms.setStereoStr(subStereo + ms.getStereoStr());
            preposStr = preposStr.substring(3);
            this.increaseParsingPosition(3);
            continue;
        }
        //*** no known elements detected in current part of preposStr => no further parsing possible ***
        throw new NameParsingException("Cannot parse " + preposStr, this.getInputName(), this.getParsingPosition());
    }
}

/**
 * Looks up the basetype template named by the first three characters of the given text.
 * The characters are converted to lower case before the lookup, as it is done for the subtype
 * that directly precedes a superclass basetype.
 *
 * @return the template, or null if the text is shorter than three characters or names no basetype
 */
private BasetypeTemplate lookupLeadingSubtype(String text) throws ResourcesDbException {
    if(text.length() < 3) {
        return null;
    }
    return this.getTemplateContainer().getBasetypeTemplateContainer().getBasetypeTemplateByName(text.substring(0, 3).toLowerCase());
}

/**
 * Sets the anomer of the monosaccharide, refusing a second definition.
 */
private void assignAnomer(String anomerSymbol, Monosaccharide ms) throws ResourcesDbException {
    if(ms.getAnomer() != null) { //*** anomer already set ***
        throw new NameParsingException("multiple definition of anomer.", this.getInputName(), this.getParsingPosition());
    }
    ms.setAnomer(anomerSymbol);
}
/**
 * Parses the text behind the basetype.
 * <p>
 * An ulo modification standing directly behind the basetype is read first, then the ring type
 * marker ("p" or "f"). The remaining text is parsed piece by piece: dashes, "ol" (alditol),
 * "onic", "aric", "a" (uronic acid, unless the text starts with a substituent name) and
 * modifications.
 *
 * @param postposStr the text behind the basetype
 * @param ms the monosaccharide to store the parsed information in
 * @throws ResourcesDbException if a part of the text cannot be parsed
 */
private void parsePostposStr(String postposStr, Monosaccharide ms) throws ResourcesDbException {
    if(postposStr.startsWith("-")) {
        postposStr = postposStr.substring(1);
        this.increaseParsingPosition(1);
    }
    //*** check for ulo modification that might stand between base type and ring type: ***
    //*** (every comma separated position may consist of more than one digit, as in parseModifications) ***
    if(postposStr.toLowerCase().matches("^[0-9]+(,[0-9]+)*(-)?(di|tri|tetra){0,1}(-)?ulo(.*)")) {
        postposStr = parseModifications(postposStr, ms);
    }
    //*** check for ring type: ***
    String ringCandidate = postposStr.toLowerCase();
    if(ringCandidate.startsWith("p") || ringCandidate.startsWith("f")) {
        this.detectedRingtype = ringCandidate.startsWith("p") ? Ringtype.PYRANOSE : Ringtype.FURANOSE;
        postposStr = postposStr.substring(1);
        this.increaseParsingPosition(1);
    }
    while(postposStr.length() > 0) {
        if(postposStr.startsWith("-")) {
            postposStr = postposStr.substring(1);
            this.increaseParsingPosition(1);
            continue;
        }
        String postposLower = postposStr.toLowerCase();
        if(postposLower.startsWith("ol")) {
            ms.setAlditol(true);
            this.detectedRingtype = Ringtype.OPEN;
            postposStr = postposStr.substring(2);
            this.increaseParsingPosition(2);
            continue;
        }
        if(postposLower.startsWith("onic")) {
            ms.setAldonic();
            postposStr = postposStr.substring(4);
            this.increaseParsingPosition(4);
            continue;
        }
        if(postposLower.startsWith("aric")) {
            ms.setAldaric();
            postposStr = postposStr.substring(4);
            this.increaseParsingPosition(4);
            continue;
        }
        if(postposLower.startsWith("a")) {
            //*** an "a" marks an uronic acid only if it is not the beginning of a substituent name ***
            boolean matchesSubstitution = false;
            ArrayList<String> substNameList = this.getTemplateContainer().getSubstituentTemplateContainer().getResidueIncludedNameList(this.getNamescheme());
            for(String substName : substNameList) {
                if(postposLower.startsWith(substName.toLowerCase())) {
                    matchesSubstitution = true;
                    break;
                }
            }
            if(!matchesSubstitution) {
                ms.setUronic();
                postposStr = postposStr.substring(1);
                this.increaseParsingPosition(1);
                continue;
            }
        }
        //*** check for modifications: ***
        String beforeModifications = postposStr;
        postposStr = parseModifications(postposStr, ms);
        if(!beforeModifications.equals(postposStr)) { //*** modifications were found ***
            continue;
        }
        //*** unparsable string: ***
        throw new NameParsingException("Cannot parse " + postposStr, this.getInputName(), this.getParsingPosition());
    }
}
/**
 * Applies the data collected while parsing the name to the monosaccharide.
 * <p>
 * Sets ring start and ring type / ring end, fills in a missing configuration (from template
 * defaults if useDefaultValues is set, otherwise as unknown), builds the stereocode from the
 * temporary stereocode and the stereo string collected so far, sets a missing anomer and
 * finally hands the fuzziness flag on to the monosaccharide.
 *
 * @param ms the monosaccharide to complete
 * @throws ResourcesDbException if an aldonic acid has a ring end set, or if the stereocode of a
 *         superclass / subtype combination does not match the monosaccharide size
 */
private void processParsedData(Monosaccharide ms) throws ResourcesDbException {
    //*** set ring: ***
    if(ms.getRingStart() == Basetype.UNKNOWN_RING) {
        ms.setRingStart(ms.getDefaultCarbonylPosition());
    }
    if(this.detectedRingtype != null) {
        ms.setRingtype(this.detectedRingtype);
    } else if(this.useDefaultValues) {
        if(this.getDetectedTrivialname() != null) {
            ms.setRingEnd(this.getDetectedTrivialname().getDefaultRingend());
        } else if(this.getDetectedBasetype() != null) {
            ms.setRingEnd(this.getDetectedBasetype().getDefaultRingend());
        }
    }

    //*** check for completeness / excluding information: ***
    if(ms.getConfiguration() == null) {
        if(isUseDefaultValues() && getDetectedBasetype().getDefaultConfiguration() != null) {
            ms.setConfiguration(getDetectedBasetype().getDefaultConfiguration());
        } else if(isUseDefaultValues() && getDetectedSubtype() != null && getDetectedSubtype().getDefaultConfiguration() != null) {
            ms.setConfiguration(getDetectedSubtype().getDefaultConfiguration());
        } else {
            ms.setConfiguration(StereoConfiguration.Unknown);
        }
    }

    //*** set stereocode: ***
    boolean noTmpStereocode = getTmpStereocode() == null || getTmpStereocode().length() == 0;
    if(noTmpStereocode && getDetectedBasetype().isSuperclass()) {
        //*** superclass without subtype: all positions not covered by the stereo string so far are unknown ***
        int unknownCount = getDetectedBasetype().getSize() - 2 - ms.getStereoStr().length();
        setTmpStereocode(StringUtils.multiplyChar(StereoConfiguration.Unknown.getStereosymbol(), unknownCount));
    }
    String stereo;
    //*** the constants are used as receivers of equals(), so that a missing configuration is handled as unknown instead of causing a NullPointerException ***
    if(StereoConfiguration.Laevus.equals(ms.getConfiguration())) {
        stereo = Stereocode.changeDLinStereoString(getTmpStereocode()) + ms.getStereoStr();
    } else if(ms.getConfiguration() == null || StereoConfiguration.Unknown.equals(ms.getConfiguration())) { //*** absolute configuration is not known ***
        stereo = Stereocode.absoluteToRelative(getTmpStereocode()) + ms.getStereoStr();
    } else {
        stereo = getTmpStereocode() + ms.getStereoStr();
    }

    //TODO: consider lactone modification
    if(ms.hasCoreModification(CoreModificationTemplate.ACID, 1) && ms.getRingStart() == 1) {
        if(ms.getRingEnd() > 0) {
            throw new MonosaccharideException("Aldonic residue with open chain cannot have ring oxygen " + ms.getRingEnd());
        }
        ms.setRingEnd(Basetype.OPEN_CHAIN);
    }
    if(ms.getAnomer() == null) {
        if(ms.isAlditol() || ms.getRingEnd() == Basetype.OPEN_CHAIN) {
            ms.setAnomer(Anomer.OPEN_CHAIN);
        } else {
            ms.setAnomer(Anomer.UNKNOWN);
        }
    }

    //*** the first and the last position get the nonchiral symbol ***
    stereo = StereoConfiguration.Nonchiral.getStereosymbol() + stereo + StereoConfiguration.Nonchiral.getStereosymbol();
    //*** handle loss of stereochemistry: ***
    if(!getDetectedBasetype().isSuperclass()) {
        stereo = Stereocode.markNonchiralPositionsInStereoString(stereo, ms);
    } else if(getDetectedSubtype() != null) { //*** monosaccharide name contains combination of superclass and subtype, like xylHex ***
        stereo = Stereocode.getChiralOnlyStereoString(stereo);
        stereo = Stereocode.expandChiralonlyStereoString(stereo, ms);
        if(stereo.length() != ms.getSize()) {
            throw new MonosaccharideException("Error in stereocode: size is " + stereo.length() + ", ms size is " + ms.getSize());
        }
    }
    ms.setStereoStr(stereo);
    //*** set stereocode resulting from anomeric: ***
    ms.setAnomerInStereocode();
    ms.setFuzzy(this.isFuzzy());
}
/**
 * Parses one modification (core modification or substituent) from the beginning of the given text
 * and adds it to the monosaccharide.
 * <p>
 * A leading semicolon and a leading dash are skipped. Then the position(s) are read: digits,
 * "?" for an unknown position (stored as 0), further positions separated by commas. If no
 * position is given, a default is used: for the trivial name neu carrying only its 5-amino group
 * and followed by (N)Ac / (N)Gc position 5 (if useDefaultValues is set, otherwise unknown),
 * position 2 if the monosaccharide has no substitutions yet, otherwise unknown.
 * After the positions the longest matching core modification or substituent name is searched,
 * optionally preceded by a number prefix that fits the number of positions.
 *
 * @param parseStr the text to parse
 * @param ms the monosaccharide to add the modification to
 * @return the rest of the text behind the parsed modification; if no modification name is found
 *         and no explicit position was given, the text is returned with only the positions /
 *         delimiters removed that were read so far
 * @throws ResourcesDbException if a position was given explicitly but no modification name
 *         follows, if a position is no valid number, if no template is found for the detected
 *         name, or if the number of positions does not fit a divalent modification
 */
private String parseModifications(String parseStr, Monosaccharide ms) throws ResourcesDbException {
    if(parseStr.startsWith(";")) { //*** in case a semicolon is used as delimiter, remove it ***
        parseStr = parseStr.substring(1);
        this.increaseParsingPosition(1);
    }
    if(parseStr.startsWith("-")) { //*** make sure that no dash is left at the beginning of the string ***
        parseStr = parseStr.substring(1);
        this.increaseParsingPosition(1);
    }
    if(parseStr.length() == 0) {
        return parseStr;
    }

    //*** read position(s): ***
    int defaultPos = 0; //*** 0: position given explicitly, > 0: default position used, -1: position unknown ***
    ArrayList<Integer> positions = new ArrayList<Integer>();
    ArrayList<Integer> autoPosList = new ArrayList<Integer>();
    String posStr = "";
    while(isAsciiDigitAt(parseStr, 0)) {
        posStr += parseStr.substring(0, 1);
        parseStr = parseStr.substring(1);
        this.increaseParsingPosition(1);
    }
    if(posStr.equals("")) {
        if(parseStr.startsWith("?")) {
            posStr = "0";
            parseStr = parseStr.substring(1);
            this.increaseParsingPosition(1);
        } else if(this.getDetectedTrivialname() != null && "neu".equalsIgnoreCase(this.getDetectedTrivialname().getPrimaryName(this.getNamescheme()))) {
            if(ms.countSubstitutions() == 1 && ms.getSubstitution(SubstituentTemplate.AMINOTEMPLATENAME, 5, LinkageType.DEOXY) != null) { //*** no modifications apart from the 5N that is included in the trivial name set so far ***
                //TODO: replace
                String neuCandidate = parseStr.toLowerCase();
                if(neuCandidate.matches("^n{0,1}([ag]c)$") || neuCandidate.matches("^n{0,1}([ag]c)[0-9](.*)")) {
                    if(isUseDefaultValues()) {
                        defaultPos = 5;
                        posStr = "5";
                    } else {
                        defaultPos = -1;
                        posStr = "0";
                    }
                }
            }
        } else if(ms.countSubstitutions() == 0) {
            defaultPos = 2;
            posStr = "2";
        } else {
            defaultPos = -1;
            posStr = "0";
        }
    }
    if(posStr.equals("")) {
        defaultPos = -1;
        posStr = "0";
    }
    if(defaultPos > 0) {
        autoPosList.add(this.toPosition(posStr));
    } else {
        positions.add(this.toPosition(posStr));
    }
    //*** check for further, comma-separated positions: ***
    while(parseStr.startsWith(",") && (parseStr.startsWith("?", 1) || isAsciiDigitAt(parseStr, 1))) {
        posStr = parseStr.substring(1, 2);
        parseStr = parseStr.substring(2);
        this.increaseParsingPosition(2);
        if(posStr.equals("?")) {
            posStr = "0"; //*** unknown position, handled like a leading "?" ***
        } else {
            while(isAsciiDigitAt(parseStr, 0)) {
                posStr += parseStr.substring(0, 1);
                parseStr = parseStr.substring(1);
                this.increaseParsingPosition(1);
            }
        }
        positions.add(this.toPosition(posStr));
    }
    if(parseStr.startsWith("-")) {
        parseStr = parseStr.substring(1);
        this.increaseParsingPosition(1);
    }

    //*** get modification name (the longest matching name wins, and so does its kind): ***
    String modStr = "";
    boolean foundSubstitution = false;
    boolean foundCoreModification = false;
    String parseLower = parseStr.toLowerCase();
    ArrayList<String> coreModList = CoreModificationTemplate.getCarbbankNamesList();
    for(String coremodName : coreModList) {
        if(coremodName.length() > modStr.length() && parseLower.startsWith(coremodName)) {
            modStr = coremodName;
            foundCoreModification = true;
        }
    }
    ArrayList<String> substTemplateList = this.getTemplateContainer().getSubstituentTemplateContainer().getResidueIncludedNameList(this.getNamescheme());
    for(String listedName : substTemplateList) {
        String substName = listedName.toLowerCase();
        if(substName.length() > modStr.length() && parseLower.startsWith(substName)) {
            modStr = substName;
            foundSubstitution = true;
            foundCoreModification = false; //*** a longer substituent name supersedes a core modification match ***
        }
    }
    ArrayList<NumberPrefix> numberPrefixList = new ArrayList<NumberPrefix>();
    int numberPrefixStrLength = 0;
    int positionCount = positions.size() + autoPosList.size();
    if((modStr.length() == 0) && (positionCount > 1)) { //*** no modification, but positions found so far ***
        numberPrefixList = NumberPrefix.getPrefixListBySize(positionCount);
        for(NumberPrefix prefix : numberPrefixList) {
            String numberPrefixStr = prefix.getPrefixStr();
            for(String coremodName : coreModList) {
                if(coremodName.length() > modStr.length() && parseLower.startsWith(numberPrefixStr + coremodName)) {
                    modStr = coremodName;
                    foundCoreModification = true;
                    numberPrefixStrLength = numberPrefixStr.length();
                }
            }
            for(String listedName : substTemplateList) {
                String substName = listedName.toLowerCase(); //*** lower case as in the search without number prefix ***
                if(substName.length() > modStr.length() && parseLower.startsWith(numberPrefixStr + substName)) {
                    modStr = substName;
                    foundSubstitution = true;
                    foundCoreModification = false;
                    numberPrefixStrLength = numberPrefixStr.length();
                }
            }
            if(modStr.length() > 0) {
                break;
            }
        }
    }

    if(modStr.length() == 0) { //*** no modification found ***
        if(defaultPos == 0) { //*** position was given explicitly ***
            throw new NameParsingException("Cannot assign modification in " + parseStr, this.getInputName(), this.getParsingPosition());
        }
        return parseStr;
    }
    if(foundCoreModification) {
        for(int i = 0; i < autoPosList.size(); i++) {
            positions.add(Integer.valueOf(0)); //*** default positions are only valid for substituents ***
        }
        this.applyCoreModification(modStr, positions, ms);
    } else if(foundSubstitution) {
        positions.addAll(autoPosList);
        this.applySubstituent(modStr, positions, ms);
    }
    int consumedLength = modStr.length() + numberPrefixStrLength;
    parseStr = parseStr.substring(consumedLength);
    this.increaseParsingPosition(consumedLength);
    return parseStr;
}

/**
 * Tells whether the character at the given index of the text exists and is one of '0' to '9'.
 */
private static boolean isAsciiDigitAt(String text, int index) {
    return index < text.length() && text.charAt(index) >= '0' && text.charAt(index) <= '9';
}

/**
 * Converts a position string to a number, reporting invalid input as a name parsing error.
 */
private Integer toPosition(String posStr) throws ResourcesDbException {
    try {
        return Integer.valueOf(posStr);
    } catch(NumberFormatException nfe) {
        throw new NameParsingException("Invalid position " + posStr, this.getInputName(), this.getParsingPosition());
    }
}

/**
 * Adds the core modification with the given CarbBank name at the given positions.
 */
private void applyCoreModification(String modStr, ArrayList<Integer> positions, Monosaccharide ms) throws ResourcesDbException {
    CoreModificationTemplate modTemplate = CoreModificationTemplate.forCarbbankName(modStr);
    if(modTemplate == null) {
        throw new ResourcesDbException("Cannot get template for core modification " + modStr + " (carbbank style)");
    }
    if(modTemplate.equals(CoreModificationTemplate.EN)) {
        modTemplate = CoreModificationTemplate.ENX;
    }
    if(modTemplate.equals(CoreModificationTemplate.ENX) || modTemplate.equals(CoreModificationTemplate.YN)) {
        //*** only the first position of a multiple bond is given, the second one is the following position ***
        for(Integer pos : positions) {
            int position = pos.intValue();
            CoreModification mod = new CoreModification();
            mod.setDivalentModification(modTemplate, position, position + 1);
            mod.setSourceName(modStr);
            ms.addCoreModification(mod);
        }
    } else if(modTemplate.getValence() == 1) {
        boolean iupacDeoxy = GlycanNamescheme.IUPAC.equals(this.getNamescheme()) && modTemplate.equals(CoreModificationTemplate.DEOXY);
        for(Integer pos : positions) {
            int position = pos.intValue();
            if(iupacDeoxy) {
                boolean impliedBySubstitution = false;
                ArrayList<Substitution> substList = ms.getSubstitutionsByPosition(position);
                for(Substitution subst : substList) {
                    if(LinkageType.DEOXY.equals(subst.getLinkagetype1())) {
                        impliedBySubstitution = true;
                        break;
                    }
                }
                if(impliedBySubstitution) {
                    continue; //*** deoxygenation is implied in existing substitution, so no core modification is added ***
                }
            }
            if(position > 1 && modTemplate.equals(CoreModificationTemplate.KETO)) {
                if(ms.getDefaultCarbonylPosition() == 1 && !ms.hasCoreModification(CoreModificationTemplate.KETO, 1)) {
                    ms.setDefaultCarbonylPosition(position);
                }
            }
            CoreModification mod = new CoreModification();
            mod.setModification(modTemplate, position);
            mod.setSourceName(modStr);
            ms.addCoreModification(mod);
        }
    } else if(modTemplate.getValence() == 2) {
        if(positions.size() != 2) {
            throw new NameParsingException("Divalent core modification " + modStr + " requires two positions.");
        }
        CoreModification mod = new CoreModification();
        mod.setDivalentModification(modTemplate, positions.get(0).intValue(), positions.get(1).intValue());
        mod.setSourceName(modStr);
        ms.addCoreModification(mod);
    }
}

/**
 * Adds the substituent with the given name at the given positions.
 */
private void applySubstituent(String modStr, ArrayList<Integer> positions, Monosaccharide ms) throws ResourcesDbException {
    SubstituentTemplate substTemplate = this.getTemplateContainer().getSubstituentTemplateContainer().forResidueIncludedName(this.getNamescheme(), modStr);
    if(substTemplate == null) {
        throw new ResourcesDbException("Cannot get substituent template for substituent name " + modStr + " and namescheme " + this.getNamescheme().getNameStr());
    }
    if(substTemplate.getMaxValence() == 1) {
        LinkageType linktype = substTemplate.getLinkageTypeBySubstituentName(this.getNamescheme(), modStr);
        for(int i = 0; i < positions.size(); i++) {
            Substitution subst = new Substitution(this.getTemplateContainer());
            subst.setSourceName(modStr);
            if(GlycanNamescheme.IUPAC.equals(this.getNamescheme()) && LinkageType.DEOXY.equals(linktype)) {
                //*** substituent implies deoxygenation ***
                //*** check, if explicit deoxy was given before, and if so remove it ***
                CoreModification mod = ms.getCoreModification(CoreModificationTemplate.DEOXY.getName(), positions.get(i).intValue());
                if(mod != null) {
                    ms.deleteCoreModification(mod);
                    //*** NOTE(review): this continue also skips adding the substituent itself - confirm that this is intended ***
                    continue;
                }
            }
            boolean mergedIntoAmine = false;
            if(positions.size() == 1 && getDetectedBasetype().isTrivialname()) {
                //*** check, if a trivial name implies an amine, which might be extended like in neu5ac: ***
                TrivialnameTemplate trivTemplate = (TrivialnameTemplate) getDetectedBasetype();
                for(Substitution trivSubst : trivTemplate.getSubstitutions()) {
                    if(trivSubst.getValence() != 1 || !trivSubst.getPosition1().get(0).equals(positions.get(i))) {
                        continue;
                    }
                    if(!trivSubst.getName().equals(this.getTemplateContainer().getSubstituentTemplateContainer().forResidueIncludedName(GlycanNamescheme.CARBBANK, "n").getName())) {
                        continue;
                    }
                    //*** trivial name contains amine, now if the substituent is n-linked or can be added to an n to build an n-linked one (like Ac to NAc), the amine is to be replaced by this ***
                    Substitution nSubst = ms.getSubstitution(trivSubst.getName(), trivSubst.getPosition1().get(0).intValue(), LinkageType.DEOXY);
                    if(nSubst == null) {
                        continue;
                    }
                    if(substTemplate.getDefaultLinkingAtom1().getElement().getPeriodicNumber() == 7) { //*** 7: nitrogen ***
                        nSubst.alterSubstituentTemplate(substTemplate);
                        mergedIntoAmine = true;
                    } else {
                        SubstituentTemplate tmpSubstTmpl = this.getTemplateContainer().getSubstituentTemplateContainer().forName(GlycanNamescheme.CARBBANK, "n" + modStr);
                        if(tmpSubstTmpl != null && tmpSubstTmpl.getDefaultLinkingAtom1().getElement().getPeriodicNumber() == 7) {
                            nSubst.alterSubstituentTemplate(tmpSubstTmpl);
                            mergedIntoAmine = true;
                        }
                    }
                    if(mergedIntoAmine) {
                        break; //*** the amine has been replaced, nothing left to add ***
                    }
                }
            }
            if(!mergedIntoAmine) {
                subst.setSubstitution(substTemplate, positions.get(i).intValue(), linktype);
                ms.addSubstitution(subst);
                if(subst.getBondOrder1() == 2) {
                    if(subst.getPosition1().size() == 1 && subst.getPosition1().get(0).intValue() != 0) {
                        ms.addCoreModification(new CoreModification(CoreModificationTemplate.SP2, subst.getPosition1().get(0).intValue()));
                    } else {
                        System.out.println("Warning: substitution with sp2 hybrid at unknown position.");
                    }
                }
            }
        }
    } else if(substTemplate.getMaxValence() == 2) {
        if(positions.size() == 2) {
            Substitution subst = new Substitution();
            subst.setSourceName(modStr);
            subst.setDivalentSubstitution(substTemplate, positions.get(0).intValue(), positions.get(1).intValue());
            ms.addSubstitution(subst);
        } else if(substTemplate.getMinValence() == 2) {
            throw new NameParsingException("Only one position given for divalent substituent " + modStr);
        } else {
            //*** the substituent may also be linked by a single bond, so add it once per position ***
            for(Integer pos : positions) {
                Substitution subst = new Substitution();
                subst.setSourceName(modStr);
                subst.setSubstitution(substTemplate, pos.intValue());
                ms.addSubstitution(subst);
            }
        }
    }
}
//*****************************************************************************
//*** other methods: **********************************************************
//*****************************************************************************
/**
 * Resets this importer to its initial state.
 * After the superclass has been reset, the name fragments, the detected basetype, subtype and
 * ring type as well as the fuzziness and wildcard counters are cleared.
 * Note that the useDefaultValues flag is set back to false as well.
 */
@Override
public void init() {
    super.init();
    this.setPrePosStr("");
    this.setPostPosStr("");
    this.setDetectedBasetype(null);
    this.setDetectedSubtype(null);
    this.setFuzzy(0);
    this.setPreposWildcards(0);
    this.setUseDefaultValues(false);
    this.detectedRingtype = null;
}
}