/*
* EuroCarbDB, a framework for carbohydrate bioinformatics
*
* Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as
* indicated by the @author tags or express copyright attribution
* statements applied by the authors.
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
* A copy of this license accompanies this distribution in the file LICENSE.txt.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* Last commit: $Rev: 1210 $ by $Author: glycoslave $ on $Date:: 2009-06-12 #$
*/
package org.eurocarbdb.resourcesdb.io;
import java.util.ArrayList;
import org.eurocarbdb.resourcesdb.*;
import org.eurocarbdb.resourcesdb.glycoconjugate_derived.LinkageType;
import org.eurocarbdb.resourcesdb.monosaccharide.CoreModification;
import org.eurocarbdb.resourcesdb.monosaccharide.CoreModificationTemplate;
import org.eurocarbdb.resourcesdb.monosaccharide.Monosaccharide;
import org.eurocarbdb.resourcesdb.monosaccharide.MonosaccharideException;
import org.eurocarbdb.resourcesdb.monosaccharide.MonosaccharideValidation;
import org.eurocarbdb.resourcesdb.monosaccharide.Ringtype;
import org.eurocarbdb.resourcesdb.monosaccharide.Stereocode;
import org.eurocarbdb.resourcesdb.monosaccharide.Substitution;
import org.eurocarbdb.resourcesdb.template.BasetypeTemplate;
import org.eurocarbdb.resourcesdb.template.SubstituentTemplate;
import org.eurocarbdb.resourcesdb.template.TemplateContainer;
import org.eurocarbdb.resourcesdb.template.TrivialnameTemplate;
import org.eurocarbdb.resourcesdb.util.StringUtils;
import org.eurocarbdb.resourcesdb.util.Utils;
/**
* This class implements the exporter, i.e. the name builder, for carbohydrate notations that are based on the CarbBank nomenclature, like CarbBank itself, Linucs/GlycoSciences, Sweet, etc.
* @author Thomas Lütteke
*
*/
public class CarbbankExporter extends StandardExporter implements MonosaccharideExporter {

    /** Configuration prefix that marks a basetype string of unknown configuration, e.g. in "X-gal". */
    private static final String UNKNOWN_CONFIG_PREFIX = "X-";

    /**
     * Trivial name template selected by the most recent call to {@link #export(Monosaccharide, boolean)},
     * or <code>null</code> if that call did not use a trivial name (or no export has been run yet).
     */
    private TrivialnameTemplate trivialTmpl = null;

    //*****************************************************************************
    //*** constructors: ***********************************************************
    //*****************************************************************************

    /**
     * Create an exporter for the given namescheme.
     * @param namescheme the namescheme in which names are generated
     * @throws ResourcesDbException propagated from the superclass constructor
     */
    public CarbbankExporter(GlycanNamescheme namescheme) throws ResourcesDbException {
        super(namescheme);
    }

    /**
     * Create an exporter for the given namescheme and configuration.
     * @param namescheme the namescheme in which names are generated
     * @param conf the configuration to use
     * @throws ResourcesDbException propagated from the superclass constructor
     */
    public CarbbankExporter(GlycanNamescheme namescheme, Config conf) throws ResourcesDbException {
        super(namescheme, conf);
    }

    /**
     * Create an exporter for the given namescheme, configuration and template container.
     * @param namescheme the namescheme in which names are generated
     * @param conf the configuration to use
     * @param container the template container used for template lookups
     * @throws ResourcesDbException propagated from the superclass constructor
     */
    public CarbbankExporter(GlycanNamescheme namescheme, Config conf, TemplateContainer container) throws ResourcesDbException {
        super(namescheme, conf, container);
    }

    /**
     * Create an exporter that uses the {@link GlycanNamescheme#CARBBANK} namescheme.
     * @param conf the configuration to use
     * @param container the template container used for template lookups
     * @throws ResourcesDbException propagated from the superclass constructor
     */
    public CarbbankExporter(Config conf, TemplateContainer container) throws ResourcesDbException {
        super(GlycanNamescheme.CARBBANK, conf, container);
    }

    //*****************************************************************************
    //*** getters/setters: ********************************************************
    //*****************************************************************************

    /**
     * Get the trivial name template that was stored by the last export call.
     * @return the stored template, or <code>null</code> if none is set
     */
    public TrivialnameTemplate getTrivialTmpl() {
        return this.trivialTmpl;
    }

    /**
     * Set the stored trivial name template.
     * Note that {@link #export(Monosaccharide, boolean)} overwrites this value on every call.
     * @param trivialTmpl the template to store (may be <code>null</code>)
     */
    public void setTrivialTmpl(TrivialnameTemplate trivialTmpl) {
        this.trivialTmpl = trivialTmpl;
    }

    //*****************************************************************************
    //*** export methods: *********************************************************
    //*****************************************************************************

    /**
     * Generate the name string of a monosaccharide.
     * Whether a trivial name that is not the primary alias may be used is taken from the
     * "force trivial names" setting of this exporter's configuration.
     * @param ms the monosaccharide the name of which is generated
     * @return the name string
     * @throws ResourcesDbException see {@link #export(Monosaccharide, boolean)}
     */
    public String export(Monosaccharide ms) throws ResourcesDbException {
        return export(ms, this.getConfig().isForceTrivialNames());
    }

    /**
     * Generate the name string of a monosaccharide.
     * If forceTrivial is set to true and a trivial name exists that will be used regardless if the trivial name is the primary alias or not,
     * while forceTrivial set to false will return a trivial name only if it is the primary alias.
     * Trivial names are only considered at all if the configuration allows them.
     * As a side effect, the trivial name template that was used (or <code>null</code>) is stored
     * and can be read via {@link #getTrivialTmpl()}.
     * @param ms the monosaccharide the name of which is generated
     * @param forceTrivial a flag to indicate if a potential trivial name shall be used even if that is a secondary alias
     * @return the name string
     * @throws ResourcesDbException if no namescheme is set (reported as a MonosaccharideException),
     * or propagated from the lookups performed while building the name
     */
    public String export(Monosaccharide ms, boolean forceTrivial) throws ResourcesDbException {
        if(getNamescheme() == null) {
            throw new MonosaccharideException("No namescheme set in CarbbankExporter.java");
        }
        String msNameStr = "";

        //*** mark open chain residues, unless the open chain is already implied by the residue itself: ***
        if(ms.getRingtype().equals(Ringtype.OPEN) && !MonosaccharideValidation.impliesOpenChain(ms)) {
            if(ms.hasCoreModification(CoreModificationTemplate.KETO) && !ms.hasCoreModification(CoreModificationTemplate.KETO, 1)) {
                msNameStr += "keto-";
            } else {
                msNameStr += "aldehydo-";
            }
        }

        //*** get basetypes and - if applicable - trivial name: ***
        ArrayList<String> basetypeList;
        TrivialnameTemplate trivTmpl = null;
        if(this.getConfig().isAllowTrivialNames()) {
            trivTmpl = this.getTemplateContainer().getTrivialnameTemplateContainer().checkMsForTrivialname(this.getNamescheme(), ms);
            //*** without a primary name in the current namescheme, the trivial name is only used when forced: ***
            if(trivTmpl != null && trivTmpl.getPrimaryName(this.getNamescheme()) == null && !forceTrivial) {
                trivTmpl = null;
            }
        }
        this.setTrivialTmpl(trivTmpl);
        if(trivTmpl != null) {
            //*** a trivial name replaces the basetype list by a single entry "<configuration>-<trivial name>" ***
            // NOTE(review): if forceTrivial is set and getPrimaryName() returns null, the literal "null" is concatenated here - confirm whether a secondary name should be used instead.
            basetypeList = new ArrayList<String>();
            basetypeList.add(Stereocode.getTrivialnameConfigurationFromStereoString(ms.getStereoStr()).getSymbol() + "-" + trivTmpl.getPrimaryName(this.getNamescheme()));
        } else {
            basetypeList = Stereocode.getBasetypelistFromStereocode(ms, this.getTemplateContainer().getBasetypeTemplateContainer());
        }

        int basetypeIndex = 0;
        //*** write additional basetypes for residues consisting of more than 6 backbone carbon atoms: ***
        //*** (all list entries but the last one; the last one is written behind the anomer below) ***
        while(basetypeIndex < basetypeList.size() - 1) {
            //*** append only "<basetype>-"; the name built so far must not be appended to itself ***
            msNameStr += unknownConfigBasetypeXtoQMark(basetypeList.get(basetypeIndex)) + "-";
            basetypeIndex++;
        }

        //*** write anomer: ***
        String anomerStr = ms.getAnomer().formatCarbbankSymbol();
        if(this.getNamescheme().equals(GlycanNamescheme.CARBBANK) && anomerStr.equals("?-")) {
            //*** in the CarbBank namescheme the unknown anomer symbol is omitted ***
            anomerStr = "";
        }
        msNameStr += anomerStr;

        //*** write configuration, basetype and core modifications: ***
        if(trivTmpl != null) { //*** trivial name found, so core modifications do not have to be considered here (are implied in trivial name) ***
            String trivialBasetype = basetypeList.get(basetypeIndex);
            if(this.getNamescheme().equals(GlycanNamescheme.CARBBANK) && hasUnknownConfigPrefix(trivialBasetype)) {
                msNameStr += StringUtils.camelCase(deleteUnknownConfigBasetypeX(trivialBasetype), 0, 0);
            } else {
                msNameStr += StringUtils.camelCase(unknownConfigBasetypeXtoQMark(trivialBasetype), 0, 2);
            }
        } else {
            String tmpBasetypeStr;
            if(basetypeList.size() > 0) { //*** residue is not just superclass ***
                if(this.getNamescheme().equals(GlycanNamescheme.CARBBANK)) {
                    tmpBasetypeStr = deleteUnknownConfigBasetypeX(basetypeList.get(basetypeIndex));
                } else {
                    tmpBasetypeStr = unknownConfigBasetypeXtoQMark(basetypeList.get(basetypeIndex));
                }
            } else {
                tmpBasetypeStr = this.getTemplateContainer().getBasetypeTemplateContainer().getSuperclassTemplateBySize(ms.getSize()).getBaseName();
            }

            //*** a string of length 5 is split into a two character configuration part and the remaining basetype name: ***
            String configStr = "";
            String basetypeStr = tmpBasetypeStr;
            if(tmpBasetypeStr.length() == 5) {
                configStr = tmpBasetypeStr.substring(0, 2);
                basetypeStr = tmpBasetypeStr.substring(2);
            }
            msNameStr += configStr;

            boolean wroteDeoxy = false;
            for(CoreModification coreMod : ms.getCoreModifications()) {
                if(coreMod.getTemplate().equals(CoreModificationTemplate.ACID)) {
                    continue; //*** acids are handled either behind the ring size (uronic) or at the end of the namestring (-aric or -onic) ***
                }
                if(coreMod.getTemplate().equals(CoreModificationTemplate.KETO)) {
                    continue; //*** keto groups are written separately as "<position>ulo" further below ***
                }
                if(coreMod.getTemplate().equals(CoreModificationTemplate.SP2)) {
                    continue; //*** sp2 hybrids are not explicitly mentioned (implied in substitution) ***
                }
                if(coreMod.getTemplate().equals(CoreModificationTemplate.ALDITOL)) {
                    continue; //*** alditols are marked by adding "-ol" at the end of the residue ***
                }
                //*** format position(s) string: ***
                String posStr = Utils.formatPositionsString(coreMod.getPosition1(), "/", "?");
                if(coreMod.getTemplate().equals(CoreModificationTemplate.DEOXY)) {
                    //*** join deoxy modifications: all deoxy positions are written once, with the first deoxy modification ***
                    if(wroteDeoxy) {
                        continue;
                    }
                    for(CoreModification deoxyMod : ms.getCoreModifications(CoreModificationTemplate.DEOXY.getName())) {
                        if(deoxyMod.equals(coreMod)) {
                            continue;
                        }
                        posStr += "," + Utils.formatPositionsString(deoxyMod.getPosition1(), "/", "?");
                    }
                    wroteDeoxy = true;
                }
                //*** modifications of valence 2 get their second position written, except for en / enx / yn: ***
                if(coreMod.getValence() == 2 && !(coreMod.getTemplate().equals(CoreModificationTemplate.EN) || coreMod.getTemplate().equals(CoreModificationTemplate.ENX) || coreMod.getTemplate().equals(CoreModificationTemplate.YN))) {
                    posStr += "," + Utils.formatPositionsString(coreMod.getPosition2(), "/", "?");
                }
                //*** add core modification to ms name string: ***
                msNameStr += posStr + "-" + coreMod.getName() + "-";
            }

            if(this.getTemplateContainer().getBasetypeTemplateContainer().getBasetypeTemplateByName(basetypeStr).getSize() == ms.getSize()) {
                //*** ms is classified by its basetype, such as in b-d-glcp ***
                basetypeStr = StringUtils.camelCase(basetypeStr);
            } else {
                //*** ms is classified by a combination of basetype and superclass, such as in b-d-4-deoxy-xylhexp ***
                //*** in this case, the basetype "tro" has to be replaced by "thr": ***
                if(basetypeStr.equals("tro")) {
                    basetypeStr = "thr";
                }
            }
            msNameStr += basetypeStr;

            if(basetypeList.size() > 0) { //*** residue is not only given by superclass ***
                BasetypeTemplate msTmpl = this.getTemplateContainer().getBasetypeTemplateContainer().getBasetypeTemplateByName(basetypeList.get(basetypeIndex).substring(2));
                if(msTmpl.getSize() < ms.getSize()) {
                    //*** basetype is smaller than the residue, so the superclass name is added ***
                    msNameStr += StringUtils.camelCase(this.getTemplateContainer().getBasetypeTemplateContainer().getSuperclassTemplateBySize(ms.getSize()).getBaseName());
                }
            }

            //*** write information on keto groups: ***
            for(CoreModification coreMod : ms.getCoreModifications(CoreModificationTemplate.KETO.getName())) {
                msNameStr += coreMod.getPosition1().get(0) + "ulo";
            }
        }

        //*** write ring type: ***
        msNameStr += ms.getRingtypeSymbol();

        //*** mark uronic acids: ***
        if(ms.isUronic() && trivTmpl == null) {
            msNameStr += "A";
        }

        //*** write substitutions: ***
        int substCount = 0; //*** substCount counts the substitutions that are written to the residue name ***
        for(Substitution subst : ms.getSubstitutions()) {
            //*** format position(s): ***
            String posStr;
            posStr = Utils.formatPositionsString(subst.getPosition1(), "/", "?");
            if(!GlycanNamescheme.GWB.equals(this.getNamescheme())) {
                //*** outside the GWB namescheme, position "2" is omitted for an amine / extended amine that is the first written substitution and has no second position ***
                if(posStr.equals("2") && (subst.getTemplate().getName().equalsIgnoreCase(SubstituentTemplate.AMINOTEMPLATENAME) || subst.getTemplate().isExtendedAmine(this.getTemplateContainer().getSubstituentTemplateContainer())) && substCount == 0 && !subst.hasPosition2()) {
                    posStr = "";
                }
            }
            if(subst.hasPosition2()) {
                posStr += "," + Utils.formatPositionsString(subst.getPosition2(), "/", "?");
            }
            //*** format substituent name: ***
            String substName = subst.getResidueIncludedName(this.getNamescheme());
            //*** consider special cases that occur with trivial names like Neu5Ac: ***
            if(trivTmpl != null) {
                //*** check, if substitution is (fully or partly) included in trivial name: ***
                int position1 = subst.getIntValuePosition1();
                if(position1 > 0) {
                    Substitution trivSubst = trivTmpl.getSubstitutionByPosition(position1);
                    if(trivSubst != null) { //*** trivial name includes substitution at current position ***
                        if(trivSubst.equals(subst)) {
                            continue; //*** substitution is fully included in trivial name => must not be repeated in substituents list ***
                        }
                        if(trivSubst.getName().equals(SubstituentTemplate.AMINOTEMPLATENAME)) { //*** substitution implied in trivial name is an amino group, check, what part of the substituent is to be added to the residue name: ***
                            if(subst.getResidueIncludedName(this.getNamescheme()) != null && this.getTemplateContainer().getSubstituentTemplateContainer().forName(this.getNamescheme(), subst.getResidueIncludedName(this.getNamescheme())).getName().equals(SubstituentTemplate.AMINOTEMPLATENAME)) {
                                continue; //*** most simple case: amine, which is already implied in trivial name, is residueIncludedName, and the rest of the substituent will be given as a separate residue ***
                            }
                            if(subst.getTemplate().isExtendedAmine(this.getTemplateContainer().getSubstituentTemplateContainer())) {
                                //*** trivial name includes amino group, substituent is N-linked => add name of corresponding o-linked substituent (without leading "O"), if possible: ***
                                String oLinkedEquivName = subst.getTemplate().getOLinkedEquivalent(this.getTemplateContainer().getSubstituentTemplateContainer());
                                if(oLinkedEquivName != null && oLinkedEquivName.length() > 0) {
                                    //*** o-linked equivalent is defined, get template: ***
                                    SubstituentTemplate oLinkedEquivTmpl = this.getTemplateContainer().getSubstituentTemplateContainer().forName(GlycanNamescheme.GLYCOCT, oLinkedEquivName);
                                    String oLinkTmplSepDisp = oLinkedEquivTmpl.getSeparateDisplay(this.getNamescheme(), LinkageType.H_AT_OH);
                                    //*** the separate display of the substituent itself is deliberately not looked up, so the comparison below is against null ***
                                    String substTmplSepDisp = null;
                                    if(StringUtils.strCmpNullEqualsEmpty(oLinkTmplSepDisp, substTmplSepDisp)) {
                                        oLinkedEquivName = oLinkedEquivTmpl.getPrimaryAlias(this.getNamescheme(), LinkageType.H_AT_OH).getResidueIncludedName();
                                        // NOTE(review): the value assigned to substName in the next statements is always overwritten by the if/else at the end of this block - kept unchanged, confirm whether it can be dropped.
                                        substName = oLinkedEquivTmpl.getResidueIncludedName(this.getNamescheme(), LinkageType.H_AT_OH);
                                        if(substName == null || substName.length() == 0) {
                                            substName = oLinkedEquivTmpl.getSeparateDisplay(this.getNamescheme(), LinkageType.H_AT_OH);
                                        }
                                        //*** check, if leading "O" or "O-" is present in primary alias for the substitution,
                                        //*** and if name without these letters is a secondary alias ***
                                        //*** in that case, use the secondary alias: ***
                                        if(oLinkedEquivName.startsWith("O")) {
                                            oLinkedEquivName = oLinkedEquivName.substring(1);
                                        }
                                        if(oLinkedEquivName.startsWith("-")) {
                                            oLinkedEquivName = oLinkedEquivName.substring(1);
                                        }
                                        if(oLinkedEquivTmpl.equals(this.getTemplateContainer().getSubstituentTemplateContainer().forName(this.getNamescheme(), oLinkedEquivName))) {
                                            substName = oLinkedEquivName;
                                        } else {
                                            substName = subst.getResidueIncludedName(this.getNamescheme());
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
            //*** add position(s) + substituent name to ms name string: ***
            if(substName != null && substName.length() > 0) {
                msNameStr += posStr + substName;
                substCount++;
            }
        }

        //*** mark alditols / aldaric acids / aldonic acids: ***
        if(ms.isAlditol()) {
            msNameStr += "-ol";
        }
        if(trivTmpl == null) {
            if(ms.isAldaric()) {
                msNameStr += "-aric";
            }
            if(ms.isAldonic()) {
                msNameStr += "-onic";
            }
        }
        return msNameStr;
    }

    //*****************************************************************************
    //*** other methods: **********************************************************
    //*****************************************************************************

    /**
     * Check if a basetype string starts with the configuration symbol "X-" (case-insensitive).
     * Strings shorter than two characters never match, so no index check is needed by callers.
     * @param basetypeStr the basetype string, e.g. "X-gal" or "D-glc"
     * @return true if the string starts with "X-" or "x-"
     */
    private static boolean hasUnknownConfigPrefix(String basetypeStr) {
        return basetypeStr.regionMatches(true, 0, UNKNOWN_CONFIG_PREFIX, 0, UNKNOWN_CONFIG_PREFIX.length());
    }

    /**
     * replace the configuration symbol "X-" with "?-" in a basetype with unknown configuration
     * @param basetypeStr the basetype string, starting with the configuration symbol, e.g. "X-gal" or "D-glc"
     * @return the basetype string with a leading "X-" replaced by "?-", or the unchanged string if it does not start with "X-"
     */
    private static String unknownConfigBasetypeXtoQMark(String basetypeStr) {
        if(hasUnknownConfigPrefix(basetypeStr)) {
            return "?-" + basetypeStr.substring(UNKNOWN_CONFIG_PREFIX.length());
        }
        return basetypeStr;
    }

    /**
     * remove the configuration symbol "X-" from a basetype with unknown configuration
     * @param basetypeStr the basetype string, starting with the configuration symbol, e.g. "X-gal" or "D-glc"
     * @return the basetype string without a leading "X-", or the unchanged string if it does not start with "X-"
     */
    private static String deleteUnknownConfigBasetypeX(String basetypeStr) {
        if(hasUnknownConfigPrefix(basetypeStr)) {
            return basetypeStr.substring(UNKNOWN_CONFIG_PREFIX.length());
        }
        return basetypeStr;
    }
}