/*
* EuroCarbDB, a framework for carbohydrate bioinformatics
*
* Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as
* indicated by the @author tags or express copyright attribution
* statements applied by the authors.
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
* A copy of this license accompanies this distribution in the file LICENSE.txt.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* Last commit: $Rev: 1210 $ by $Author: glycoslave $ on $Date:: 2009-06-12 #$
*/
package org.eurocarbdb.resourcesdb.template;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import org.eurocarbdb.resourcesdb.Config;
import org.eurocarbdb.resourcesdb.GlycanNamescheme;
import org.eurocarbdb.resourcesdb.ResourcesDbException;
import org.eurocarbdb.resourcesdb.glycoconjugate_derived.LinkageType;
import org.eurocarbdb.resourcesdb.monosaccharide.*;
import org.eurocarbdb.resourcesdb.util.NumberUtils;
import org.eurocarbdb.resourcesdb.util.Utils;
import org.jdom.Document;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
/**
* Class to store and manage Trivial Name Templates
* @author Thomas Luetteke
*
*/
public class TrivialnameTemplateContainer {
private Config config = null;
private SubstituentTemplateContainer substContainer;
//*****************************************************************************
//*** Constructors: ***********************************************************
//*****************************************************************************
public TrivialnameTemplateContainer() {
this.setConfig(new Config());
}
public TrivialnameTemplateContainer(Config conf) {
this.setConfig(conf);
}
public TrivialnameTemplateContainer(Config conf, SubstituentTemplateContainer substTmplContainer) {
this.setConfig(conf);
this.setSubstContainer(substTmplContainer);
}
//*****************************************************************************
//*** Getters/Setters: ********************************************************
//*****************************************************************************
private void setConfig(Config theConf) {
this.config = theConf;
}
public Config getConfig() {
return this.config;
}
public void setSubstContainer(SubstituentTemplateContainer container) {
this.substContainer = container;
}
public SubstituentTemplateContainer getSubstContainer() {
if(this.substContainer == null) {
this.substContainer = new SubstituentTemplateContainer(this.getConfig());
}
return this.substContainer;
}
//*****************************************************************************
//*** Template Lists / Maps: **************************************************
//*****************************************************************************
private ArrayList<TrivialnameTemplate> trivialnameTemplateList;
private HashMap<GlycanNamescheme, ArrayList<String>> trivialnameTemplateNamelistsMap;
public ArrayList<TrivialnameTemplate> getTemplateList() throws ResourcesDbException {
if(this.trivialnameTemplateList == null) {
this.trivialnameTemplateList = this.readTemplateList(this.getConfig());
}
return this.trivialnameTemplateList;
}
public TrivialnameTemplate forBasetypeName(GlycanNamescheme scheme, String basename) throws ResourcesDbException {
for(TrivialnameTemplate template : this.getTemplateList()) {
//TODO: adjust to new structure of template (done?)
if(template.isTrivialName(scheme, basename)) {
return template;
}
/*if(template.getBaseName().equalsIgnoreCase(basename)) {
if(template.getNameschemes().contains(scheme)) {
return template;
}
}*/
}
return null;
}
public ArrayList<String> getTrivialnameBasetypeList(GlycanNamescheme scheme) throws ResourcesDbException {
if(this.trivialnameTemplateNamelistsMap == null) {
this.trivialnameTemplateNamelistsMap = new HashMap<GlycanNamescheme, ArrayList<String>>();
}
ArrayList<String> basetypeList = this.trivialnameTemplateNamelistsMap.get(scheme);
if(basetypeList == null) {
ArrayList<TrivialnameTemplate> templates = this.getTemplateList();
basetypeList = new ArrayList<String>();
for(TrivialnameTemplate template : templates) {
if(template.getNameschemes().contains(scheme)) {
if(template.getSchemesMap().get(scheme) != null) {
basetypeList.addAll(template.getSchemesMap().get(scheme));
}
}
}
this.trivialnameTemplateNamelistsMap.put(scheme, basetypeList);
}
return basetypeList;
}
public boolean isTrivialname(GlycanNamescheme scheme, String basename) throws ResourcesDbException {
return (this.forBasetypeName(scheme, basename)!= null);
}
//*****************************************************************************
//*** Methods for filling Template Lists / Maps: ******************************
//*****************************************************************************
private ArrayList<TrivialnameTemplate> readTemplateList(Config conf) throws ResourcesDbException {
return this.getTemplateListFromXml(conf.getTrivialnameTemplatesXmlUrl());
}
private ArrayList<TrivialnameTemplate> getTemplateListFromXml(URL xmlUrl) throws ResourcesDbException {
ArrayList<TrivialnameTemplate> tmplList = new ArrayList<TrivialnameTemplate>();
SAXBuilder parser = new SAXBuilder();
try {
Document doc = parser.build(xmlUrl);
org.jdom.Element root = doc.getRootElement();
List<?> templateTagsList = root.getChildren();
Iterator<?> templatesIter = templateTagsList.iterator();
while(templatesIter.hasNext()) {
org.jdom.Element xmlTemplate = (org.jdom.Element) templatesIter.next();
TrivialnameTemplate template = getTemplateFromXmlTree(xmlTemplate);
if(template != null) {
tmplList.add(template);
}
}
} catch (JDOMException je) {
throw new ResourcesDbException("Exception in reading TrivialnameTemplate XML file.", je);
} catch (IOException ie) {
throw new ResourcesDbException("Exception in reading TrivialnameTemplate XML file.", ie);
}
return tmplList;
}
private TrivialnameTemplate getTemplateFromXmlTree(org.jdom.Element xmlTemplate) {
TrivialnameTemplate template = null;
if(xmlTemplate.getName().equalsIgnoreCase("template")) {
template = new TrivialnameTemplate();
List<?> propList = xmlTemplate.getChildren();
Iterator<?> propIter = propList.iterator();
GlycanNamescheme scheme = null;
try {
while(propIter.hasNext()) {
org.jdom.Element property = (org.jdom.Element) propIter.next();
String propName = property.getName().toLowerCase();
if(propName.equals("notation_set")) {
String name = property.getAttributeValue("name");
List<?> schemeList = property.getChildren();
Iterator<?> schemeIter = schemeList.iterator();
while(schemeIter.hasNext()) {
org.jdom.Element schemeTag = (org.jdom.Element) schemeIter.next();
if(schemeTag.getName().toLowerCase().equals("namescheme")) {
scheme = GlycanNamescheme.forName(schemeTag.getValue());
String primaryStr = schemeTag.getAttributeValue("primary");
boolean isPrimary = Utils.parseTrueFalseString(primaryStr, false);
template.addName(name, scheme, isPrimary);
}
}
} else if(propName.equals("longname")) {
template.setLongName(property.getValue());
} else if(propName.equals("stereocode")) {
template.setStereocode(property.getValue());
} else if(propName.equals("size")) {
template.setSize(Integer.parseInt(property.getValue()));
} else if(propName.equals("carbonyl_position")) {
template.setCarbonylPosition(NumberUtils.parseIntStr(property.getValue(), 0));
} else if(propName.equals("default_configuration")) {
try {
template.setDefaultConfiguration(StereoConfiguration.forNameOrSymbol(property.getValue()));
String compulsory = property.getAttributeValue("compulsory");
boolean isCompulsory = false;
if(compulsory != null && compulsory.length() > 0) {
isCompulsory = Utils.parseTrueFalseString(compulsory, isCompulsory);
}
template.setDefaultConfigIsCompulsory(isCompulsory);
} catch(MonosaccharideException me) {}
} else if(propName.equals("default_ringtype")) {
if(property.getValue().equals("p")) {
template.setDefaultRingend(template.getCarbonylPosition() + 4);
} else if(property.getValue().equals("f")) {
template.setDefaultRingend(template.getCarbonylPosition() + 3);
}
} else if(propName.equals("coremodification_list")) {
template.setCoreModifications(getCoreModificationListFromXmlTree(property));
} else if(propName.equals("substitution_list")) {
template.setSubstitutions(getSubstitutionListFromXmlTree(GlycanNamescheme.GLYCOCT, property));
} else {
System.err.println("Warning: unknown tag in trivialname templates xml file: " + property.getName());
}
}
} catch(ResourcesDbException me) {
System.err.println("Warning: error in parsing trivial name templates xml file " + me);
me.printStackTrace();
return(null);
}
}
return(template);
}
private ArrayList<CoreModification> getCoreModificationListFromXmlTree(org.jdom.Element xmlElement) throws MonosaccharideException {
ArrayList<CoreModification> modList = new ArrayList<CoreModification>();
List<?> xmlModList = xmlElement.getChildren();
Iterator<?> modIter = xmlModList.iterator();
while(modIter.hasNext()) {
CoreModification mod = getCoreModificationFromXmlTree((org.jdom.Element)modIter.next());
if(mod == null) {
throw new MonosaccharideException("Error in parsing core modifications of trivialname templates xml file.");
}
modList.add(mod);
}
return(modList);
}
private CoreModification getCoreModificationFromXmlTree(org.jdom.Element xmlElement) throws MonosaccharideException {
CoreModification mod = null;
if(xmlElement.getName().equalsIgnoreCase("modification")) {
CoreModificationTemplate modTemplate = null;
int modPosition1 = 0;
int modPosition2 = 0;
Iterator<?> modpropIter = xmlElement.getChildren().iterator();
while(modpropIter.hasNext()) {
org.jdom.Element property = (org.jdom.Element) modpropIter.next();
if(property.getName().equalsIgnoreCase("type")) {
modTemplate = CoreModificationTemplate.forName(property.getValue());
} else if(property.getName().equalsIgnoreCase("position1")) {
modPosition1 = Integer.parseInt(property.getValue());
} else if(property.getName().equalsIgnoreCase("position2")) {
if(property.getValue().length() > 0) {
modPosition2 = Integer.parseInt(property.getValue());
}
}
}
if(modTemplate != null && modPosition1 != 0) {
mod = new CoreModification();
if(modPosition2 == 0) {
mod.setModification(modTemplate, modPosition1);
} else {
mod.setDivalentModification(modTemplate, modPosition1, modPosition2);
}
}
}
return(mod);
}
private ArrayList<Substitution> getSubstitutionListFromXmlTree(GlycanNamescheme scheme, org.jdom.Element xmlElement) throws ResourcesDbException {
ArrayList<Substitution> substList = new ArrayList<Substitution>();
List<?> xmlSubstList = xmlElement.getChildren();
Iterator<?> substIter = xmlSubstList.iterator();
while(substIter.hasNext()) {
org.jdom.Element xmlElem = (org.jdom.Element) substIter.next();
Substitution subst = getSubstitutionFromXmlTree(scheme, xmlElem);
if(subst == null) {
throw new MonosaccharideException("Error in parsing substitutions of trivialname templates xml file.");
}
substList.add(subst);
}
return(substList);
}
private Substitution getSubstitutionFromXmlTree(GlycanNamescheme scheme, org.jdom.Element xmlElement) throws ResourcesDbException {
Substitution subst = null;
if(xmlElement.getName().equalsIgnoreCase("substitution")) {
SubstituentTemplate substTemplate = null;
int position1 = 0;
int position2 = 0;
int substPosition1 = 0;
int substPosition2 = 0;
LinkageType linktype1 = null;
LinkageType linktype2 = null;
Iterator<?> substIter = xmlElement.getChildren().iterator();
while(substIter.hasNext()) {
org.jdom.Element property = (org.jdom.Element) substIter.next();
if(property.getName().equalsIgnoreCase("type")) {
substTemplate = this.getSubstContainer().forName(scheme, property.getValue());
if(substTemplate == null) {
substTemplate = this.substContainer.forName(GlycanNamescheme.GLYCOCT, property.getValue());
}
if(substTemplate == null) {
System.err.println("cannot get substitution template for " + scheme.getNameStr() + "::" + property.getValue());
}
} else if(property.getName().equalsIgnoreCase("position1")) {
position1 = Integer.parseInt(property.getValue());
} else if(property.getName().equalsIgnoreCase("position2")) {
if(property.getValue().length() > 0) {
position2 = Integer.parseInt(property.getValue());
}
} else if(property.getName().equalsIgnoreCase("substituent_position1")) {
if(property.getValue().length() > 0) {
substPosition1 = Integer.parseInt(property.getValue());
}
} else if(property.getName().equalsIgnoreCase("substituent_position2")) {
if(property.getValue().length() > 0) {
substPosition2 = Integer.parseInt(property.getValue());
}
} else if(property.getName().equalsIgnoreCase("linkagetype1")) {
if(property.getValue().length() > 0) {
linktype1 = SubstituentTemplate.getLinkageTypeByLinkageName(property.getValue());
}
} else if(property.getName().equalsIgnoreCase("linkagetype2")) {
if(property.getValue().length() > 0) {
linktype2 = SubstituentTemplate.getLinkageTypeByLinkageName(property.getValue());
}
}
}
if(substTemplate != null && position1 != 0) {
subst = new Substitution();
if(linktype1 == null) {
linktype1 = substTemplate.getDefaultLinkagetype1();
}
if(substPosition1 == 0) {
substPosition1 = substTemplate.getDefaultLinkingPosition1();
}
if(position2 == 0) {
subst.setSubstitution(substTemplate, position1, linktype1, substPosition1);
} else {
if(linktype2 == null) {
linktype2 = substTemplate.getDefaultLinkagetype2();
}
if(substPosition2 == 0) {
substPosition2 = substTemplate.getDefaultLinkingPosition2();
}
subst.setDivalentSubstitution(substTemplate, position1, linktype1, substPosition1, position2, linktype2, substPosition2);
}
}
}
return(subst);
}
//*****************************************************************************
//*** Other Methods: **********************************************************
//*****************************************************************************
/**
* Check, whether there is a trivial name available for a monosaccharide in a given namescheme
* @param scheme: the namescheme in which the monosaccharide shall be encoded
* @param ms: the monosaccharide to be checked
* @return: the appropriate TrivialnameTemplate or null if no trivial name is existing for the given monosaccharide
* @throws ResourcesDbException
*/
public TrivialnameTemplate checkMsForTrivialname(GlycanNamescheme scheme, Monosaccharide ms) throws ResourcesDbException {
//*** (1) prepare ms stereostring for comparison: ***
String msStereo = ms.getStereoStr();
//*** (1a) mask anomeric position in ms stereostring: ***
if(ms.getRingStart() > 0) {
msStereo = Stereocode.setPositionInStereoString(msStereo, StereoConfiguration.Nonchiral, ms.getRingStart());
//msStereo = msStereo.substring(0, ms.getCarbonylPosition() - 1) + StereoConfiguration.Nonchiral.getStereosymbol() + msStereo.substring(ms.getCarbonylPosition());
}
//*** (1b) chop of first and last position of stereocode: ***
msStereo = msStereo.substring(1, msStereo.length() - 1);
//*** (1c) if stereostring is composed of relative configurations, get absolute configurations for comparison to templates: ***
try {
if(Stereocode.stereoStringHasRelativePosition(msStereo)) {
if(scheme.equals(GlycanNamescheme.CFG)) {
return null; //*** cfg notation doesn't support unknown configuration ***
}
msStereo = Stereocode.relativeToAbsolute(msStereo);
}
} catch(MonosaccharideException me) {
return null;
}
//*** (1d) generate D/L mirrored ms stereostring: ***
String msStereoDL = Stereocode.changeDLinStereoString(msStereo);
//*** (2) search for trivial name template matching ms properties: ***
ArrayList<String> trivialnameList = this.getTrivialnameBasetypeList(scheme);
TrivialnameTemplate returnTemplate = null;
if(trivialnameList == null) {
return null; //*** no trivial names defined for given name scheme ***
}
int returnTemplateSplitCount = 0;
for(String trivTmplName : trivialnameList) {
int currentTemplateSplitCount = 0;
TrivialnameTemplate template = this.forBasetypeName(scheme, trivTmplName);
if(template.getSize() == ms.getSize()) {
if(template.getCarbonylPosition() == ms.getRingStart() || ms.getRingStart() == Basetype.OPEN_CHAIN || ms.getRingStart() == Basetype.UNKNOWN_RING) {
//*** compare stereocodes (ignoring anomeric (masked above)): ***
String tmplStereo = template.getStereocode();
if(tmplStereo.equals(msStereo) || tmplStereo.equals(msStereoDL)) {
if(template.isDefaultConfigIsCompulsory()) {
if(!Stereocode.getConfigurationFromStereoString(msStereo).equals(template.getDefaultConfiguration())) {
continue;
}
}
//*** check core modifications: ***
if(scheme.equals(GlycanNamescheme.BCSDB)) {
//*** ms must contain exactly the same deoxy / keto / en(x) / yn modifications as the trivialname template ***
//*** and all other core mods. / substs. of the template, but may have further further core mods. (e.g. anhydro or acid) or substs. ***
if(ms.countCoreModifications() < template.getCoreModificationCount()) {
continue;
}
if(ms.countCoreModifications(CoreModificationTemplate.DEOXY) != template.countCoreModifications(CoreModificationTemplate.DEOXY)) {
continue;
}
if(ms.countCoreModifications(CoreModificationTemplate.KETO) != template.countCoreModifications(CoreModificationTemplate.KETO)) {
continue;
}
if(ms.countCoreModifications(CoreModificationTemplate.EN) != template.countCoreModifications(CoreModificationTemplate.EN)) {
continue;
}
if(ms.countCoreModifications(CoreModificationTemplate.ENX) != template.countCoreModifications(CoreModificationTemplate.ENX)) {
continue;
}
if(ms.countCoreModifications(CoreModificationTemplate.YN) != template.countCoreModifications(CoreModificationTemplate.YN)) {
continue;
}
} else {
//*** ms must contain exactly the same core modifications as the trivialname template, ***
//*** and all substitutions of the trivialname template (but may have further substitutions) ***
if(ms.countCoreModifications() != template.getCoreModificationCount()) {
continue;
}
}
//*** numbers of core modifications match, so check types / positions now: ***
ArrayList<CoreModification> trivialMods = template.getCoreModifications();
boolean modificationMissing = false;
for(int m = 0; m < trivialMods.size(); m++) {
CoreModification mod = trivialMods.get(m);
if(!ms.hasCoreModification(mod)) {
modificationMissing = true;
break;
}
}
if(modificationMissing) {
continue;
}
//*** check substitutions: ***
ArrayList<Substitution> trivialSubstList = template.getSubstitutions();
for(Substitution trivialSubst: trivialSubstList) {
if(!ms.hasSubstitution(trivialSubst)) {
modificationMissing = true;
if(scheme.equals(GlycanNamescheme.CFG)) {
//*** check, if subst in trivial name is subpart of subst in ms, and remaining subpart of ms subst is defined in CFG namescheme: ***
List<Substitution> msSubstList = ms.getSubstitutions();
for(Substitution msSubst : msSubstList) {
///*** check, if positions match: ***
if(!Utils.formatPositionsString(trivialSubst.getPosition1(), "/", "?").equals(Utils.formatPositionsString(msSubst.getPosition1(), "/", "?"))) {
continue;
}
if(msSubst.hasPosition2()) {
if(!trivialSubst.hasPosition2()) {
continue;
}
if(!Utils.formatPositionsString(trivialSubst.getPosition2(), "/", "?").equals(Utils.formatPositionsString(msSubst.getPosition2(), "/", "?"))) {
continue;
}
}
//*** check for subparts: ***
SubstituentSubpartTreeNode msSubstNode = msSubst.getTemplate().getSubparts();
if(msSubstNode != null) {
SubstituentTemplate msSubstNodeTmpl = msSubstNode.getSubstTmpl(this.getSubstContainer());
if(trivialSubst.getTemplate().getName().equals(msSubstNodeTmpl.getName())) {
if(msSubstNode.getChildCount() == 1) {
SubstituentSubpartTreeNode msChildNode = (SubstituentSubpartTreeNode)msSubstNode.getFirstChild();
if(!msChildNode.isLeaf()) {
continue;
}
SubstituentTemplate msChildTmpl = msChildNode.getSubstTmpl(this.getSubstContainer());
if(msChildTmpl == null) {
throw new ResourcesDbException("Cannot get SubstituentTemplate for subst. subpart " + msChildNode.getName());
}
try {
msChildTmpl.getPrimaryAlias(GlycanNamescheme.CFG, LinkageType.H_AT_OH);
} catch(ResourcesDbException rEx) {
continue; //*** no primary alias defined for subst. subpart in CFG notation ***
}
modificationMissing = false;
currentTemplateSplitCount ++;
break;
}
}
}
}
} else {
//*** check, if trivial name includes amino subst. and ms contains extended amino subst.: ***
if(trivialSubst.getName().equalsIgnoreCase(SubstituentTemplate.AMINOTEMPLATENAME)) {
List<Substitution> msSubstList = ms.getSubstitutions();
for(Substitution msSubst : msSubstList) {
if(msSubst.getPosition1().size() == 1 && msSubst.getIntValuePosition1() == trivialSubst.getIntValuePosition1()) {
if(msSubst.getTemplate().isExtendedAmine(this.getSubstContainer())) {
modificationMissing = false;
break;
}
}
}
}
}
if(modificationMissing) {
break;
}
}
}
if(modificationMissing) {
continue;
}
if(returnTemplate == null || template.getSubstitutionCount() > returnTemplate.getSubstitutionCount()) {
returnTemplate = template;
returnTemplateSplitCount = currentTemplateSplitCount;
} else {
if(currentTemplateSplitCount < returnTemplateSplitCount && template.getSubstitutionCount() == returnTemplate.getSubstitutionCount()) {
returnTemplate = template;
returnTemplateSplitCount = currentTemplateSplitCount;
}
}
}
}
}
}
return returnTemplate;
}
}