/* * EuroCarbDB, a framework for carbohydrate bioinformatics * * Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as * indicated by the @author tags or express copyright attribution * statements applied by the authors. * * This copyrighted material is made available to anyone wishing to use, modify, * copy, or redistribute it subject to the terms and conditions of the GNU * Lesser General Public License, as published by the Free Software Foundation. * A copy of this license accompanies this distribution in the file LICENSE.txt. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * Last commit: $Rev: 1210 $ by $Author: glycoslave $ on $Date:: 2009-06-12 #$ */ package org.eurocarbdb.MolecularFramework.io.GlycoCT; import java.io.IOException; import java.io.StringReader; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import java.util.List; import org.eurocarbdb.MolecularFramework.io.SugarImporter; import org.eurocarbdb.MolecularFramework.io.SugarImporterException; import org.eurocarbdb.MolecularFramework.sugar.Anomer; import org.eurocarbdb.MolecularFramework.sugar.BaseType; import org.eurocarbdb.MolecularFramework.sugar.GlycoEdge; import org.eurocarbdb.MolecularFramework.sugar.GlycoGraph; import org.eurocarbdb.MolecularFramework.sugar.GlycoGraphAlternative; import org.eurocarbdb.MolecularFramework.sugar.GlycoNode; import org.eurocarbdb.MolecularFramework.sugar.GlycoconjugateException; import org.eurocarbdb.MolecularFramework.sugar.Linkage; import org.eurocarbdb.MolecularFramework.sugar.LinkageType; import org.eurocarbdb.MolecularFramework.sugar.Modification; import org.eurocarbdb.MolecularFramework.sugar.Monosaccharide; import org.eurocarbdb.MolecularFramework.sugar.NonMonosaccharide; import org.eurocarbdb.MolecularFramework.sugar.Substituent; import org.eurocarbdb.MolecularFramework.sugar.SubstituentType; import org.eurocarbdb.MolecularFramework.sugar.Sugar; import org.eurocarbdb.MolecularFramework.sugar.SugarUnitAlternative; import org.eurocarbdb.MolecularFramework.sugar.SugarUnitRepeat; import org.eurocarbdb.MolecularFramework.sugar.Superclass; import org.eurocarbdb.MolecularFramework.sugar.UnderdeterminedSubTree; import org.jdom.Document; import org.jdom.Element; import org.jdom.JDOMException; import org.jdom.input.SAXBuilder; /** * @author Logan * */ public class SugarImporterGlycoCT extends SugarImporter { public static final int MAX_BASE_TYPE = 5; private Element m_objRoot = null; private Document m_objDocument = null; private HashMap<Integer,GlycoNode> m_hashResidues = new HashMap<Integer,GlycoNode>(); private HashMap<Integer,GlycoEdge> m_hashLinkages = new HashMap<Integer,GlycoEdge>(); private HashMap<Integer,SugarUnitRepeat> m_hashRepeats = new HashMap<Integer,SugarUnitRepeat>(); private HashMap<Integer,SugarUnitAlternative> m_hashAlternatives = new HashMap<Integer,SugarUnitAlternative>(); private GlycoGraph m_objSugarUnit = null; private HashMap<GlycoNode,GlycoGraph> m_hashGraphs = new HashMap<GlycoNode,GlycoGraph>(); public Sugar parse(String a_strXML) throws SugarImporterException { SAXBuilder builder = new SAXBuilder(); try { this.m_objDocument = builder.build(new StringReader(a_strXML)); if (builder.getValidation()) { throw new SugarImporterException("XML Validation error"); } return this.parse(this.m_objDocument.getRootElement()); } catch (JDOMException e) { throw new SugarImporterException(e.getMessage(),e); } catch (IOException e) { throw new SugarImporterException(e.getMessage(),e); } catch (NumberFormatException e) { throw new SugarImporterException(e.getMessage(),e); } } public Sugar parse(Element a_objRoot) throws SugarImporterException { try { this.clear(); this.m_objSugar = new Sugar(); this.m_objSugarUnit = this.m_objSugar; this.m_objRoot = a_objRoot; List t_lMainElements = this.m_objRoot.getChildren(); for (Iterator t_iterElements = t_lMainElements.iterator(); t_iterElements.hasNext();) { Element t_objMain = (Element) t_iterElements.next(); if (t_objMain.getName().equals("residues")) { this.parseResidueSection(t_objMain); } else if (t_objMain.getName().equals("linkages")) { this.parseLinkageSection(t_objMain); } else if (t_objMain.getName().equals("repeat")) { this.parseRepeatSection(t_objMain); } else if (t_objMain.getName().equals("aglyca")) { this.parseAglycaSection(t_objMain); } else if (t_objMain.getName().equals("underDeterminedSubtrees")) { this.parseUnderdetermindedSubtreeSection(t_objMain); } else if (t_objMain.getName().equals("alternative")) { this.parseAlternativeSection(t_objMain); } } return this.m_objSugar; } catch (GlycoconjugateException e) { throw new SugarImporterException(e.getMessage(),e); } catch (JDOMException e) { throw new SugarImporterException(e.getMessage(),e); } } private void clear() { this.m_objRoot = null; this.m_objDocument = null; this.m_objSugarUnit = null; this.m_hashResidues.clear(); this.m_hashResidues.clear(); this.m_hashRepeats.clear(); this.m_hashAlternatives.clear(); this.m_hashGraphs.clear(); this.m_hashLinkages.clear(); } private void parseResidueSection(Element a_objMainElement) throws GlycoconjugateException, JDOMException, SugarImporterException { // Parse substructure List t_lMainElements = a_objMainElement.getChildren(); for (Iterator t_iterElements = t_lMainElements.iterator(); t_iterElements.hasNext();) { Element t_objResidue = (Element) t_iterElements.next(); if ( t_objResidue.getName().equals("basetype") ) { this.parseBasetype(t_objResidue); } else if ( t_objResidue.getName().equals("substituent") ) { this.parseSubstitutent(t_objResidue); } else if ( t_objResidue.getName().equals("repeat") ) { this.parseRepeat(t_objResidue); } else if ( t_objResidue.getName().equals("alternative") ) { this.parseAlternative(t_objResidue); } else if ( t_objResidue.getName().equals("monosaccharide") ) { this.parseBasetype(t_objResidue); } } } /** * @param residue * @throws SugarImporterException * @throws GlycoconjugateException */ private void parseSubstitutent(Element a_objResidue) throws SugarImporterException, GlycoconjugateException { String t_strAttribute; t_strAttribute = a_objResidue.getAttributeValue("name"); if ( t_strAttribute == null ) { throw new SugarImporterException("<res> must have a name attribute."); } Substituent t_objSubst = new Substituent(SubstituentType.forName(t_strAttribute)); // add ms to sugar this.m_objSugarUnit.addNode(t_objSubst); this.m_hashGraphs.put(t_objSubst,this.m_objSugarUnit); t_strAttribute = a_objResidue.getAttributeValue("id"); if ( t_strAttribute == null ) { throw new SugarImporterException("<res> must have a id."); } Integer t_iID = Integer.parseInt(t_strAttribute); if ( this.m_hashResidues.containsKey(t_iID) ) { throw new SugarImporterException("Dupplicated residue ID."); } this.m_hashResidues.put(t_iID,t_objSubst); } private void parseBasetype(Element a_objResidue) throws GlycoconjugateException, SugarImporterException { // anomer String t_strAttribute = a_objResidue.getAttributeValue("anomer"); if ( t_strAttribute == null ) { throw new SugarImporterException("<res> of type b must have an anomer."); } Anomer t_enumAnomer = Anomer.forSymbol(t_strAttribute.charAt(0)); // superclass t_strAttribute = a_objResidue.getAttributeValue("superclass"); if ( t_strAttribute == null ) { throw new SugarImporterException("<res> of type b must have a superclass."); } Superclass t_enumSuperclass = Superclass.forName(t_strAttribute); // new MS Monosaccharide t_objMS = new Monosaccharide(t_enumAnomer,t_enumSuperclass); // ring t_strAttribute = a_objResidue.getAttributeValue("ringStart"); if ( t_strAttribute == null ) { throw new SugarImporterException("<res> of type b must have a ring start."); } int t_iStart = Integer.parseInt(t_strAttribute); t_strAttribute = a_objResidue.getAttributeValue("ringEnd"); if ( t_strAttribute == null ) { throw new SugarImporterException("<res> of type b must have a ring end."); } int t_iEnd = Integer.parseInt(t_strAttribute); t_objMS.setRing(t_iStart,t_iEnd); // sub tags List t_lMainElements = a_objResidue.getChildren(); Modification t_objModi; String t_strName; int t_iPosOne; // prepare Basetype ArrayList<BaseType> t_aBaseType = new ArrayList<BaseType>(); for (int i = 0; i < SugarImporterGlycoCT.MAX_BASE_TYPE; i++) { t_aBaseType.add(null); } for (Iterator t_iterElements = t_lMainElements.iterator(); t_iterElements.hasNext();) { Element t_objSubTag = (Element) t_iterElements.next(); if (t_objSubTag.getName().equals("basetype")) { // basetype t_strAttribute = t_objSubTag.getAttributeValue("id"); if ( t_strAttribute == null ) { throw new SugarImporterException("<basetype> of must have a id."); } int t_iPos = Integer.parseInt(t_strAttribute); t_iPos--; if ( t_iPos<0 || t_iPos> SugarImporterGlycoCT.MAX_BASE_TYPE ) { throw new SugarImporterException("<basetype> id must be a number between 0 and " + SugarImporterGlycoCT.MAX_BASE_TYPE); } t_strAttribute = t_objSubTag.getAttributeValue("type"); if ( t_strAttribute == null ) { throw new SugarImporterException("<basetype> of must have a type."); } t_aBaseType.set(t_iPos,BaseType.forName(t_strAttribute)); } else if (t_objSubTag.getName().equals("stemtype")) { // basetype t_strAttribute = t_objSubTag.getAttributeValue("id"); if ( t_strAttribute == null ) { throw new SugarImporterException("<stemtype> of must have a id."); } int t_iPos = Integer.parseInt(t_strAttribute); t_iPos--; if ( t_iPos<0 || t_iPos> SugarImporterGlycoCT.MAX_BASE_TYPE ) { throw new SugarImporterException("<stemtype> id must be a number between 0 and " + SugarImporterGlycoCT.MAX_BASE_TYPE); } t_strAttribute = t_objSubTag.getAttributeValue("type"); if ( t_strAttribute == null ) { throw new SugarImporterException("<stemtype> of must have a type."); } t_aBaseType.set(t_iPos,BaseType.forName(t_strAttribute)); } else if (t_objSubTag.getName().equals("modification")) { // modification t_strName = t_objSubTag.getAttributeValue("type"); if ( t_strName == null ) { throw new SugarImporterException("<modification> of must have a type."); } t_strAttribute = t_objSubTag.getAttributeValue("pos_one"); if ( t_strAttribute == null ) { throw new SugarImporterException("<modification> of must have a pos_one attribute."); } t_iPosOne = Integer.parseInt(t_strAttribute); t_strAttribute = t_objSubTag.getAttributeValue("pos_two"); if ( t_strAttribute == null ) { t_objModi = new Modification(t_strName,t_iPosOne); } else { int t_iPosTwo = Integer.parseInt(t_strAttribute); t_objModi = new Modification(t_strName,t_iPosOne,t_iPosTwo); } t_objMS.addModification(t_objModi); } } // store BaseType boolean t_bNull = false; for (int i = 0; i < SugarImporterGlycoCT.MAX_BASE_TYPE; i++) { if ( t_bNull ) { if ( t_aBaseType.get(i) != null ) { throw new SugarImporterException("<basetype> id " + i + " is missing."); } } else { if ( t_aBaseType.get(i) == null ) { t_bNull = true; } else { t_objMS.addBaseType(t_aBaseType.get(i)); } } } // test with glycoct name t_strAttribute = a_objResidue.getAttributeValue("name"); if ( t_strAttribute == null ) { throw new SugarImporterException("<res> must have a name attribute."); } if ( !t_objMS.getGlycoCTName().equalsIgnoreCase(t_strAttribute) ) { throw new SugarImporterException("Calculated glycoCT name and name are not equal : " + t_objMS.getGlycoCTName() + " != " + t_strAttribute); } // add ms to sugar this.m_objSugarUnit.addNode(t_objMS); this.m_hashGraphs.put(t_objMS,this.m_objSugarUnit); t_strAttribute = a_objResidue.getAttributeValue("id"); if ( t_strAttribute == null ) { throw new SugarImporterException("<res> must have a id."); } Integer t_iID = Integer.parseInt(t_strAttribute); if ( this.m_hashResidues.containsKey(t_iID) ) { throw new SugarImporterException("Dupplicated residue ID."); } this.m_hashResidues.put(t_iID,t_objMS); } /** * @param main * @throws SugarImporterException * @throws GlycoconjugateException */ private void parseLinkageSection(Element a_objLinkage) throws SugarImporterException, GlycoconjugateException { // so siehts aus // <linkages> // <connection id="0" parent="0" child="1"> // <lin parenttype="d" childtype="n"> // <from pos="2" /> // <to pos="1" /> // </lin> // </connection> // ... // </linkages> String t_strAttribute; GlycoNode t_objParent; GlycoNode t_objChild; Integer t_objID; List t_lMainElements = a_objLinkage.getChildren(); List t_lSubElements; List t_lSubSubElements; Linkage t_objLinkage; Element t_objLin; Element t_objConnection; Element t_objFromTo; for (Iterator t_iterElements = t_lMainElements.iterator(); t_iterElements.hasNext();) { t_objConnection = (Element) t_iterElements.next(); if ( t_objConnection.getName().equals("connection")) { GlycoEdge t_objEdge = new GlycoEdge(); t_lSubElements = t_objConnection.getChildren(); for (Iterator t_iterSubElements = t_lSubElements.iterator(); t_iterSubElements.hasNext();) { t_objLin = (Element) t_iterSubElements.next(); if ( t_objLin.getName().equals("linkage")) { t_objLinkage = new Linkage(); // parent type t_strAttribute = t_objLin.getAttributeValue("parentType"); if ( t_strAttribute == null ) { throw new SugarImporterException("<linkage> must have a parentType."); } t_objLinkage.setParentLinkageType( LinkageType.forName(t_strAttribute.charAt(0))); // child tpye t_strAttribute = t_objLin.getAttributeValue("childType"); if ( t_strAttribute == null ) { throw new SugarImporterException("<linkage> must have a childType."); } t_objLinkage.setChildLinkageType( LinkageType.forName(t_strAttribute.charAt(0))); // positions t_lSubSubElements = t_objLin.getChildren(); for (Iterator t_iterSubSubElements = t_lSubSubElements.iterator(); t_iterSubSubElements.hasNext();) { t_objFromTo = (Element) t_iterSubSubElements.next(); if ( t_objFromTo.getName().equals("parent") ) { t_strAttribute = t_objFromTo.getAttributeValue("pos"); if ( t_strAttribute == null ) { throw new SugarImporterException("<from> or <to> must have a pos attribute."); } t_objID = Integer.parseInt(t_strAttribute); t_objLinkage.addParentLinkage(t_objID); } else if ( t_objFromTo.getName().equals("child") ) { t_strAttribute = t_objFromTo.getAttributeValue("pos"); if ( t_strAttribute == null ) { throw new SugarImporterException("<from> or <to> must have a pos attribute."); } t_objID = Integer.parseInt(t_strAttribute); t_objLinkage.addChildLinkage(t_objID); } } t_objEdge.addGlycosidicLinkage(t_objLinkage); } } if ( t_objEdge.getGlycosidicLinkages().size() == 0 ) { throw new SugarImporterException("<connection> must have at least on <lin> subtag."); } // add to sugar t_strAttribute = t_objConnection.getAttributeValue("parent"); if ( t_strAttribute == null ) { throw new SugarImporterException("<connection> must have a parent."); } t_objID = Integer.parseInt(t_strAttribute); t_objParent = this.m_hashResidues.get(t_objID); t_strAttribute = t_objConnection.getAttributeValue("child"); if ( t_strAttribute == null ) { throw new SugarImporterException("<connection> must have a child."); } t_objID = Integer.parseInt(t_strAttribute); t_objChild = this.m_hashResidues.get(t_objID); if ( t_objParent == null || t_objChild == null ) { throw new SugarImporterException("parent or child id invalde in <connection> ."); } this.m_objSugarUnit.addEdge(t_objParent,t_objChild,t_objEdge); t_strAttribute = t_objConnection.getAttributeValue("id"); if ( t_strAttribute == null ) { throw new SugarImporterException("<connection> must have a id."); } Integer t_iID = Integer.parseInt(t_strAttribute); if ( this.m_hashLinkages.containsKey(t_iID) ) { throw new SugarImporterException("Dupplicated linkage ID."); } this.m_hashLinkages.put(t_iID,t_objEdge); } } } /** * @param residue * @throws SugarImporterException * @throws GlycoconjugateException */ private void parseRepeat(Element a_objRepeat) throws SugarImporterException, GlycoconjugateException { String t_strAttribute; SugarUnitRepeat t_objRepeat = new SugarUnitRepeat(); // add ms to sugar this.m_objSugarUnit.addNode(t_objRepeat); this.m_hashGraphs.put(t_objRepeat,this.m_objSugarUnit); t_strAttribute = a_objRepeat.getAttributeValue("id"); if ( t_strAttribute == null ) { throw new SugarImporterException("<res> must have a id."); } Integer t_iID = Integer.parseInt(t_strAttribute); if ( this.m_hashResidues.containsKey(t_iID) ) { throw new SugarImporterException("Dupplicated residue ID."); } this.m_hashResidues.put(t_iID,t_objRepeat); // repeat ID t_strAttribute = a_objRepeat.getAttributeValue("repeatId"); if ( t_strAttribute == null ) { throw new SugarImporterException("<res> must have a name attribute."); } t_iID = Integer.parseInt(t_strAttribute); if ( this.m_hashRepeats.containsKey(t_iID) ) { throw new SugarImporterException("Dupplicated repeat ID."); } this.m_hashRepeats.put(t_iID,t_objRepeat); } /** * @param main * @throws SugarImporterException * @throws GlycoconjugateException */ private void parseAglycaSection(Element a_objAglyca) throws SugarImporterException, GlycoconjugateException { List t_lMainElements = a_objAglyca.getChildren(); for (Iterator t_iterElements = t_lMainElements.iterator(); t_iterElements.hasNext();) { Element t_objCategory = (Element) t_iterElements.next(); if (t_objCategory.getName().equals("historicalData")) { this.parseHistorical(t_objCategory); } else { // TODO: throw new SugarImporterException("Not supported yet " + t_objCategory.getName()); } } } /** * @param category * @throws SugarImporterException * @throws GlycoconjugateException */ private void parseHistorical(Element a_objCategory) throws SugarImporterException, GlycoconjugateException { String t_strAttribute; List t_lMainElements = a_objCategory.getChildren(); for (Iterator t_iterElements = t_lMainElements.iterator(); t_iterElements.hasNext();) { Element t_objAGL = (Element) t_iterElements.next(); if ( !t_objAGL.getName().equals("entry") ) { throw new SugarImporterException("Forbiden tag " + t_objAGL.getName() + " in historicalData section."); } t_strAttribute = t_objAGL.getAttributeValue("name"); if ( t_strAttribute == null ) { throw new SugarImporterException("<entry> must have a name attribute."); } NonMonosaccharide t_objHistorical = new NonMonosaccharide(t_strAttribute); t_strAttribute = t_objAGL.getAttributeValue("fromResidue"); if ( t_strAttribute != null ) { GlycoEdge t_objEdge = new GlycoEdge(); int t_iID = Integer.parseInt(t_strAttribute); // at the non reducing end List t_lElements = t_objAGL.getChildren(); for (Iterator t_iterSElements = t_lElements.iterator(); t_iterSElements.hasNext();) { Element t_objLin = (Element) t_iterSElements.next(); if ( !t_objLin.getName().equals("linkage")) { throw new SugarImporterException("Invalde tag " + t_objLin.getName() + " in connection section."); } Linkage t_objLinkage = new Linkage(); // parent type t_strAttribute = t_objLin.getAttributeValue("parentType"); if ( t_strAttribute == null ) { throw new SugarImporterException("<linkage> must have a parentType."); } t_objLinkage.setParentLinkageType( LinkageType.forName(t_strAttribute.charAt(0))); // child tpye t_strAttribute = t_objLin.getAttributeValue("childType"); if ( t_strAttribute == null ) { throw new SugarImporterException("<linkage> must have a childType."); } t_objLinkage.setChildLinkageType( LinkageType.forName(t_strAttribute.charAt(0))); // positions List t_lSubElements = t_objLin.getChildren(); for (Iterator t_iterSubElements = t_lSubElements.iterator(); t_iterSubElements.hasNext();) { Element t_objFromTo = (Element) t_iterSubElements.next(); t_strAttribute = t_objFromTo.getAttributeValue("pos"); if ( t_strAttribute == null ) { throw new SugarImporterException("<from> or <to> must have a pos attribute."); } Integer t_objID = Integer.parseInt(t_strAttribute); if ( t_objFromTo.getName().equals("parent") ) { t_objLinkage.addParentLinkage(t_objID); } else if ( t_objFromTo.getName().equals("child") ) { t_objLinkage.addChildLinkage(t_objID); } else { throw new SugarImporterException("Invalde tag " + t_objFromTo.getName() + " in lin section."); } } t_objEdge.addGlycosidicLinkage(t_objLinkage); } if ( t_objEdge.getGlycosidicLinkages().size() == 0 ) { throw new SugarImporterException("<entry> must have at least on <lin> subtag."); } GlycoNode t_objResidue = this.m_hashResidues.get(t_iID); this.m_objSugarUnit.addEdge(t_objResidue,t_objHistorical,t_objEdge); } else { t_strAttribute = t_objAGL.getAttributeValue("toResidue"); if ( t_strAttribute != null ) { GlycoEdge t_objEdge = new GlycoEdge(); int t_iID = Integer.parseInt(t_strAttribute); // at the reducing end List t_lElements = t_objAGL.getChildren(); for (Iterator t_iterSElements = t_lElements.iterator(); t_iterSElements.hasNext();) { Element t_objLin = (Element) t_iterSElements.next(); if ( !t_objLin.getName().equals("linkage")) { throw new SugarImporterException("Invalde tag " + t_objLin.getName() + " in connection section."); } Linkage t_objLinkage = new Linkage(); // parent type t_strAttribute = t_objLin.getAttributeValue("parentType"); if ( t_strAttribute == null ) { throw new SugarImporterException("<linkage> must have a parentType."); } t_objLinkage.setParentLinkageType( LinkageType.forName(t_strAttribute.charAt(0))); // child tpye t_strAttribute = t_objLin.getAttributeValue("childType"); if ( t_strAttribute == null ) { throw new SugarImporterException("<linkage> must have a childType."); } t_objLinkage.setChildLinkageType( LinkageType.forName(t_strAttribute.charAt(0))); // positions List t_lSubElements = t_objLin.getChildren(); for (Iterator t_iterSubElements = t_lSubElements.iterator(); t_iterSubElements.hasNext();) { Element t_objFromTo = (Element) t_iterSubElements.next(); t_strAttribute = t_objFromTo.getAttributeValue("pos"); if ( t_strAttribute == null ) { throw new SugarImporterException("<from> or <to> must have a pos attribute."); } Integer t_objID = Integer.parseInt(t_strAttribute); if ( t_objFromTo.getName().equals("parent") ) { t_objLinkage.addParentLinkage(t_objID); } else if ( t_objFromTo.getName().equals("child") ) { t_objLinkage.addChildLinkage(t_objID); } else { throw new SugarImporterException("Invalde tag " + t_objFromTo.getName() + " in lin section."); } } t_objEdge.addGlycosidicLinkage(t_objLinkage); } if ( t_objEdge.getGlycosidicLinkages().size() == 0 ) { throw new SugarImporterException("<entry> must have at least on <lin> subtag."); } GlycoNode t_objResidue = this.m_hashResidues.get(t_iID); this.m_objSugarUnit.addEdge(t_objHistorical,t_objResidue,t_objEdge); } else { // unconnected ms } } } } private void parseRepeatSection(Element a_objMainElement) throws SugarImporterException, GlycoconjugateException, JDOMException { // so sieht aus // <repeat> // <unit id="0" minOccur="-1" maxOccur="-1"> // <residues> // <res id="3" type="b" anomer="a" superclass="hex" ringStart="1" ringEnd="5" name="a-dglc-HEX-1:5"> // <basetype id="0" type="dglc" /> // </res> // </residues> // <linkages /> // <internalLinkage parent="3" child="3"> // <lin parentType="u" childType="u"> // <from pos="2" /> // <to pos="1" /> // </lin> // </internalLinkage> // </unit> // </repeat> String t_strAttribute; SugarUnitRepeat t_objRepeat; List t_lMainElements = a_objMainElement.getChildren(); for (Iterator t_iterElements = t_lMainElements.iterator(); t_iterElements.hasNext();) { Element t_objUnit = (Element) t_iterElements.next(); if ( t_objUnit.getName().equals("unit") ) { // find repeat unit t_strAttribute = t_objUnit.getAttributeValue("id"); if ( t_strAttribute == null ) { throw new SugarImporterException("<unit> must have a id."); } t_objRepeat = this.m_hashRepeats.get(Integer.parseInt(t_strAttribute)); if ( t_objRepeat == null ) { throw new SugarImporterException("Critical error repeat unit id " + t_strAttribute + "never declarated before."); } // min / max t_strAttribute = t_objUnit.getAttributeValue("minOccur"); if ( t_strAttribute == null ) { throw new SugarImporterException("<unit> must have a minOccur."); } t_objRepeat.setMinRepeatCount(Integer.parseInt(t_strAttribute)); t_strAttribute = t_objUnit.getAttributeValue("maxOccur"); if ( t_strAttribute == null ) { throw new SugarImporterException("<unit> must have a maxOccur."); } t_objRepeat.setMaxRepeatCount(Integer.parseInt(t_strAttribute)); this.m_objSugarUnit = t_objRepeat; List t_lSubElements = t_objUnit.getChildren(); for (Iterator t_iterSubElements = t_lSubElements.iterator(); t_iterSubElements.hasNext();) { Element t_objMain = (Element) t_iterSubElements.next(); if (t_objMain.getName().equals("residues")) { this.parseResidueSection(t_objMain); } else if (t_objMain.getName().equals("linkages")) { this.parseLinkageSection(t_objMain); } else if (t_objMain.getName().equals("internalLinkage")) { this.parseInternalLinkage(t_objMain,t_objRepeat); } } } } } /** * @param a_objRepeat * @param main * @throws SugarImporterException * @throws GlycoconjugateException */ private void parseInternalLinkage(Element a_objInternal, SugarUnitRepeat a_objRepeat) throws SugarImporterException, GlycoconjugateException { // <internalLinkage parent="3" child="3"> // <lin parentType="u" childType="u"> // <from pos="2" /> // <to pos="1" /> // </lin> // </internalLinkage> String t_strAttribute = a_objInternal.getAttributeValue("parent"); if ( t_strAttribute == null ) { throw new SugarImporterException("<internalLinkage> must have a parent attribute."); } GlycoNode t_objParent = this.m_hashResidues.get(Integer.parseInt(t_strAttribute)); t_strAttribute = a_objInternal.getAttributeValue("child"); if ( t_strAttribute == null ) { throw new SugarImporterException("<internalLinkage> must have a child attribute."); } GlycoNode t_objChild = this.m_hashResidues.get(Integer.parseInt(t_strAttribute)); if ( t_objChild == null || t_objParent == null ) { throw new SugarImporterException("Error in <internalLinkage> residues are not declareded."); } // edge GlycoEdge t_objEdge = new GlycoEdge(); // fill with linkages List t_lElements = a_objInternal.getChildren(); Element t_objLin; Linkage t_objLinkage; List t_lSubElements; Element t_objFromTo; Integer t_objID; for (Iterator t_iterElements = t_lElements.iterator(); t_iterElements.hasNext();) { t_objLin = (Element) t_iterElements.next(); if ( t_objLin.getName().equals("linkage")) { t_objLinkage = new Linkage(); // parent type t_strAttribute = t_objLin.getAttributeValue("parentType"); if ( t_strAttribute == null ) { throw new SugarImporterException("<linkage> must have a parentType."); } t_objLinkage.setParentLinkageType( LinkageType.forName(t_strAttribute.charAt(0))); // child tpye t_strAttribute = t_objLin.getAttributeValue("childType"); if ( t_strAttribute == null ) { throw new SugarImporterException("<linkage> must have a childType."); } t_objLinkage.setChildLinkageType( LinkageType.forName(t_strAttribute.charAt(0))); // positions t_lSubElements = t_objLin.getChildren(); for (Iterator t_iterSubElements = t_lSubElements.iterator(); t_iterSubElements.hasNext();) { t_objFromTo = (Element) t_iterSubElements.next(); if ( t_objFromTo.getName().equals("parent") ) { t_strAttribute = t_objFromTo.getAttributeValue("pos"); if ( t_strAttribute == null ) { throw new SugarImporterException("<from> or <to> must have a pos attribute."); } t_objID = Integer.parseInt(t_strAttribute); t_objLinkage.addParentLinkage(t_objID); } else if ( t_objFromTo.getName().equals("child") ) { t_strAttribute = t_objFromTo.getAttributeValue("pos"); if ( t_strAttribute == null ) { throw new SugarImporterException("<from> or <to> must have a pos attribute."); } t_objID = Integer.parseInt(t_strAttribute); t_objLinkage.addChildLinkage(t_objID); } } t_objEdge.addGlycosidicLinkage(t_objLinkage); } } if ( t_objEdge.getGlycosidicLinkages().size() == 0 ) { throw new SugarImporterException("<connection> must have at least on <lin> subtag."); } // add edge a_objRepeat.setRepeatLinkage(t_objEdge,t_objParent,t_objChild); } /** * @param main * @throws SugarImporterException * @throws GlycoconjugateException * @throws JDOMException */ private void parseUnderdetermindedSubtreeSection(Element a_objMainElement) throws SugarImporterException, GlycoconjugateException, JDOMException { String t_strAttribute; List t_lMainElements = a_objMainElement.getChildren(); for (Iterator t_iterElements = t_lMainElements.iterator(); t_iterElements.hasNext();) { UnderdeterminedSubTree t_objSubtree = new UnderdeterminedSubTree(); Element t_objTree = (Element) t_iterElements.next(); if ( t_objTree.getName().equals("tree") ) { t_strAttribute = t_objTree.getAttributeValue("probLow"); if ( t_strAttribute == null ) { throw new SugarImporterException("<tree> must have a probLow."); } Double t_dLow = Double.parseDouble(t_strAttribute); t_strAttribute = t_objTree.getAttributeValue("probUp"); if ( t_strAttribute == null ) { throw new SugarImporterException("<tree> must have a probUp."); } Double t_dHigh = Double.parseDouble(t_strAttribute); t_objSubtree.setProbability(t_dLow,t_dHigh); this.m_objSugarUnit = t_objSubtree; List t_lSubElements = t_objTree.getChildren(); ArrayList<GlycoNode> t_aParents = new ArrayList<GlycoNode>(); for (Iterator t_iterSubElements = t_lSubElements.iterator(); t_iterSubElements.hasNext();) { Element t_objSubElement = (Element) t_iterSubElements.next(); if (t_objSubElement.getName().equals("residues")) { this.parseResidueSection(t_objSubElement); } else if (t_objSubElement.getName().equals("linkages")) { this.parseLinkageSection(t_objSubElement); } else if (t_objSubElement.getName().equals("parents")) { t_aParents = this.parseParents(t_objSubElement); } else if (t_objSubElement.getName().equals("connection")) { t_objSubtree.setConnection(this.parseConnection(t_objSubElement)); } } GlycoGraph t_objUnit = this.m_hashGraphs.get( t_aParents.get(0) ); GlycoNode t_objNode; if ( t_objUnit.getClass() == SugarUnitRepeat.class ) { SugarUnitRepeat t_objRepeat = (SugarUnitRepeat)t_objUnit; t_objRepeat.addUndeterminedSubTree(t_objSubtree); for (Iterator<GlycoNode> t_iterParents = t_aParents.iterator(); t_iterParents.hasNext();) { t_objNode = t_iterParents.next(); if ( this.m_hashGraphs.get( t_objNode ) != t_objUnit ) { throw new SugarImporterException("Error in <underDeterminedSubtree> all parents must be in the same unit."); } t_objRepeat.addUndeterminedSubTreeParent(t_objSubtree,t_objNode); } } else if ( t_objUnit.getClass() == Sugar.class ) { Sugar t_objRepeat = (Sugar)t_objUnit; t_objRepeat.addUndeterminedSubTree(t_objSubtree); for (Iterator<GlycoNode> t_iterParents = t_aParents.iterator(); t_iterParents.hasNext();) { t_objNode = t_iterParents.next(); if ( this.m_hashGraphs.get( t_objNode ) != t_objUnit ) { throw new SugarImporterException("Error in <underDeterminedSubtree> all parents must be in the same unit."); } t_objRepeat.addUndeterminedSubTreeParent(t_objSubtree,t_objNode); } } else { throw new SugarImporterException("Error in <underDeterminedSubtree>: Tree is connected to a " + t_objUnit.getClass().getName() + "."); } } } } /** * @param subElement * @throws SugarImporterException * @throws GlycoconjugateException */ private GlycoEdge parseConnection(Element a_objConnection) throws SugarImporterException, GlycoconjugateException { String t_strAttribute; // edge GlycoEdge t_objEdge = new GlycoEdge(); // fill with linkages List t_lElements = a_objConnection.getChildren(); Element t_objLin; Linkage t_objLinkage; List t_lSubElements; Element t_objFromTo; Integer t_objID; for (Iterator t_iterElements = t_lElements.iterator(); t_iterElements.hasNext();) { t_objLin = (Element) t_iterElements.next(); if ( t_objLin.getName().equals("linkage")) { t_objLinkage = new Linkage(); // parent type t_strAttribute = t_objLin.getAttributeValue("parentType"); if ( t_strAttribute == null ) { throw new SugarImporterException("<linkage> must have a parentType."); } t_objLinkage.setParentLinkageType( LinkageType.forName(t_strAttribute.charAt(0))); // child tpye t_strAttribute = t_objLin.getAttributeValue("childType"); if ( t_strAttribute == null ) { throw new SugarImporterException("<linkage> must have a childType."); } t_objLinkage.setChildLinkageType( LinkageType.forName(t_strAttribute.charAt(0))); // positions t_lSubElements = t_objLin.getChildren(); for (Iterator t_iterSubElements = t_lSubElements.iterator(); t_iterSubElements.hasNext();) { t_objFromTo = (Element) t_iterSubElements.next(); if ( t_objFromTo.getName().equals("parent") ) { t_strAttribute = t_objFromTo.getAttributeValue("pos"); if ( t_strAttribute == null ) { throw new SugarImporterException("<from> or <to> must have a pos attribute."); } t_objID = Integer.parseInt(t_strAttribute); t_objLinkage.addParentLinkage(t_objID); } else if ( t_objFromTo.getName().equals("child") ) { t_strAttribute = t_objFromTo.getAttributeValue("pos"); if ( t_strAttribute == null ) { throw new SugarImporterException("<from> or <to> must have a pos attribute."); } t_objID = Integer.parseInt(t_strAttribute); t_objLinkage.addChildLinkage(t_objID); } } t_objEdge.addGlycosidicLinkage(t_objLinkage); } } if ( t_objEdge.getGlycosidicLinkages().size() == 0 ) { throw new SugarImporterException("<connection> must have at least on <lin> subtag."); } return t_objEdge; } /** * @param subElement * @return * @throws SugarImporterException */ private ArrayList<GlycoNode> parseParents(Element a_objParentElement) throws SugarImporterException { String t_strAttribute; ArrayList<GlycoNode> t_aParents = new ArrayList<GlycoNode>(); List t_lSubElements = a_objParentElement.getChildren(); Integer t_iID; GlycoNode t_objParentNode; for (Iterator t_iterSubElements = t_lSubElements.iterator(); t_iterSubElements.hasNext();) { Element t_objParent = (Element) t_iterSubElements.next(); if ( t_objParent.getName().equals("parent") ) { t_strAttribute = t_objParent.getAttributeValue("res_id"); if ( t_strAttribute == null ) { throw new SugarImporterException("<parent> must have a res_id attribute."); } t_iID = Integer.parseInt(t_strAttribute); t_objParentNode = this.m_hashResidues.get(t_iID); if ( t_objParentNode == null ) { throw new SugarImporterException("Invalide <parent>. Residue was not declareted before."); } t_aParents.add(t_objParentNode); } } return t_aParents; } /** * @param residue * @throws SugarImporterException * @throws GlycoconjugateException */ private void parseAlternative(Element a_objAlternativeElement) throws SugarImporterException, GlycoconjugateException { String t_strAttribute; SugarUnitAlternative t_objAlternative = new SugarUnitAlternative(); // add ms to sugar this.m_objSugarUnit.addNode(t_objAlternative); this.m_hashGraphs.put(t_objAlternative,this.m_objSugarUnit); t_strAttribute = a_objAlternativeElement.getAttributeValue("id"); if ( t_strAttribute == null ) { throw new SugarImporterException("<res> must have a id."); } Integer t_iID = Integer.parseInt(t_strAttribute); if ( this.m_hashResidues.containsKey(t_iID) ) { throw new SugarImporterException("Dupplicated residue ID."); } this.m_hashResidues.put(t_iID,t_objAlternative); // alternativeId t_strAttribute = a_objAlternativeElement.getAttributeValue("alternativeId"); if ( t_strAttribute == null ) { throw new SugarImporterException("<res> must have a name attribute."); } t_iID = Integer.parseInt(t_strAttribute); if ( this.m_hashAlternatives.containsKey(t_iID) ) { throw new SugarImporterException("Dupplicated alternative ID."); } this.m_hashAlternatives.put(t_iID,t_objAlternative); } /** * @param main * @throws SugarImporterException * @throws GlycoconjugateException * @throws JDOMException */ private void parseAlternativeSection(Element a_objAlternativeElement) throws SugarImporterException, GlycoconjugateException, JDOMException { String t_strAttribute; List t_lAlternativeElements = a_objAlternativeElement.getChildren(); for (Iterator t_iterElements = t_lAlternativeElements.iterator(); t_iterElements.hasNext();) { Element t_objUnitElement = (Element) t_iterElements.next(); if ( t_objUnitElement.getName().equals("unit")) { // unit t_strAttribute = t_objUnitElement.getAttributeValue("id"); if ( t_strAttribute == null ) { throw new SugarImporterException("<unit> must have a id."); } SugarUnitAlternative t_objAlternative = this.m_hashAlternatives.get(Integer.parseInt(t_strAttribute)); if ( t_objAlternative == null ) { throw new SugarImporterException("Critical error alternative unit id " + t_strAttribute + " never declarated before."); } List t_lUnitElements = t_objUnitElement.getChildren(); for (Iterator t_iterSubstructure = t_lUnitElements.iterator(); t_iterSubstructure.hasNext();) { Element t_objSubstructuresElement = (Element) t_iterSubstructure.next(); if ( t_objSubstructuresElement.getName().equals("substructure") ) { // substructures GlycoGraphAlternative t_objAltGraph = new GlycoGraphAlternative(); t_objAlternative.addAlternative(t_objAltGraph); this.m_objSugarUnit = t_objAltGraph; List t_lSubElements = t_objSubstructuresElement.getChildren(); for (Iterator t_iterSubElements = t_lSubElements.iterator(); t_iterSubElements.hasNext();) { Element t_objSubElement = (Element) t_iterSubElements.next(); if (t_objSubElement.getName().equals("residues")) { this.parseResidueSection(t_objSubElement); } else if (t_objSubElement.getName().equals("linkages")) { this.parseLinkageSection(t_objSubElement); } else if (t_objSubElement.getName().equals("lead_in")) { t_strAttribute = t_objSubElement.getAttributeValue("residue_id"); if ( t_strAttribute == null ) { throw new SugarImporterException("<lead_in> must have a residue_id."); } GlycoNode t_objNode = this.m_hashResidues.get(Integer.parseInt(t_strAttribute)); t_objAlternative.setLeadInNode(t_objNode,t_objAltGraph); } else if (t_objSubElement.getName().equals("lead_out")) { t_strAttribute = t_objSubElement.getAttributeValue("residue_id"); if ( t_strAttribute == null ) { throw new SugarImporterException("<lead_out> must have a residue_id."); } GlycoNode t_objNodeInner = this.m_hashResidues.get(Integer.parseInt(t_strAttribute)); t_strAttribute = t_objSubElement.getAttributeValue("connected_to"); if ( t_strAttribute == null ) { throw new SugarImporterException("<lead_out> must have a connected_to."); } GlycoNode t_objNodeOuter = this.m_hashResidues.get(Integer.parseInt(t_strAttribute)); if ( t_objNodeInner == null || t_objNodeOuter == null ) { throw new SugarImporterException("Values for lead out are invalide. Residues are not defined before."); } t_objAlternative.addLeadOutNodeToNode(t_objNodeInner,t_objAltGraph,t_objNodeOuter); } } } } } } } }