/*
 *   EuroCarbDB, a framework for carbohydrate bioinformatics
 *
 *   Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as
 *   indicated by the @author tags or express copyright attribution
 *   statements applied by the authors.
 *
 *   This copyrighted material is made available to anyone wishing to use, modify,
 *   copy, or redistribute it subject to the terms and conditions of the GNU
 *   Lesser General Public License, as published by the Free Software Foundation.
 *   A copy of this license accompanies this distribution in the file LICENSE.txt.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 *   or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
 *   for more details.
 *
 *   Last commit: $Rev: 1210 $ by $Author: glycoslave $ on $Date:: 2009-06-12 #$
 */
/**
 *
 */
package org.eurocarbdb.MolecularFramework.io.cfg;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;

import org.eurocarbdb.MolecularFramework.io.SugarImporterException;
import org.eurocarbdb.MolecularFramework.io.SugarImporterText;
import org.eurocarbdb.MolecularFramework.sugar.GlycoEdge;
import org.eurocarbdb.MolecularFramework.sugar.GlycoGraph;
import org.eurocarbdb.MolecularFramework.sugar.GlycoNode;
import org.eurocarbdb.MolecularFramework.sugar.GlycoconjugateException;
import org.eurocarbdb.MolecularFramework.sugar.Linkage;
import org.eurocarbdb.MolecularFramework.sugar.LinkageType;
import org.eurocarbdb.MolecularFramework.sugar.Sugar;
import org.eurocarbdb.MolecularFramework.sugar.SugarUnitRepeat;
import org.eurocarbdb.MolecularFramework.sugar.UnderdeterminedSubTree;
import org.eurocarbdb.MolecularFramework.sugar.UnvalidatedGlycoNode;

/**
 * Recursive-descent importer for the CFG textual sugar notation.
 * <p>
 * Format reference given by the original author:
 * http://www.glycominds.com/index.asp?menu=Research&page=glycoit#
 * <p>
 * The input may start with any number of underdetermined-subtree definitions,
 * each terminated by <code>%|</code> and of the form
 * <code>&lt;subbranch&gt;=&lt;number&gt;</code>. The remaining text is the main
 * chain, which refers to such a definition as <code>&lt;number&gt;%</code>.
 *
 * @author Logan
 */
public class SugarImporterCFG extends SugarImporterText
{
    // NOTE(review): this flag is assigned in start() but never read inside this class.
    private boolean m_bAllowCyclic = true;

    /** Underdetermined subtree definitions of the current input, keyed by their id. */
    private HashMap<Integer,CFGUnderdeterminedTree> m_hashSubtrees = new HashMap<Integer,CFGUnderdeterminedTree>();

    /**
     * Parses the complete input: first every underdetermined-subtree definition
     * (all pieces in front of a <code>%|</code> separator), then the main chain
     * (the last piece), which is built into <code>m_objSugar</code>.
     *
     * @throws SugarImporterException "CFG007" if a subtree definition is not fully
     *         consumed, "CFG005" if the main chain does not end at <code>$</code>,
     *         or whatever the nested parsing methods raise
     * @see org.eurocarbdb.MolecularFramework.io.SugarImporterText#start()
     */
    @Override
    protected void start() throws SugarImporterException
    {
        this.m_hashSubtrees.clear();
        // split up uncertain terminal definitions
        String[] t_aParts = this.m_strText.split("%\\|");
        int t_iMainIndex = t_aParts.length - 1;
        this.m_bAllowCyclic = true;
        for (int t_iCounter = 0; t_iCounter < t_iMainIndex; t_iCounter++)
        {
            // store uncertain terminal residue; the split removed the end marker, so re-append it
            this.restartOn(t_aParts[t_iCounter] + "$");
            CFGUnderdeterminedTree t_objTree = this.parseUnderdetermined();
            this.m_hashSubtrees.put(t_objTree.getId(), t_objTree);
            if ( this.m_cToken != '$' )
            {
                throw new SugarImporterException("CFG007", -2);
            }
        }
        this.m_bAllowCyclic = true;
        // start parsing the main chain
        this.restartOn(t_aParts[t_iMainIndex]);
        this.mainchain(this.m_objSugar);
        // finished ?
        if ( this.m_cToken != '$' )
        {
            throw new SugarImporterException("CFG005", -1);
        }
    }

    /**
     * Makes the given text the current parser input and reads its first token.
     *
     * @param a_strText text to parse from its first character
     * @throws SugarImporterException if the first token cannot be read
     */
    private void restartOn(String a_strText) throws SugarImporterException
    {
        this.m_strText = a_strText;
        this.m_iPosition = -1;
        this.m_iLength = this.m_strText.length();
        this.nextToken();
    }

    /**
     * Parses one underdetermined-subtree definition:
     * <code>&lt;subbranch&gt; "=" &lt;number&gt;</code>.
     * The edge returned by the subbranch becomes the connection of the new
     * {@link UnderdeterminedSubTree}; its linkages are typed H_AT_OH (parent side)
     * and DEOXY (child side).
     *
     * @return the parsed subtree together with its numeric id
     * @throws SugarImporterException "CFG012" if the subbranch ends in a subtree
     *         reference instead of a residue, "CFG006" if <code>=</code> is missing
     */
    private CFGUnderdeterminedTree parseUnderdetermined() throws SugarImporterException
    {
        CFGUnderdeterminedTree t_objResult = new CFGUnderdeterminedTree();
        UnderdeterminedSubTree t_objSubTree = new UnderdeterminedSubTree();
        // parse
        CFGSubTree t_objBranch = this.subbranch(t_objSubTree);
        if ( t_objBranch.getId() != null )
        {
            throw new SugarImporterException("CFG012", this.m_iPosition);
        }
        GlycoEdge t_objEdge = t_objBranch.getGlycoEdge();
        for (Iterator<Linkage> t_iterLinkage = t_objEdge.getGlycosidicLinkages().iterator(); t_iterLinkage.hasNext();)
        {
            Linkage t_objLink = t_iterLinkage.next();
            try
            {
                t_objLink.setParentLinkageType(LinkageType.H_AT_OH);
                t_objLink.setChildLinkageType(LinkageType.DEOXY);
            }
            catch (GlycoconjugateException ignored)
            {
                // Best effort, as in the original code: a linkage whose type cannot be set
                // is left untouched. Presumably unreachable for these constants - TODO confirm.
            }
        }
        t_objSubTree.setConnection(t_objEdge);
        // parse = and number
        if ( this.m_cToken != '=' )
        {
            throw new SugarImporterException("CFG006", this.m_iPosition);
        }
        this.nextToken();
        t_objResult.setId(this.number());
        t_objResult.setTree(t_objSubTree);
        return t_objResult;
    }

    /**
     * Parses the main chain into the given graph.
     * <pre>
     * ( &lt;number&gt; "%" | &lt;residuename&gt; ) { &lt;position&gt; { "(" &lt;subbranch&gt; ")" } &lt;residuename&gt; } [ aglyca ]
     * </pre>
     * Each residue name read inside the loop becomes the parent of the chain read
     * so far and of every bracketed branch in front of it. An optional trailing
     * part starting with <code>#</code>, <code>;</code> or <code>:</code> is stored,
     * up to the end marker, as one more node that is the parent of the last residue.
     *
     * @param a_objGraph graph receiving the nodes and edges
     * @throws SugarImporterException "CFG004" on a malformed subtree reference or
     *         bracket, "CFG011" if an uncertain branch cannot be attached,
     *         "COMMON013" if the graph rejects a node or an edge
     */
    private void mainchain(GlycoGraph a_objGraph) throws SugarImporterException
    {
        try
        {
            CFGSubTree t_objMain = new CFGSubTree();
            ArrayList<CFGSubTree> t_aBranches = new ArrayList<CFGSubTree>();
            if ( isAsciiDigit(this.m_cToken) )
            {
                // uncertain subtree
                this.readSubtreeReference(t_objMain);
            }
            else
            {
                t_objMain.setGlycoNode(this.readResidueNode(a_objGraph));
            }
            while ( !isChainEnd(this.m_cToken) )
            {
                t_aBranches.add(t_objMain);
                if ( t_objMain.getId() == null )
                {
                    // a subtree reference carries no position of its own
                    t_objMain.setGlycoEdge(newEdgeToParent(this.position()));
                }
                this.readBracketedBranches(a_objGraph, t_aBranches);
                // create parent residue
                UnvalidatedGlycoNode t_objParent = this.readResidueNode(a_objGraph);
                // add branches
                this.attachBranches(a_objGraph, t_objParent, t_aBranches);
                t_aBranches.clear();
                t_objMain.setGlycoNode(t_objParent);
                t_objMain.setId(null);
            }
            // aglyca ?
            if ( this.m_cToken == '#' || this.m_cToken == ';' || this.m_cToken == ':' )
            {
                int t_iStartPosition = this.m_iPosition;
                while ( this.m_cToken != '$' )
                {
                    this.nextToken();
                }
                UnvalidatedGlycoNode t_objAglyca = new UnvalidatedGlycoNode();
                t_objAglyca.setName(this.m_strText.substring(t_iStartPosition, this.m_iPosition));
                a_objGraph.addNode(t_objAglyca);
                Linkage t_objLinkage = new Linkage();
                t_objLinkage.addChildLinkage(1);
                t_objLinkage.addParentLinkage(Linkage.UNKNOWN_POSITION);
                GlycoEdge t_objEdge = new GlycoEdge();
                t_objEdge.addGlycosidicLinkage(t_objLinkage);
                a_objGraph.addEdge(t_objAglyca, t_objMain.getGlycoNode(), t_objEdge);
            }
        }
        catch (GlycoconjugateException e)
        {
            throw new SugarImporterException("COMMON013", this.m_iPosition);
        }
    }

    /**
     * Parses a linkage position: either <code>?</code> (stored as
     * {@link Linkage#UNKNOWN_POSITION}) or one or more numbers separated by
     * <code>/</code>.
     *
     * @return the parsed positions in input order
     * @throws SugarImporterException if a number cannot be read
     */
    private ArrayList<Integer> position() throws SugarImporterException
    {
        ArrayList<Integer> t_aPositions = new ArrayList<Integer>();
        if ( this.m_cToken == '?' )
        {
            t_aPositions.add(Linkage.UNKNOWN_POSITION);
            this.nextToken();
            return t_aPositions;
        }
        t_aPositions.add(this.number());
        while ( this.m_cToken == '/' )
        {
            this.nextToken();
            t_aPositions.add(this.number());
        }
        return t_aPositions;
    }

    /**
     * Consumes one residue name. Nothing is returned: callers cut the name out of
     * the input using the parser position before and after this call.
     * <p>
     * A name is either a single <code>?</code> (unknown residue) or at least one
     * character out of <code>A-Z ' ^ ~</code>. It may be followed by a bracketed
     * modification and then by one of <code>? a b o</code>. That trailing character
     * may only be left out directly in front of <code>$ ; : #</code>.
     *
     * @throws SugarImporterException "CFG001" if no name character is present,
     *         "CFG000" if the name is followed by an unexpected character
     */
    private void residuename() throws SugarImporterException
    {
        if ( this.m_cToken == '?' )
        {
            // unknown residue
            this.nextToken();
        }
        else
        {
            boolean t_bNameMissing = true;
            while ( (this.m_cToken >= 'A' && this.m_cToken <= 'Z') || this.m_cToken == '\''
                    || this.m_cToken == '^' || this.m_cToken == '~' )
            {
                this.nextToken();
                t_bNameMissing = false;
            }
            if ( t_bNameMissing )
            {
                throw new SugarImporterException("CFG001", this.m_iPosition);
            }
        }
        // modification ?
        if ( this.m_cToken == '[' )
        {
            this.modification();
        }
        if ( this.m_cToken == '?' || this.m_cToken == 'a' || this.m_cToken == 'b' || this.m_cToken == 'o' )
        {
            this.nextToken();
        }
        else if ( !isChainEnd(this.m_cToken) )
        {
            throw new SugarImporterException("CFG000", this.m_iPosition);
        }
    }

    /**
     * Consumes a bracketed modification <code>"[" { letter | digit | "*" } "]"</code>,
     * including the closing bracket.
     * <p>
     * The original digit test read <code>c &lt; 48 &amp;&amp; c &gt; 57</code>, which can
     * never be true, so "CFG003" was never raised and any character was accepted
     * between the brackets. The test now rejects everything that is not an ASCII
     * letter, an ASCII digit or <code>*</code>.
     *
     * @throws SugarImporterException "CFG002" if the current token is not
     *         <code>[</code>, "CFG003" on an illegal character inside the brackets
     */
    private void modification() throws SugarImporterException
    {
        if ( this.m_cToken != '[' )
        {
            throw new SugarImporterException("CFG002", this.m_iPosition);
        }
        this.nextToken();
        while ( this.m_cToken != ']' )
        {
            boolean t_bAllowed = isAsciiLetter(this.m_cToken)
                    || isAsciiDigit(this.m_cToken)
                    || this.m_cToken == '*';
            if ( !t_bAllowed )
            {
                throw new SugarImporterException("CFG003", this.m_iPosition);
            }
            this.nextToken();
        }
        this.nextToken();
    }

    /**
     * Parses a branch up to (but not including) the closing <code>)</code> or the
     * <code>=</code> of a subtree definition.
     * <pre>
     * ( &lt;number&gt; "%" | &lt;residuename&gt; &lt;position&gt; ) { { "(" &lt;subbranch&gt; ")" } &lt;residuename&gt; &lt;position&gt; }
     * </pre>
     *
     * @param a_objGraph graph receiving the nodes and edges
     * @return the last residue of the branch with the edge that links it to its
     *         future parent, or only an id if the branch is a bare subtree reference
     * @throws SugarImporterException "CFG004" on a malformed subtree reference or
     *         bracket, "CFG011" if an uncertain branch cannot be attached,
     *         "COMMON013" if the graph rejects a node or an edge
     */
    private CFGSubTree subbranch(GlycoGraph a_objGraph) throws SugarImporterException
    {
        CFGSubTree t_objMain = new CFGSubTree();
        ArrayList<CFGSubTree> t_aBranches = new ArrayList<CFGSubTree>();
        try
        {
            if ( isAsciiDigit(this.m_cToken) )
            {
                // uncertain subtree
                this.readSubtreeReference(t_objMain);
            }
            else
            {
                // <residuename> <position>
                UnvalidatedGlycoNode t_objFirst = this.readResidueNode(a_objGraph);
                t_objMain.setGlycoEdge(newEdgeToParent(this.position()));
                t_objMain.setGlycoNode(t_objFirst);
            }
            while ( this.m_cToken != ')' && this.m_cToken != '=' )
            {
                t_aBranches.add(t_objMain);
                // { <subbranch> }
                this.readBracketedBranches(a_objGraph, t_aBranches);
                // <residuename>
                UnvalidatedGlycoNode t_objParent = this.readResidueNode(a_objGraph);
                // add branches
                this.attachBranches(a_objGraph, t_objParent, t_aBranches);
                t_aBranches.clear();
                // <position>
                ArrayList<Integer> t_aPositions = this.position();
                t_objMain.setId(null);
                t_objMain.setGlycoNode(t_objParent);
                t_objMain.setGlycoEdge(newEdgeToParent(t_aPositions));
            }
        }
        catch (GlycoconjugateException e)
        {
            throw new SugarImporterException("COMMON013", this.m_iPosition);
        }
        return t_objMain;
    }

    /**
     * Reads a subtree reference <code>&lt;number&gt; "%"</code> and stores the
     * number as id of the given branch.
     *
     * @param a_objBranch branch that receives the id
     * @throws SugarImporterException "CFG004" if the number is not followed by <code>%</code>
     */
    private void readSubtreeReference(CFGSubTree a_objBranch) throws SugarImporterException
    {
        a_objBranch.setId(this.number());
        if ( this.m_cToken != '%' )
        {
            throw new SugarImporterException("CFG004", this.m_iPosition);
        }
        this.nextToken();
    }

    /**
     * Reads one residue name and adds a node carrying that name to the graph.
     *
     * @param a_objGraph graph receiving the node
     * @return the node that was added
     * @throws SugarImporterException if the residue name is malformed
     * @throws GlycoconjugateException if the name or the node is rejected
     */
    private UnvalidatedGlycoNode readResidueNode(GlycoGraph a_objGraph)
            throws SugarImporterException, GlycoconjugateException
    {
        int t_iStartPosition = this.m_iPosition;
        this.residuename();
        UnvalidatedGlycoNode t_objNode = new UnvalidatedGlycoNode();
        t_objNode.setName(this.m_strText.substring(t_iStartPosition, this.m_iPosition));
        a_objGraph.addNode(t_objNode);
        return t_objNode;
    }

    /**
     * Reads zero or more bracketed branches <code>"(" &lt;subbranch&gt; ")"</code>
     * and appends each of them to the given list.
     *
     * @param a_objGraph graph receiving the nodes and edges of the branches
     * @param a_aBranches list the parsed branches are appended to
     * @throws SugarImporterException "CFG004" if a closing bracket is missing
     */
    private void readBracketedBranches(GlycoGraph a_objGraph, ArrayList<CFGSubTree> a_aBranches)
            throws SugarImporterException
    {
        while ( this.m_cToken == '(' )
        {
            this.nextToken();
            a_aBranches.add(this.subbranch(a_objGraph));
            if ( this.m_cToken != ')' )
            {
                throw new SugarImporterException("CFG004", this.m_iPosition);
            }
            this.nextToken();
        }
    }

    /**
     * Connects every collected branch to its parent node. Branches without an id
     * get a normal edge; branches with an id are looked up among the
     * underdetermined subtree definitions and registered with the parent.
     *
     * @param a_objGraph graph owning the nodes
     * @param a_objParent node all branches hang on
     * @param a_aBranches branches to attach
     * @throws SugarImporterException "CFG011" if an uncertain branch cannot be attached
     * @throws GlycoconjugateException if the graph rejects a normal edge
     */
    private void attachBranches(GlycoGraph a_objGraph, GlycoNode a_objParent, ArrayList<CFGSubTree> a_aBranches)
            throws SugarImporterException, GlycoconjugateException
    {
        for (CFGSubTree t_objBranch : a_aBranches)
        {
            if ( t_objBranch.getId() == null )
            {
                // normal branch
                a_objGraph.addEdge(a_objParent, t_objBranch.getGlycoNode(), t_objBranch.getGlycoEdge());
            }
            else
            {
                // uncertain subtree
                CFGUnderdeterminedTree t_objUTree = this.m_hashSubtrees.get(t_objBranch.getId());
                try
                {
                    this.addUncertainBranch(a_objGraph, t_objUTree, a_objParent);
                }
                catch (GlycoconjugateException e)
                {
                    throw new SugarImporterException("CFG011", this.m_iPosition);
                }
            }
        }
    }

    /**
     * Builds an edge with a single linkage: child position 1, parent positions as given.
     *
     * @param a_aParentPositions positions on the parent residue
     * @return the new edge
     * @throws GlycoconjugateException if the linkage or the edge rejects the values
     */
    private static GlycoEdge newEdgeToParent(ArrayList<Integer> a_aParentPositions) throws GlycoconjugateException
    {
        Linkage t_objLinkage = new Linkage();
        t_objLinkage.addChildLinkage(1);
        t_objLinkage.setParentLinkages(a_aParentPositions);
        GlycoEdge t_objEdge = new GlycoEdge();
        t_objEdge.addGlycosidicLinkage(t_objLinkage);
        return t_objEdge;
    }

    /** @return true for the characters <code>0</code> to <code>9</code> */
    private static boolean isAsciiDigit(int a_iChar)
    {
        return a_iChar >= '0' && a_iChar <= '9';
    }

    /** @return true for the characters <code>A-Z</code> and <code>a-z</code> */
    private static boolean isAsciiLetter(int a_iChar)
    {
        return (a_iChar >= 'A' && a_iChar <= 'Z') || (a_iChar >= 'a' && a_iChar <= 'z');
    }

    /** @return true for the end marker <code>$</code> and for <code># ; :</code>, which end the residue chain */
    private static boolean isChainEnd(int a_iChar)
    {
        return a_iChar == '$' || a_iChar == '#' || a_iChar == ';' || a_iChar == ':';
    }

    /**
     * Registers an underdetermined subtree with the graph (once per subtree) and
     * adds the given node as one of its possible parents. Only graphs whose exact
     * class is {@link Sugar} or {@link SugarUnitRepeat} are supported.
     *
     * @param a_objGraph graph the subtree belongs to
     * @param a_objTree subtree definition looked up by id; may be null
     * @param a_objNode candidate parent node of the subtree
     * @throws SugarImporterException "CFG009" if no definition exists for the id,
     *         "CFG010" if the graph is of any other class
     * @throws GlycoconjugateException if the graph rejects the subtree or the parent
     */
    private void addUncertainBranch(GlycoGraph a_objGraph, CFGUnderdeterminedTree a_objTree,GlycoNode a_objNode) throws SugarImporterException, GlycoconjugateException
    {
        if ( a_objTree == null )
        {
            throw new SugarImporterException("CFG009", this.m_iPosition);
        }
        if ( a_objGraph.getClass() == Sugar.class )
        {
            Sugar t_objSugar = (Sugar)a_objGraph;
            if ( !a_objTree.isAdded() )
            {
                t_objSugar.addUndeterminedSubTree(a_objTree.getTree());
                a_objTree.setAdded(true);
            }
            t_objSugar.addUndeterminedSubTreeParent(a_objTree.getTree(), a_objNode);
        }
        else if ( a_objGraph.getClass() == SugarUnitRepeat.class )
        {
            SugarUnitRepeat t_objRepeat = (SugarUnitRepeat)a_objGraph;
            if ( !a_objTree.isAdded() )
            {
                t_objRepeat.addUndeterminedSubTree(a_objTree.getTree());
                a_objTree.setAdded(true);
            }
            t_objRepeat.addUndeterminedSubTreeParent(a_objTree.getTree(), a_objNode);
        }
        else
        {
            throw new SugarImporterException("CFG010", this.m_iPosition);
        }
    }
}