/* * EuroCarbDB, a framework for carbohydrate bioinformatics * * Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as * indicated by the @author tags or express copyright attribution * statements applied by the authors. * * This copyrighted material is made available to anyone wishing to use, modify, * copy, or redistribute it subject to the terms and conditions of the GNU * Lesser General Public License, as published by the Free Software Foundation. * A copy of this license accompanies this distribution in the file LICENSE.txt. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * Last commit: $Rev: 1210 $ by $Author: glycoslave $ on $Date:: 2009-06-12 #$ */ package org.eurocarbdb.MolecularFramework.io.carbbank; import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; import org.eurocarbdb.MolecularFramework.io.StructureSpecialInformation; import org.eurocarbdb.MolecularFramework.io.SugarImporter; import org.eurocarbdb.MolecularFramework.io.SugarImporterException; import org.eurocarbdb.MolecularFramework.sugar.GlycoEdge; import org.eurocarbdb.MolecularFramework.sugar.GlycoGraph; import org.eurocarbdb.MolecularFramework.sugar.GlycoNode; import org.eurocarbdb.MolecularFramework.sugar.GlycoconjugateException; import org.eurocarbdb.MolecularFramework.sugar.Linkage; import org.eurocarbdb.MolecularFramework.sugar.Sugar; import org.eurocarbdb.MolecularFramework.sugar.SugarUnitRepeat; import org.eurocarbdb.MolecularFramework.sugar.UnvalidatedGlycoNode; /** * * * @author rene * */ public class SugarImporterCarbbank extends SugarImporter { private HashMap<String,Integer> m_hashRepeatCountMin = new HashMap<String,Integer>(); private HashMap<String,Integer> m_hashRepeatCountMax = new HashMap<String,Integer>(); private int m_iMinRepeatCount = -2; private int m_iMaxRepeatCount = -2; private ArrayList<StructureSpecialInformation> m_aSpecialList = new ArrayList<StructureSpecialInformation>(); private String [] m_aLines; private int m_iLineCount = 0; private int m_iCharacterCounter = 0; private ArrayList<String> m_aWarnings = new ArrayList<String>(); /* (non-Javadoc) * @see org.eurocarbdb.glybelfish.SugarImporter#parse(java.lang.String) */ @Override public Sugar parse(String a_strStream) throws SugarImporterException { // reset this.m_aWarnings.clear(); this.m_iCharacterCounter = 0; this.m_iLineCount = 0; this.m_objSugar = new Sugar(); this.m_aSpecialList.clear(); // start parsing int t_iLongestLine = 0; int t_iLineLength = 0; String t_strStructure = a_strStream.replace('\r',' '); t_strStructure = t_strStructure.replaceAll("\t"," "); this.m_aLines = t_strStructure.split("\n"); // find longest String this.m_iLineCount = this.m_aLines.length; for (int i = 0 ; i < this.m_iLineCount ; i++) { t_iLineLength = this.m_aLines[i].length(); if ( t_iLongestLine < t_iLineLength ) { t_iLongestLine = t_iLineLength; } } // create a complette 2D Matrix with Linecount x longestline (fill with " ") for (int i = 0 ; i < this.m_iLineCount ; i++) { this.m_aLines[i] += this.createFillString( t_iLongestLine - this.m_aLines[i].length() ); } // now we have to find the start point , most right non space character for ( int x = (t_iLongestLine - 1) ; x > -1 ; x--) { for ( int y = 0 ; y < this.m_iLineCount ; y++) { if ( this.m_aLines[y].charAt( x ) != ' ' ) { // found it, start parsing // for cyclic and repear units, it starts with "-" "(" <zahl> "-" if ( this.m_aLines[y].charAt( x ) == '-' ) { if ( x < 4 ) { throw new SugarImporterException("IUPAC2D000", x , y ); } // "-" "(" <zahl> if ( this.isCyclic() ) { // cyclic Linkage t_objLinkage = new Linkage(); StructureSpecialInformation t_objSpezialInfo = new StructureSpecialInformation(null,t_objLinkage,null,this.m_objSugar); this.m_aSpecialList.add(t_objSpezialInfo); this.parseLeftLinkage(x,y,null,null,t_objLinkage,t_objSpezialInfo,this.m_objSugar); } else { // repeat SugarUnitRepeat t_objRepeat = new SugarUnitRepeat(); try { this.m_objSugar.addNode(t_objRepeat); } catch (GlycoconjugateException e) { throw new SugarImporterException("COMMON013", x , y ); } int t_iMin = this.m_iMinRepeatCount; int t_iMax = this.m_iMaxRepeatCount; if ( t_iMax == -2 ) { t_iMax = SugarUnitRepeat.UNKNOWN; this.m_aWarnings.add("max. number of repeat interval for outer repeat not set"); } if ( t_iMin == -2 ) { t_iMin = SugarUnitRepeat.UNKNOWN; this.m_aWarnings.add("min. number of repeat interval for outer repeat not set"); } t_objRepeat.setMaxRepeatCount( this.m_iMaxRepeatCount ); t_objRepeat.setMinRepeatCount( this.m_iMinRepeatCount ); Linkage t_objLinkage = new Linkage(); StructureSpecialInformation t_objSpezialInfo = new StructureSpecialInformation(null,t_objLinkage,t_iMin,t_iMax,t_objRepeat,null,this.m_objSugar); this.m_aSpecialList.add(t_objSpezialInfo); this.parseLeftLinkage(x,y,null,null,t_objLinkage,t_objSpezialInfo,t_objRepeat); } } else { // parent, linkage, speciealinfo, graph, start residue muss noch gesetzt werden? this.lookAround( x , y , null , null , null, this.m_objSugar ,false , ""); } x = -1; y = this.m_iLineCount; } } } // we are finished // haben wir wirklich alles gefunden? int t_iChars = 0; for ( int y = 0 ; y < this.m_iLineCount ; y++) { for (int x = 0; x < t_iLongestLine; x++) { if ( this.m_aLines[y].charAt(x) != ' ' ) { t_iChars++; } } } if ( t_iChars != this.m_iCharacterCounter ) { throw new SugarImporterException("IUPAC2D011", -1 , -1 ); } // all spezial units closed? for (Iterator<StructureSpecialInformation> t_iterSpezials = this.m_aSpecialList.iterator(); t_iterSpezials.hasNext();) { if ( !t_iterSpezials.next().isClosed() ) { throw new SugarImporterException("IUPAC2D012", -1 , -1 ); } } return this.m_objSugar; } /** * The Coordinate describe the most right charakter of the monosaccharid (part of the name) * * @param a_iPosX * @param a_iPosY */ private void lookAround(int a_iPosX, int a_iPosY, GlycoNode a_objParent, Linkage a_objParentLinkage, StructureSpecialInformation a_objSpezialinfo, GlycoGraph a_objGraph, boolean a_bLookingForTarget, String a_strResidueNamePart) throws SugarImporterException { int t_iHelper = 0; int t_iLine = 0; // first we have to find out the name of the Glycan ==> return is the position of the first Charakter of the name int t_iNameEnd = this.findName( a_iPosX , a_iPosY ); this.m_iCharacterCounter += a_iPosX - t_iNameEnd + 1; UnvalidatedGlycoNode t_objResidue = new UnvalidatedGlycoNode(); try { t_objResidue.setName( a_strResidueNamePart + this.m_aLines[a_iPosY].substring( t_iNameEnd , a_iPosX + 1 ) ); // there are withespaces in the name? String t_strTemp = t_objResidue.getName(); int t_iMax = t_strTemp.length(); for (int t_iCounter = 0; t_iCounter < t_iMax; t_iCounter++) { if ( t_strTemp.charAt(t_iCounter) == ' ' ) { this.m_iCharacterCounter--; } } a_objGraph.addNode( t_objResidue ); if ( a_objParent != null && a_objParentLinkage != null ) { // there is a parent and we have to add GlycoEdge t_objEdge = new GlycoEdge(); t_objEdge.addGlycosidicLinkage(a_objParentLinkage); a_objGraph.addEdge(a_objParent,t_objResidue,t_objEdge); } if ( a_bLookingForTarget ) { a_objSpezialinfo.setTarget(t_objResidue); } } catch (GlycoconjugateException e) { throw new SugarImporterException("IUPAC2D010", a_iPosX , a_iPosY ); } // now we look for connected glycans // same line? ( 7 because there must be at least a linkage before "-(x-y)-" if ( t_iNameEnd > 7 ) { // ok there could be more, lets look if ( this.m_aLines[a_iPosY].charAt( t_iNameEnd - 1 ) != ' ' ) { // follow the line/linkage to the next residue this.followLinkageBefore( t_iNameEnd - 1 , a_iPosY , t_objResidue , a_objSpezialinfo, a_objGraph ); } } // above? ( there must be at least 1 lines ) if ( a_iPosY > 0 ) { t_iLine = a_iPosY - 1; // there is a line above, so lets look for ( t_iHelper = t_iNameEnd ; t_iHelper <= a_iPosX ; t_iHelper++) { if ( this.m_aLines[t_iLine].charAt( t_iHelper ) == '|' ) { // follow the linkage to the next residue this.followLinkageAbove( t_iHelper , t_iLine , t_objResidue , a_objSpezialinfo, a_objGraph ); } } } // below? ( there must be at least 1 lines ) if ( a_iPosY < this.m_iLineCount - 2 ) { t_iLine = a_iPosY + 1; // there is a line below, so lets look for ( t_iHelper = t_iNameEnd ; t_iHelper <= a_iPosX ; t_iHelper++) { if ( this.m_aLines[t_iLine].charAt( t_iHelper ) == '|' ) { // follow the linkage to the next residue this.followLinkageBelow( t_iHelper , t_iLine , t_objResidue , a_objSpezialinfo, a_objGraph ); } } } // finished with this monosaccharid, lets go back } private String createFillString( int a_iCount ) { String t_strResult = ""; for (int i = 0; i < a_iCount; i++) { t_strResult += " "; } return t_strResult; } /** * * @param a_iPosX first charakter of the Linkage * @throws SugarImporterException */ private void followLinkageBefore(int a_iPosX, int a_iPosY , GlycoNode a_objParent, StructureSpecialInformation a_objSpezialinfo, GlycoGraph a_objGraph) throws SugarImporterException { int t_iX = a_iPosX; // first we have to parse the linkage "-" "(" <left linkage> "-" <right linkage> ")" "-" if ( this.m_aLines[a_iPosY].charAt(t_iX) != '-' ) { throw new SugarImporterException("IUPAC2D001", t_iX , a_iPosY ); } t_iX--; this.m_iCharacterCounter++; this.parseStartLinkage(t_iX,a_iPosY,a_objParent,a_objSpezialinfo,a_objGraph); } /** * Find a Residue above the current Residue. Start is the position there the first '|' should be. * * @param a_iPosX * @param a_iPosY * @throws SugarImporterException */ private void followLinkageAbove(int a_iPosX, int a_iPosY , GlycoNode a_objParent, StructureSpecialInformation a_objSpezialinfo, GlycoGraph a_objGraph) throws SugarImporterException { int t_iX = a_iPosX; int t_iY = a_iPosY; // until the '+' ==> follow the '|' do { if ( this.m_aLines[t_iY].charAt(t_iX) != '|' ) { throw new SugarImporterException("IUPAC2D002", t_iX , t_iY ); } t_iY--; this.m_iCharacterCounter++; if ( t_iY < 0 ) { throw new SugarImporterException("IUPAC2D003", t_iX , t_iY ); } } while ( this.m_aLines[t_iY].charAt(t_iX) != '+' ); // we found the '+' ==> now horizontal parsing "-" "(" <zahl> "-" <zahl> ")" "+" // but we have to check the position against the size of the array // "-" "(" <zahl> "-" <zahl> ")" if ( --t_iX < 0) { throw new SugarImporterException( "IUPAC2D004", t_iX , a_iPosY ); } this.m_iCharacterCounter++; this.parseStartLinkage(t_iX,t_iY,a_objParent,a_objSpezialinfo,a_objGraph); } /** * Find a Residue below the current Residue. Start is the position there the first '|' should be. * * @param a_iPosX * @param a_iPosY * @throws SugarImporterException */ private void followLinkageBelow(int a_iPosX, int a_iPosY , GlycoNode a_objParent, StructureSpecialInformation a_objSpezialinfo, GlycoGraph a_objGraph ) throws SugarImporterException { int t_iX = a_iPosX; int t_iY = a_iPosY; // until the '+' ==> follow the '|' do { if ( this.m_aLines[t_iY].charAt(t_iX) != '|' ) { throw new SugarImporterException( "IUPAC2D002", t_iX , t_iY ); } t_iY++; this.m_iCharacterCounter++; if ( t_iY >= this.m_iLineCount ) { throw new SugarImporterException("IUPAC2D003", t_iX , t_iY ); } } while ( this.m_aLines[t_iY].charAt(t_iX) != '+' ); // we found the '+' ==> now horizontal parsing "-" "(" <zahl> "-" <zahl> ")" "+" // but we have to check the position against the size of the array // "-" "(" <zahl> "-" <zahl> ")" if ( --t_iX < 0) { throw new SugarImporterException( "IUPAC2D004", t_iX , t_iY ); } this.m_iCharacterCounter++; this.parseStartLinkage(t_iX,t_iY,a_objParent,a_objSpezialinfo,a_objGraph); } /** * Methode to find the name of a residue. A-Z a-z - ? <number> , "<" ">" "." ")" "(" "/" "[" "]" * @param posX * @param posY * @return */ private int findName(int a_iPosX, int a_iPosY) { int t_iX = a_iPosX; boolean t_bCharakter = false; char t_cSign = ' '; int t_iSign = 0; do { t_cSign = this.m_aLines[a_iPosY].charAt(t_iX); t_iSign = (int)t_cSign; t_bCharakter = false; if ( t_cSign >= 'A' && t_cSign <= 'Z' ) { t_bCharakter = true; } else if ( t_cSign >= 'a' && t_cSign <= 'z' ) { t_bCharakter = true; } else if ( t_iSign > 47 && t_iSign < 58 ) { t_bCharakter = true; } else if ( t_cSign == '?' || t_cSign == '/' ) { t_bCharakter = true; } else if ( t_cSign == ',' || t_cSign == '.' ) { t_bCharakter = true; } else if ( t_cSign >= '0' && t_cSign <= '9' ) { t_bCharakter = true; } else if ( t_cSign == '<' || t_cSign == '>' ) { t_bCharakter = true; } else if ( t_cSign == '[' || t_cSign == ']' ) { t_bCharakter = true; } else if ( t_cSign == '(' || t_cSign == ')' ) { t_bCharakter = true; } else if ( t_cSign == '\'' || t_cSign == ':' ) { t_bCharakter = true; } else if ( t_cSign == '+' || t_cSign == '=' ) { t_bCharakter = true; } else if ( t_cSign == ';' || t_cSign == '_') { t_bCharakter = true; } else if ( t_cSign == '-' ) { if ( t_iX < 1 ) { t_bCharakter = true; } else { if ( this.m_aLines[a_iPosY].charAt(t_iX-1) != ')' ) { t_bCharakter = true; } else { if ( t_iX < 2 ) { t_bCharakter = true; } else { int t_iDigit = this.m_aLines[a_iPosY].charAt(t_iX-2); if ( (t_iDigit > 47 && t_iDigit < 58) || this.m_aLines[a_iPosY].charAt(t_iX-2) == '?' ) { if ( t_iX < 3 ) { t_bCharakter = true; } else { t_iDigit = this.m_aLines[a_iPosY].charAt(t_iX-3); if (this.m_aLines[a_iPosY].charAt(t_iX-3) == '(' ) { t_bCharakter = true; } else if (t_iDigit > 47 && t_iDigit < 58) { // noch ne zahl if ( t_iX < 4 ) { t_bCharakter = false; } else { if (this.m_aLines[a_iPosY].charAt(t_iX-4) == '(' ) { t_bCharakter = true; } else { t_bCharakter = false; } } } else { t_bCharakter = false; } } } else if ( this.m_aLines[a_iPosY].charAt(t_iX-2) == '\'' || this.m_aLines[a_iPosY].charAt(t_iX-2) == '\"' ) { t_bCharakter = false; } else { if ( this.m_aLines[a_iPosY].charAt(t_iX-2) == '?' || this.m_aLines[a_iPosY].charAt(t_iX-2) == 'O' || this.m_aLines[a_iPosY].charAt(t_iX-2) == 'N' || this.m_aLines[a_iPosY].charAt(t_iX-2) == 'S' ) { if ( t_iX < 3 ) { t_bCharakter = true; } else { if ( this.m_aLines[a_iPosY].charAt(t_iX-3) == '-' || ( this.m_aLines[a_iPosY].charAt(t_iX-2) == 'O' && this.m_aLines[a_iPosY].charAt(t_iX-3) == '[' )) { t_bCharakter = false; } else { t_bCharakter = true; } } } else { t_bCharakter = true; } } } } } } else if ( t_cSign == ' ' && t_iX > 0 ) { if ( this.m_aLines[a_iPosY].charAt(t_iX-1) != ' ' && this.m_aLines[a_iPosY].charAt(t_iX-1) != '+' && this.m_aLines[a_iPosY].charAt(t_iX-1) != '|' ) { t_bCharakter = true; } } t_iX--; // last charakter reached if ( t_iX == -1 ) { if ( t_bCharakter ) { t_iX--; } t_bCharakter = false; } } while ( t_bCharakter ); return ( t_iX + 2 ); } /** * @param t_ix * @param t_iy * @throws SugarImporterException */ private void parseStartLinkage(int a_iX, int a_iY, GlycoNode a_objParent, StructureSpecialInformation a_objSpezialinfo, GlycoGraph a_objGraph) throws SugarImporterException { int t_iX = a_iX; int t_iY = a_iY; if ( this.m_aLines[t_iY].charAt(t_iX) != ')' ) { throw new SugarImporterException( "IUPAC2D005", t_iX , t_iY ); } // "-" "(" <zahl> "-" <zahl> if ( --t_iX < 0) { throw new SugarImporterException( "IUPAC2D004", t_iX , t_iY ); } this.m_iCharacterCounter++; this.parseRightLinkage( t_iX , t_iY , a_objParent , a_objSpezialinfo , a_objGraph ); } private void parseEndLinkage(int a_iPosX , int a_iPosY , GlycoNode a_objParent , Linkage a_objParentLinkage , StructureSpecialInformation a_objSpezial , GlycoGraph a_objGraph, String a_strChildResidueNameEnd,boolean a_bLookingForTarget) throws SugarImporterException { // "-" "(" if ( a_iPosX < 0) { throw new SugarImporterException( "IUPAC2D004", a_iPosX , a_iPosY ); } if ( this.m_aLines[a_iPosY].charAt(a_iPosX) != '(' ) { throw new SugarImporterException( "IUPAC2D005", a_iPosX , a_iPosY ); } // "-" if ( --a_iPosX < 0) { throw new SugarImporterException( "IUPAC2D004", a_iPosX , a_iPosY ); } if ( this.m_aLines[a_iPosY].charAt(a_iPosX) != '-' ) { throw new SugarImporterException( "IUPAC2D005", a_iPosX , a_iPosY ); } a_iPosX--; this.m_iCharacterCounter +=2; this.lookAround( a_iPosX , a_iPosY , a_objParent, a_objParentLinkage, a_objSpezial , a_objGraph , a_bLookingForTarget , a_strChildResidueNameEnd ); } /** * := "repeat" | "cyclic" | <number> [ "]" <character> [ "x" ] ] | "O" | "N" | "S" * * @return number of readed characters or -1 for the end of a repeat or cyclic unit */ private void parseLeftLinkage( int a_iPosX , int a_iPosY , GlycoNode a_objParent , Linkage a_objLinkageNormal , Linkage a_objLinkageStartRepeat , StructureSpecialInformation a_objSpezial , GlycoGraph a_objGraph) throws SugarImporterException { // "-" int t_iX = a_iPosX - 1; this.m_iCharacterCounter++; int t_iY = a_iPosY; String t_strNamesRest = ""; // remove ' while ( this.m_aLines[a_iPosY].charAt(t_iX) == '\'' ) { this.m_iCharacterCounter++; t_iX--; if ( t_iX < 0 ) { throw new SugarImporterException( "IUPAC2D024", t_iX+1 , t_iY ); } } Linkage t_objLinkage = a_objLinkageStartRepeat; if ( a_objLinkageStartRepeat == null ) { // normal linkage not linkage start t_objLinkage = a_objLinkageNormal; } try { if ( t_iX < 1 ) { throw new SugarImporterException( "IUPAC2D004", t_iX , t_iY ); } if ( this.m_aLines[t_iY].charAt(t_iX) == 'O' && this.m_aLines[t_iY].charAt(t_iX-1) == '(' ) { this.m_iCharacterCounter ++; t_iX--; // sauerstoffverbindung t_objLinkage.addChildLinkage(1); if ( a_objLinkageStartRepeat == null ) { this.parseEndLinkage(t_iX,t_iY, a_objParent , t_objLinkage , a_objSpezial, a_objGraph , "O" ,false); } else { this.parseEndLinkage(t_iX,t_iY, null , null , a_objSpezial, a_objGraph , "O" ,true); } return; } if ( this.m_aLines[t_iY].charAt(t_iX) == 'N' && this.m_aLines[t_iY].charAt(t_iX-1) == '(' ) { // N linkage this.m_iCharacterCounter++; t_iX--; UnvalidatedGlycoNode t_objNode = new UnvalidatedGlycoNode(); t_objNode.setName("N"); a_objGraph.addNode(t_objNode); t_objLinkage.addChildLinkage(1); if ( a_objLinkageStartRepeat == null ) { GlycoEdge t_objEdge = new GlycoEdge(); t_objEdge.addGlycosidicLinkage(t_objLinkage); a_objGraph.addEdge(a_objParent,t_objNode,t_objEdge); } else { a_objSpezial.setTarget(t_objNode); } // prepare vor subtree Linkage t_objLinkageNew = new Linkage(); t_objLinkageNew.addParentLinkage(1); t_objLinkageNew.addChildLinkage(Linkage.UNKNOWN_POSITION); this.parseEndLinkage(t_iX , a_iPosY , t_objNode, t_objLinkageNew , a_objSpezial , a_objGraph, "",false); return; } if ( this.m_aLines[t_iY].charAt(t_iX) == 'S' && this.m_aLines[t_iY].charAt(t_iX-1) == '(' ) { // S linkage this.m_iCharacterCounter++; t_iX--; UnvalidatedGlycoNode t_objNode = new UnvalidatedGlycoNode(); t_objNode.setName("S"); a_objGraph.addNode(t_objNode); t_objLinkage.addChildLinkage(1); if ( a_objLinkageStartRepeat == null ) { GlycoEdge t_objEdge = new GlycoEdge(); t_objEdge.addGlycosidicLinkage(t_objLinkage); a_objGraph.addEdge(a_objParent,t_objNode,t_objEdge); } else { a_objSpezial.setTarget(t_objNode); } // prepare vor subtree Linkage t_objLinkageNew = new Linkage(); t_objLinkageNew.addParentLinkage(1); t_objLinkageNew.addChildLinkage(Linkage.UNKNOWN_POSITION); this.parseEndLinkage(t_iX , a_iPosY , t_objNode, t_objLinkageNew , a_objSpezial , a_objGraph, "",false); return; } // test for cyclic & repeat if ( this.m_aLines[t_iY].charAt(t_iX) == 't' || this.m_aLines[t_iY].charAt(t_iX) == 'c' ) { // there must be at least 6 signs if ( t_iX > 4 ) { String t_strPart = this.m_aLines[t_iY].substring( t_iX - 5 , t_iX + 1 ); if ( t_strPart.toLowerCase().equalsIgnoreCase("repeat") ) { if ( a_objLinkageStartRepeat != null ) { throw new SugarImporterException( "IUPAC2D023", t_iX , t_iY ); } // repeat end if ( a_objSpezial == null ) { throw new SugarImporterException( "IUPAC2D015", a_iPosX , a_iPosY ); } if ( a_objSpezial.getType() != StructureSpecialInformation.REPEAT ) { throw new SugarImporterException( "IUPAC2D015", a_iPosX , a_iPosY ); } t_iX -= 6; this.m_iCharacterCounter +=6; // is one charakter before? if so, then it must be a whitespace if ( t_iX > -1 ) { if ( this.m_aLines[t_iY].charAt(t_iX) != ' ' ) { throw new SugarImporterException( "IUPAC2D006", t_iX , t_iY ); } } // fill repeat Linkage t_objInternal = a_objSpezial.getIncomingLinkage(); t_objInternal.setParentLinkages(t_objLinkage.getParentLinkages()); GlycoEdge t_objEdge = new GlycoEdge(); t_objEdge.addGlycosidicLinkage(t_objInternal); SugarUnitRepeat t_objRepeat = a_objSpezial.getRepeatBlock(); t_objRepeat.setRepeatLinkage(t_objEdge,a_objParent,a_objSpezial.getTarget()); t_objRepeat.setMinRepeatCount( a_objSpezial.getRepeatCountMin() ); t_objRepeat.setMaxRepeatCount( a_objSpezial.getRepeatCountMax() ); // restore spezial if ( a_objSpezial.isClosed() ) { throw new SugarImporterException( "IUPAC2D015", t_iX , t_iY ); } a_objSpezial.close(); return; } if ( t_strPart.toLowerCase().equalsIgnoreCase("cyclic") ) { if ( a_objLinkageStartRepeat != null ) { throw new SugarImporterException( "IUPAC2D023", t_iX , t_iY ); } // cyclic end if ( a_objSpezial == null ) { throw new SugarImporterException( "IUPAC2D016", a_iPosX , a_iPosY ); } if ( a_objSpezial.getType() != StructureSpecialInformation.CYCLIC ) { throw new SugarImporterException( "IUPAC2D016", a_iPosX , a_iPosY ); } t_iX -= 6; this.m_iCharacterCounter +=6; // is one charakter before? if so, then it must be a whitespace if ( t_iX > -1 ) { if ( this.m_aLines[t_iY].charAt(t_iX) != ' ' ) { throw new SugarImporterException( "IUPAC2D007", t_iX , t_iY ); } } // cyclic aufbauen GlycoEdge t_objEdge = new GlycoEdge(); Linkage t_objCyclic = a_objSpezial.getIncomingLinkage(); t_objCyclic.setParentLinkages(t_objLinkage.getParentLinkages()); t_objEdge.addGlycosidicLinkage(t_objCyclic); a_objGraph.addEdge(a_objParent,a_objSpezial.getTarget(),t_objEdge); if ( a_objSpezial.isClosed() ) { throw new SugarImporterException( "IUPAC2D016", t_iX , t_iY ); } a_objSpezial.close(); return; } } } // TODO if ( this.m_aLines[t_iY].charAt(t_iX) == ']' ) { throw new SugarImporterException( "IUPAC2D020", t_iX , t_iY ); } // <number> [ "]" <character> [ "x" ] ] int t_iDigit = (int)this.m_aLines[t_iY].charAt(t_iX); boolean t_bRepeat = false; String t_strRepeatSymbol = ""; if ( (t_iDigit < 48 || t_iDigit > 57) && this.m_aLines[t_iY].charAt(t_iX) != '?' ) { t_bRepeat = true; // no number ==> repeat opening while ( this.m_aLines[t_iY].charAt(t_iX) != ']' ) { t_strRepeatSymbol = this.m_aLines[t_iY].charAt(t_iX) + t_strRepeatSymbol; t_iX--; this.m_iCharacterCounter++; if ( t_iX < 1 ) { throw new SugarImporterException( "IUPAC2D019", t_iX , t_iY ); } } t_iX--; this.m_iCharacterCounter++; t_iDigit = (int)this.m_aLines[t_iY].charAt(t_iX); } // linkage position if ( this.m_aLines[t_iY].charAt(t_iX) == '?' ) { t_objLinkage.addChildLinkage(Linkage.UNKNOWN_POSITION); t_iX--; this.m_iCharacterCounter++; } else if ( this.m_aLines[t_iY].charAt(t_iX) == 'O' ) { t_objLinkage.addChildLinkage(1); t_iX--; this.m_iCharacterCounter++; t_strNamesRest = "O"; } else { if ( t_iDigit < 48 || t_iDigit > 57 ) { throw new SugarImporterException( "IUPAC2D019", t_iX , t_iY ); } int t_iNumber = 0; // now a normal number or number / number while ( t_iDigit >= 48 && t_iDigit <= 57 ) { t_iNumber = (10 * t_iNumber) + (t_iDigit-48); // is a number this.m_iCharacterCounter++; t_iX--; if ( t_iX >= 0 ) { t_iDigit = (int)this.m_aLines[t_iY].charAt(t_iX); } else { t_iDigit = 0; } } t_objLinkage.addChildLinkage(t_iNumber); // one character parsed while( this.m_aLines[t_iY].charAt(t_iX) == '/' ) { t_iX--; this.m_iCharacterCounter++; if ( t_iX < 0 ) { throw new SugarImporterException( "IUPAC2D009", t_iX+1 , t_iY ); } t_iDigit = (int)this.m_aLines[t_iY].charAt(t_iX); if ( t_iDigit < 48 || t_iDigit > 57 ) { throw new SugarImporterException( "IUPAC2D009", t_iX , t_iY ); } t_iNumber = 0; while ( t_iDigit >= 48 && t_iDigit <= 57 ) { t_iNumber = (10 * t_iNumber) + (t_iDigit-48); // is a number this.m_iCharacterCounter++; t_iX--; if ( t_iX >= 0 ) { t_iDigit = (int)this.m_aLines[t_iY].charAt(t_iX); } else { t_iDigit = 0; } } t_objLinkage.addChildLinkage(t_iNumber); } } if ( t_bRepeat ) { // create new repeat SugarUnitRepeat t_objRepeat = new SugarUnitRepeat(); a_objGraph.addNode(t_objRepeat); if ( a_objLinkageStartRepeat == null ) { GlycoEdge t_objEdge = new GlycoEdge(); t_objEdge.addGlycosidicLinkage(t_objLinkage); a_objGraph.addEdge(a_objParent,t_objRepeat,t_objEdge); } else { a_objSpezial.setTarget(t_objRepeat); } if ( t_strRepeatSymbol.length() > 1 ) { if ( t_strRepeatSymbol.charAt(t_strRepeatSymbol.length()-1) == 'x' ) { t_strRepeatSymbol = t_strRepeatSymbol.substring(0,t_strRepeatSymbol.length()-1); } } Integer t_iRepCountMin = this.m_hashRepeatCountMin.get(t_strRepeatSymbol); if ( t_iRepCountMin == null ) { t_iRepCountMin = SugarUnitRepeat.UNKNOWN; this.m_aWarnings.add("min. number of repeat interval for inter repeat '" + t_strRepeatSymbol + "' not set"); } t_objRepeat.setMinRepeatCount( t_iRepCountMin ); Integer t_iRepCountMax = this.m_hashRepeatCountMax.get(t_strRepeatSymbol); if ( t_iRepCountMax == null ) { t_iRepCountMax = SugarUnitRepeat.UNKNOWN ; this.m_aWarnings.add("max. number of repeat interval for inter repeat '" + t_strRepeatSymbol + "' not set"); } t_objRepeat.setMaxRepeatCount( t_iRepCountMax ); // new special unit Linkage t_objLinkageNew = new Linkage(); t_objLinkageNew.setChildLinkages( t_objLinkage.getChildLinkages() ); StructureSpecialInformation t_objSpezialInfo = new StructureSpecialInformation(null,t_objLinkageNew,t_iRepCountMin,t_iRepCountMax,t_objRepeat,a_objSpezial,a_objGraph); this.m_aSpecialList.add(t_objSpezialInfo); this.parseEndLinkage(t_iX,t_iY,null,null,t_objSpezialInfo,t_objRepeat,t_strNamesRest,true); } else { if ( a_objLinkageStartRepeat == null ) { this.parseEndLinkage(t_iX,t_iY,a_objParent,t_objLinkage,a_objSpezial,a_objGraph,t_strNamesRest,false); } else { this.parseEndLinkage(t_iX,t_iY,null,null,a_objSpezial,a_objGraph,t_strNamesRest,true); } } return; } catch (GlycoconjugateException e) { throw new SugarImporterException( "COMMON013", t_iX , t_iY ); } } // right linkage : <number> | "N" | <zahl> "/" <zahl> private void parseRightLinkage( int a_iPosX , int a_iPosY , GlycoNode a_objParent , StructureSpecialInformation a_objSpezial , GlycoGraph a_objGraph) throws SugarImporterException { int t_iX = a_iPosX; int t_iY = a_iPosY; // remove ' while ( this.m_aLines[a_iPosY].charAt(t_iX) == '\'' ) { this.m_iCharacterCounter++; t_iX--; if ( t_iX < 0 ) { throw new SugarImporterException( "IUPAC2D024", t_iX+1 , t_iY ); } } int t_iDigit = (int)this.m_aLines[a_iPosY].charAt(t_iX); try { if ( t_iDigit >= 48 && t_iDigit <= 57 ) { Linkage t_objLinkage = new Linkage(); int t_iNumber = 0; while ( t_iDigit >= 48 && t_iDigit <= 57 ) { t_iNumber = (10 * t_iNumber) + (t_iDigit-48); // is a number this.m_iCharacterCounter++; t_iX--; if ( t_iX < 0 ) { t_objLinkage.addParentLinkage(t_iNumber); this.parseLeftLinkage(t_iX , a_iPosY , a_objParent, t_objLinkage , null , a_objSpezial , a_objGraph); return; } t_iDigit = (int)this.m_aLines[t_iY].charAt(t_iX); } t_objLinkage.addParentLinkage(t_iNumber); // one character parsed while( this.m_aLines[t_iY].charAt(t_iX) == '/' ) { t_iX--; this.m_iCharacterCounter++; if ( t_iX < 0 ) { throw new SugarImporterException( "IUPAC2D009", t_iX+1 , t_iY ); } t_iDigit = (int)this.m_aLines[t_iY].charAt(t_iX); if ( t_iDigit < 48 || t_iDigit > 57 ) { throw new SugarImporterException( "IUPAC2D009", t_iX , t_iY ); } t_iNumber = 0; while ( t_iDigit >= 48 && t_iDigit <= 57 ) { t_iNumber = (10 * t_iNumber) + (t_iDigit-48); // is a number this.m_iCharacterCounter++; t_iX--; if ( t_iX < 0 ) { t_objLinkage.addParentLinkage(t_iNumber); this.parseLeftLinkage(t_iX , a_iPosY , a_objParent, t_objLinkage , null , a_objSpezial , a_objGraph); return; } t_iDigit = (int)this.m_aLines[t_iY].charAt(t_iX); } t_objLinkage.addParentLinkage(t_iNumber); } if ( this.m_aLines[t_iY].charAt(t_iX) == '[' ) { t_iX--; this.m_iCharacterCounter++; if ( a_objSpezial == null ) { throw new SugarImporterException( "IUPAC2D014", a_iPosX , a_iPosY ); } if ( a_objSpezial.getType() != StructureSpecialInformation.REPEAT ) { throw new SugarImporterException( "IUPAC2D014", a_iPosX , a_iPosY ); } // fill repeat Linkage t_objInternal = a_objSpezial.getIncomingLinkage(); t_objInternal.setParentLinkages(t_objLinkage.getParentLinkages()); GlycoEdge t_objEdge = new GlycoEdge(); t_objEdge.addGlycosidicLinkage(t_objInternal); SugarUnitRepeat t_objRepeat = a_objSpezial.getRepeatBlock(); t_objRepeat.setRepeatLinkage(t_objEdge,a_objParent,a_objSpezial.getTarget()); t_objRepeat.setMinRepeatCount( a_objSpezial.getRepeatCountMin() ); t_objRepeat.setMaxRepeatCount( a_objSpezial.getRepeatCountMax() ); // restore spezial if ( a_objSpezial.isClosed() ) { throw new SugarImporterException( "IUPAC2D021", a_iPosX , a_iPosY ); } a_objSpezial.close(); this.parseLeftLinkage(t_iX , a_iPosY , t_objRepeat, t_objLinkage , null , a_objSpezial.getParentInfo() , a_objSpezial.getParentUnit() ); return; } this.parseLeftLinkage(t_iX , a_iPosY , a_objParent, t_objLinkage , null , a_objSpezial , a_objGraph); return; } else if ( this.m_aLines[t_iY].charAt(t_iX) == '?' ) { t_iX--; this.m_iCharacterCounter++; Linkage t_objLinkage = new Linkage(); t_objLinkage.addParentLinkage(Linkage.UNKNOWN_POSITION); if ( t_iX < 0 ) { this.parseLeftLinkage(t_iX , a_iPosY , a_objParent, t_objLinkage , null , a_objSpezial , a_objGraph); } if ( this.m_aLines[t_iY].charAt(t_iX) == '[' ) { t_iX--; this.m_iCharacterCounter++; if ( a_objSpezial == null ) { throw new SugarImporterException( "IUPAC2D014", a_iPosX , a_iPosY ); } if ( a_objSpezial.getType() != StructureSpecialInformation.REPEAT ) { throw new SugarImporterException( "IUPAC2D014", a_iPosX , a_iPosY ); } // fill repeat Linkage t_objInternal = a_objSpezial.getIncomingLinkage(); t_objInternal.setParentLinkages(t_objLinkage.getParentLinkages()); GlycoEdge t_objEdge = new GlycoEdge(); t_objEdge.addGlycosidicLinkage(t_objInternal); SugarUnitRepeat t_objRepeat = a_objSpezial.getRepeatBlock(); t_objRepeat.setRepeatLinkage(t_objEdge,a_objParent,a_objSpezial.getTarget()); t_objRepeat.setMinRepeatCount( a_objSpezial.getRepeatCountMin() ); t_objRepeat.setMaxRepeatCount( a_objSpezial.getRepeatCountMax() ); // restore spezial if ( a_objSpezial.isClosed() ) { throw new SugarImporterException( "IUPAC2D021", a_iPosX , a_iPosY ); } a_objSpezial.close(); this.parseLeftLinkage(t_iX , a_iPosY , t_objRepeat, t_objLinkage , null , a_objSpezial.getParentInfo() , a_objSpezial.getParentUnit() ); return; } this.parseLeftLinkage(t_iX , a_iPosY , a_objParent, t_objLinkage , null , a_objSpezial , a_objGraph); return; } else if ( this.m_aLines[t_iY].charAt(t_iX) == 'O' ) { t_iX--; this.m_iCharacterCounter++; if ( a_objParent.getClass() != UnvalidatedGlycoNode.class ) { throw new SugarImporterException( "IUPAC2D013", a_iPosX , a_iPosY ); } UnvalidatedGlycoNode t_objNode = (UnvalidatedGlycoNode)a_objParent; t_objNode.setName( "O" + t_objNode.getName() ); Linkage t_objLinkage = new Linkage(); t_objLinkage.addParentLinkage(1); if ( t_iX < 0 ) { this.parseLeftLinkage(t_iX , a_iPosY , a_objParent, t_objLinkage , null , a_objSpezial , a_objGraph); return; } if ( this.m_aLines[t_iY].charAt(t_iX) == '[' ) { t_iX--; this.m_iCharacterCounter++; if ( a_objSpezial == null ) { throw new SugarImporterException( "IUPAC2D014", a_iPosX , a_iPosY ); } if ( a_objSpezial.getType() != StructureSpecialInformation.REPEAT ) { throw new SugarImporterException( "IUPAC2D014", a_iPosX , a_iPosY ); } // fill repeat Linkage t_objInternal = a_objSpezial.getIncomingLinkage(); t_objInternal.setParentLinkages(t_objLinkage.getParentLinkages()); GlycoEdge t_objEdge = new GlycoEdge(); t_objEdge.addGlycosidicLinkage(t_objInternal); SugarUnitRepeat t_objRepeat = a_objSpezial.getRepeatBlock(); t_objRepeat.setRepeatLinkage(t_objEdge,a_objParent,a_objSpezial.getTarget()); t_objRepeat.setMinRepeatCount( a_objSpezial.getRepeatCountMin() ); t_objRepeat.setMaxRepeatCount( a_objSpezial.getRepeatCountMax() ); // restore spezial if ( a_objSpezial.isClosed() ) { throw new SugarImporterException( "IUPAC2D021", a_iPosX , a_iPosY ); } a_objSpezial.close(); this.parseLeftLinkage(t_iX , a_iPosY , t_objRepeat, t_objLinkage , null , a_objSpezial.getParentInfo() , a_objSpezial.getParentUnit() ); return; } this.parseLeftLinkage(t_iX , a_iPosY , a_objParent, t_objLinkage , null , a_objSpezial , a_objGraph); return; } else if ( this.m_aLines[t_iY].charAt(t_iX) == 'N' ) { t_iX--; this.m_iCharacterCounter++; UnvalidatedGlycoNode t_objNode = new UnvalidatedGlycoNode(); t_objNode.setName("N"); a_objGraph.addNode(t_objNode); GlycoEdge t_objEdge = new GlycoEdge(); Linkage t_objLinkage = new Linkage(); t_objLinkage.addParentLinkage(Linkage.UNKNOWN_POSITION); t_objLinkage.addChildLinkage(1); t_objEdge.addGlycosidicLinkage(t_objLinkage); a_objGraph.addEdge(a_objParent,t_objNode,t_objEdge); t_objLinkage = new Linkage(); t_objLinkage.addParentLinkage(1); this.parseLeftLinkage(t_iX , a_iPosY , t_objNode, t_objLinkage , null , a_objSpezial , a_objGraph); return; } else if ( this.m_aLines[t_iY].charAt(t_iX) == 'S' ) { t_iX--; this.m_iCharacterCounter++; UnvalidatedGlycoNode t_objNode = new UnvalidatedGlycoNode(); t_objNode.setName("S"); a_objGraph.addNode(t_objNode); GlycoEdge t_objEdge = new GlycoEdge(); Linkage t_objLinkage = new Linkage(); t_objLinkage.addParentLinkage(Linkage.UNKNOWN_POSITION); t_objLinkage.addChildLinkage(1); t_objEdge.addGlycosidicLinkage(t_objLinkage); a_objGraph.addEdge(a_objParent,t_objNode,t_objEdge); t_objLinkage = new Linkage(); t_objLinkage.addParentLinkage(1); this.parseLeftLinkage(t_iX , a_iPosY , t_objNode, t_objLinkage , null , a_objSpezial , a_objGraph); return; } throw new SugarImporterException( "IUPAC2D008", a_iPosX , a_iPosY ); } catch (GlycoconjugateException e) { throw new SugarImporterException( "COMMON013", a_iPosX,a_iPosY); } } public void setMinRepeatCount(String a_strSymbol, int a_iCount) { if ( a_strSymbol == null ) { this.m_iMinRepeatCount = a_iCount; } else { this.m_hashRepeatCountMin.put(a_strSymbol,a_iCount); } } public void setMaxRepeatCount(String a_strSymbol, int a_iCount) { if ( a_strSymbol == null ) { this.m_iMaxRepeatCount = a_iCount; } else { this.m_hashRepeatCountMax.put(a_strSymbol,a_iCount); } } /** * @return */ private boolean isCyclic() { for (int i = 0 ; i < this.m_iLineCount ; i++) { String t_strTemp = this.m_aLines[i].toLowerCase(); if ( t_strTemp.indexOf("cyclic") != -1 ) { return true; } } return false; } public ArrayList<String> getWarnings() { return this.m_aWarnings; } public void clearRepeatCounts() { this.m_hashRepeatCountMax.clear(); this.m_hashRepeatCountMin.clear(); this.m_iMaxRepeatCount = -2; this.m_iMinRepeatCount = -2; } }