/*
* EuroCarbDB, a framework for carbohydrate bioinformatics
*
* Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as
* indicated by the @author tags or express copyright attribution
* statements applied by the authors.
*
* This copyrighted material is made available to anyone wishing to use, modify,
* copy, or redistribute it subject to the terms and conditions of the GNU
* Lesser General Public License, as published by the Free Software Foundation.
* A copy of this license accompanies this distribution in the file LICENSE.txt.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
* for more details.
*
* Last commit: $Rev: 1210 $ by $Author: glycoslave $ on $Date:: 2009-06-12 #$
*/
package org.eurocarbdb.MolecularFramework.util.validation;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import org.eurocarbdb.MolecularFramework.sugar.Anomer;
import org.eurocarbdb.MolecularFramework.sugar.BaseType;
import org.eurocarbdb.MolecularFramework.sugar.GlycoEdge;
import org.eurocarbdb.MolecularFramework.sugar.GlycoGraph;
import org.eurocarbdb.MolecularFramework.sugar.GlycoGraphAlternative;
import org.eurocarbdb.MolecularFramework.sugar.GlycoNode;
import org.eurocarbdb.MolecularFramework.sugar.GlycoconjugateException;
import org.eurocarbdb.MolecularFramework.sugar.Linkage;
import org.eurocarbdb.MolecularFramework.sugar.LinkageType;
import org.eurocarbdb.MolecularFramework.sugar.Modification;
import org.eurocarbdb.MolecularFramework.sugar.ModificationType;
import org.eurocarbdb.MolecularFramework.sugar.Monosaccharide;
import org.eurocarbdb.MolecularFramework.sugar.NonMonosaccharide;
import org.eurocarbdb.MolecularFramework.sugar.Substituent;
import org.eurocarbdb.MolecularFramework.sugar.SubstituentType;
import org.eurocarbdb.MolecularFramework.sugar.Sugar;
import org.eurocarbdb.MolecularFramework.sugar.SugarUnitAlternative;
import org.eurocarbdb.MolecularFramework.sugar.SugarUnitCyclic;
import org.eurocarbdb.MolecularFramework.sugar.SugarUnitRepeat;
import org.eurocarbdb.MolecularFramework.sugar.Superclass;
import org.eurocarbdb.MolecularFramework.sugar.UnderdeterminedSubTree;
import org.eurocarbdb.MolecularFramework.sugar.UnvalidatedGlycoNode;
import org.eurocarbdb.MolecularFramework.util.traverser.GlycoTraverser;
import org.eurocarbdb.MolecularFramework.util.traverser.GlycoTraverserValdidation;
import org.eurocarbdb.MolecularFramework.util.visitor.GlycoVisitor;
import org.eurocarbdb.MolecularFramework.util.visitor.GlycoVisitorException;
import org.eurocarbdb.MolecularFramework.util.visitor.GlycoVisitorNodeType;
/**
* Validation for connected Sugars. Validates each residue and its direct connections.
*
* TODO:
* - test linkage positions for substituents (MSDB)
* - test/warn if a substituent typically does not have a child linkage
* - uncertain terminal residues, structure comparison (2x same uncertain structure + 2x same two attach positions = distinct structure)
* - getSimpleGlycoNode returns null for Alternative nodes (which means that in this case no further testing is done)
* - test if the minimum constraint for an alternative unit is given
* - test for hierarchy of non sharp information
*/
public class GlycoVisitorValidation implements GlycoVisitor
{
// Error messages collected during validation; exposed through getErrors().
private ArrayList<String> m_aErrorList = new ArrayList<String>();
// Warning messages collected during validation; exposed through getWarnings().
private ArrayList<String> m_aWarningList = new ArrayList<String>();
// Graph (sugar, underdetermined subtree or repeat unit) that is currently validated;
// the visit methods use it to test whether nodes belong to the same block.
private GlycoGraph m_objGlycoGraph = null;
// Edges that were already checked by visit(GlycoEdge), so each edge is tested only once.
private ArrayList<GlycoEdge> m_aEdge = new ArrayList<GlycoEdge>();
// Helper visitor used to ask for the concrete type of a GlycoNode.
private GlycoVisitorNodeType m_visNodeType = new GlycoVisitorNodeType();
/**
 * Gives access to the error messages collected so far.
 *
 * @return the internal list of error messages (not a copy)
 */
public ArrayList<String> getErrors()
{
    return m_aErrorList;
}
/**
 * Gives access to the warning messages collected so far.
 *
 * @return the internal list of warning messages (not a copy)
 */
public ArrayList<String> getWarnings()
{
    return m_aWarningList;
}
/**
 * Creates the traverser that drives the given visitor through a glyco graph.
 *
 * @param a_objVisitor visitor that is handed to the new traverser
 * @return a new GlycoTraverserValdidation for the visitor
 * @throws GlycoVisitorException declared by the visitor interface
 */
public GlycoTraverser getTraverser(GlycoVisitor a_objVisitor) throws GlycoVisitorException
{
    GlycoTraverser t_objTraverser = new GlycoTraverserValdidation(a_objVisitor);
    return t_objTraverser;
}
/**
 * Validates a single edge. Each edge is tested only once; edges that were
 * already checked are skipped.
 *
 * - must contain at least one linkage
 * - each linkage must have at least one parent and one child linkage position
 * - linkage position may not be duplicated
 * - UNKNOWN_POSITION and another position value are not allowed in the same linkage
 * - parent and child may not be null and must be part of the GlycoGraph (m_objGlycoGraph)
 * - child and parent residue must reference this edge as parent / child edge
 *
 * A missing child or parent is reported as an error; the checks that need the
 * missing residue are then skipped instead of failing with a NullPointerException.
 *
 * @param a_objEdge edge to validate
 * @throws GlycoVisitorException declared by the visitor interface
 */
public void visit(GlycoEdge a_objEdge) throws GlycoVisitorException
{
    if ( this.m_aEdge.contains(a_objEdge) )
    {
        // Edge was checked before.
        return;
    }
    GlycoNode t_objChild = a_objEdge.getChild();
    GlycoNode t_objParent = a_objEdge.getParent();
    if ( t_objChild == null || t_objParent == null )
    {
        this.m_aErrorList.add("Child or parent are null in Edge.");
    }
    if ( t_objChild != null && !this.m_objGlycoGraph.containsNode(t_objChild) )
    {
        this.m_aErrorList.add("Child node of an linkage is not part of the same sugar block.");
    }
    if ( t_objParent != null && !this.m_objGlycoGraph.containsNode(t_objParent) )
    {
        this.m_aErrorList.add("Parent node of an linkage is not part of the same sugar block.");
    }
    this.testLinkageArray(a_objEdge);
    // back references: the residues must know this edge as well
    if ( t_objChild != null && t_objChild.getParentEdge() != a_objEdge )
    {
        this.m_aErrorList.add("Child residue in edge does not have this edge as parent.");
    }
    if ( t_objParent != null && !t_objParent.getChildEdges().contains(a_objEdge) )
    {
        this.m_aErrorList.add("Parent residue in edge does not have this edge as child.");
    }
    this.m_aEdge.add(a_objEdge);
}
/**
 * Tests all linkages of an edge and records every violation as an error.
 *
 * - the edge must contain at least one linkage
 * - each linkage needs at least one child and one parent position
 * - a position may not occur twice on the same side of a linkage
 * - UNKNOWN_POSITION may not be combined with other positions on the same side
 * - child and parent linkage type must be set
 *
 * @param a_objEdge edge whose linkages are tested
 */
private void testLinkageArray(GlycoEdge a_objEdge)
{
    if ( a_objEdge.getGlycosidicLinkages().isEmpty() )
    {
        this.m_aErrorList.add("GlycoEdge contains no Linkages.");
    }
    for (Linkage t_objLinkage : a_objEdge.getGlycosidicLinkages())
    {
        // child side
        ArrayList<Integer> t_aChildPositions = t_objLinkage.getChildLinkages();
        if ( t_aChildPositions.isEmpty() )
        {
            this.m_aErrorList.add("No child linkage position given in GlycoEdge.");
        }
        ArrayList<Integer> t_aSeen = new ArrayList<Integer>();
        for (Integer t_iChildPosition : t_aChildPositions)
        {
            if ( t_aSeen.contains(t_iChildPosition) )
            {
                this.m_aErrorList.add("Duplicated child linkage position in GlycoEdge.");
            }
            t_aSeen.add(t_iChildPosition);
        }
        if ( t_aChildPositions.size() > 1 && t_aChildPositions.contains(Linkage.UNKNOWN_POSITION) )
        {
            this.m_aErrorList.add("Unknown and defined positions in one GlycoEdge.");
        }
        // parent side
        ArrayList<Integer> t_aParentPositions = t_objLinkage.getParentLinkages();
        if ( t_aParentPositions.isEmpty() )
        {
            this.m_aErrorList.add("No parent linkage position given in GlycoEdge.");
        }
        t_aSeen = new ArrayList<Integer>();
        for (Integer t_iParentPosition : t_aParentPositions)
        {
            if ( t_aSeen.contains(t_iParentPosition) )
            {
                this.m_aErrorList.add("Duplicated parent linkage position in GlycoEdge.");
            }
            t_aSeen.add(t_iParentPosition);
        }
        if ( t_aParentPositions.size() > 1 && t_aParentPositions.contains(Linkage.UNKNOWN_POSITION) )
        {
            this.m_aErrorList.add("Unknown and defined positions in one GlycoEdge.");
        }
        // linkage types of both sides have to be present
        boolean t_bChildTypeMissing = ( t_objLinkage.getChildLinkageType() == null );
        if ( t_bChildTypeMissing || t_objLinkage.getParentLinkageType() == null )
        {
            this.m_aErrorList.add("Linkagetype is not set.");
        }
    }
}
/**
 * Validates a NonMonosaccharide residue.
 *
 * - always adds a warning, because NonMonosaccharides are not allowed in unique GlycoCT
 * - every linkage type on the NonMonosaccharide side (child side of the parent edge,
 *   parent side of each child edge) must be LinkageType.NONMONOSACCHARID
 *
 * @param a_objResidue residue to validate
 * @throws GlycoVisitorException declared by the visitor interface
 */
public void visit(NonMonosaccharide a_objResidue) throws GlycoVisitorException
{
    this.m_aWarningList.add("NonMonosaccharides are not allowed in unique GlycoCT.");
    // linkage towards the parent residue
    GlycoEdge t_objParentEdge = a_objResidue.getParentEdge();
    if ( t_objParentEdge != null )
    {
        for (Linkage t_objLinkage : t_objParentEdge.getGlycosidicLinkages())
        {
            if ( LinkageType.NONMONOSACCHARID != t_objLinkage.getChildLinkageType() )
            {
                this.m_aErrorList.add("LinkageType for NonMonosaccharide must be LinkageType.NONMONOSACCHARID.");
            }
        }
    }
    // linkages towards the child residues
    for (GlycoEdge t_objChildEdge : a_objResidue.getChildEdges())
    {
        for (Linkage t_objLinkage : t_objChildEdge.getGlycosidicLinkages())
        {
            if ( LinkageType.NONMONOSACCHARID != t_objLinkage.getParentLinkageType() )
            {
                this.m_aErrorList.add("LinkageType for NonMonosaccharide must be LinkageType.NONMONOSACCHARID.");
            }
        }
    }
}
/**
 * An UnvalidatedGlycoNode is never allowed in a validated sugar, so visiting
 * one always records an error.
 *
 * @param a_objUnvalidated the offending node (not inspected further)
 * @throws GlycoVisitorException declared by the visitor interface
 */
public void visit(UnvalidatedGlycoNode a_objUnvalidated) throws GlycoVisitorException
{
    final String t_strMessage = "UnvalidatedGlycoNode in Sugar.";
    m_aErrorList.add(t_strMessage);
}
/**
 * Validates a substituent and its linkages.
 *
 * - test if all Linkagetypes around the substituent are set to LinkageType.NONMONOSACCHARID
 * - test the linkage positions on the substituent side (position 1 for non complex
 *   substituent types, larger than 0 or UNKNOWN_POSITION for complex types)
 * - test if the count of linkages matches the min/max valence of the SubstituentType
 * - epoxy, anhydro, lactone: exactly 2 linkages to the parent and no child linkages
 * - subst - subst linkage forbidden (beside repeat and alternative)
 *
 * A missing SubstituentType is reported as an error; the type dependent position
 * and valence checks are then skipped. A missing parent edge of an epoxy, lactone
 * or anhydro substituent is reported with the same error as a wrong linkage count.
 *
 * @param a_objSubstituent substituent to validate
 * @throws GlycoVisitorException if the node type of the parent residue can not be determined
 */
public void visit(Substituent a_objSubstituent) throws GlycoVisitorException
{
    // read once; may be null, which is reported further below
    SubstituentType t_objType = a_objSubstituent.getSubstituentType();
    GlycoEdge t_objParentEdge = a_objSubstituent.getParentEdge();
    int t_iLinkageCount = 0;
    // linkage to the parent: the substituent is the child side
    if ( t_objParentEdge != null )
    {
        for (Linkage t_objLinkage : t_objParentEdge.getGlycosidicLinkages())
        {
            t_iLinkageCount++;
            if ( t_objType != null )
            {
                if ( !t_objType.getComplexType() )
                {
                    for (Integer t_iPos : t_objLinkage.getChildLinkages())
                    {
                        if ( !t_iPos.equals(1) )
                        {
                            this.m_aErrorList.add("For this substituent "+t_objType.getName()+" linkage pos must be 1.");
                        }
                    }
                }
                else
                {
                    for (int t_iPos : t_objLinkage.getChildLinkages())
                    {
                        if ( t_iPos < 1 && t_iPos != Linkage.UNKNOWN_POSITION )
                        {
                            this.m_aErrorList.add("For this substituent "+t_objType.getName()+" linkage pos must be larger than 0.");
                        }
                    }
                }
            }
            if ( t_objLinkage.getChildLinkageType() != LinkageType.NONMONOSACCHARID )
            {
                this.m_aErrorList.add("LinkageType for substituent must be LinkageType.NONMONOSACCHARID.");
            }
        }
    }
    // linkages to the children: the substituent is the parent side
    for (GlycoEdge t_objChildEdge : a_objSubstituent.getChildEdges())
    {
        for (Linkage t_objLinkage : t_objChildEdge.getGlycosidicLinkages())
        {
            t_iLinkageCount++;
            if ( t_objType != null )
            {
                if ( !t_objType.getComplexType() )
                {
                    for (Integer t_iPos : t_objLinkage.getParentLinkages())
                    {
                        if ( !t_iPos.equals(1) )
                        {
                            this.m_aErrorList.add("For this substituent linkage pos must be 1.");
                        }
                    }
                }
                else
                {
                    for (int t_iPos : t_objLinkage.getParentLinkages())
                    {
                        if ( t_iPos < 1 && t_iPos != Linkage.UNKNOWN_POSITION )
                        {
                            this.m_aErrorList.add("For this substituent "+t_objType.getName()+" linkage pos must be larger than 0.");
                        }
                    }
                }
            }
            if ( t_objLinkage.getParentLinkageType() != LinkageType.NONMONOSACCHARID )
            {
                this.m_aErrorList.add("LinkageType for substituent must be LinkageType.NONMONOSACCHARID.");
            }
        }
    }
    if ( t_objType == null )
    {
        this.m_aErrorList.add("SubstituentType for substituent is null.");
    }
    else
    {
        if ( t_iLinkageCount < t_objType.getMinValence() )
        {
            this.m_aErrorList.add("Minimum valence constraint for substituent " + t_objType.getName() + " not fulfilled.");
        }
        // the maximum valence is only tested if it is larger than 0
        if ( t_iLinkageCount > t_objType.getMaxValence() && t_objType.getMaxValence() > 0 )
        {
            this.m_aErrorList.add("Maximum valence constraint for substituent " + t_objType.getName() + " broken.");
        }
        if ( t_objType == SubstituentType.EPOXY || t_objType == SubstituentType.LACTONE
                || t_objType == SubstituentType.ANHYDRO )
        {
            if ( a_objSubstituent.getChildEdges().size() != 0 )
            {
                this.m_aErrorList.add("SubstituentType.EPOXY, SubstituentType.LACTONE, SubstituentType.ANHYDRO can not have child linkages.");
            }
            // without a parent edge there are no linkages at all, which is not 2 either
            if ( t_objParentEdge == null || t_objParentEdge.getGlycosidicLinkages().size() != 2 )
            {
                this.m_aErrorList.add("SubstituentType.EPOXY, SubstituentType.LACTONE, SubstituentType.ANHYDRO can not have more than two linkages.");
            }
        }
    }
    // test if the parent is also an substituent
    if ( t_objParentEdge != null )
    {
        GlycoNode t_objNode = t_objParentEdge.getParent();
        if ( t_objNode != null && this.m_visNodeType.isSubstituent(t_objNode) )
        {
            this.m_aErrorList.add("Substituent - Substituent linkages are not allowed.");
        }
    }
}
/**
 * Validates a cyclic unit.
 *
 * - cyclic must have a start residue
 * - start residue may not be a SugarUnitCyclic
 * - start residue must be part of the same GlycoGraph
 * - cyclic may not have child residues
 * - cyclic must have a parent linkage whose child positions are larger than 0 or UNKNOWN_POSITION
 *
 * A missing start residue is reported once; the checks that inspect the start
 * residue are then skipped, because they can not work on a null reference.
 *
 * @param a_objCyclic cyclic unit to validate
 * @throws GlycoVisitorException if the node type of the start residue can not be determined
 */
public void visit(SugarUnitCyclic a_objCyclic) throws GlycoVisitorException
{
    GlycoNode t_objStart = a_objCyclic.getCyclicStart();
    if ( t_objStart == null )
    {
        this.m_aErrorList.add("Start residue for cyclic structure is null.");
    }
    else
    {
        if ( this.m_visNodeType.isSugarUnitCyclic(t_objStart) )
        {
            this.m_aErrorList.add("Start residue for cyclic structure is a SugarUnitCyclic.");
        }
        if ( !this.m_objGlycoGraph.containsNode(t_objStart) )
        {
            this.m_aErrorList.add("Start residue is not part of the same GlycoNode as SugarUnitCyclic.");
        }
    }
    if ( a_objCyclic.getChildEdges().size() != 0 )
    {
        this.m_aErrorList.add("SugarUnitCyclic can not have child linkages.");
    }
    GlycoEdge t_objParentEdge = a_objCyclic.getParentEdge();
    if ( t_objParentEdge == null )
    {
        this.m_aErrorList.add("SugarUnitCyclic must have a parent linkage.");
    }
    else
    {
        for (Linkage t_objLinkage : t_objParentEdge.getGlycosidicLinkages())
        {
            // int loop variable: positions are compared by value
            for (int t_iPos : t_objLinkage.getChildLinkages())
            {
                if ( t_iPos < 1 && t_iPos != Linkage.UNKNOWN_POSITION )
                {
                    this.m_aErrorList.add("Linkage position smaller than 1 are not allowed.");
                }
            }
        }
    }
}
/**
 * Entry point of the validation: clears the visitor, traverses the sugar and
 * all of its underdetermined subtrees and collects errors and warnings.
 *
 * - sugar only one root residue (connected sugar)
 * - UnderdeterminedSubTree only one root residue
 * - UnderdeterminedSubTree LowerProb == 100 (no statistic for sugar only repeat)
 * - UnderdeterminedSubTree UpperProb > 100 forbidden
 * - UnderdeterminedSubTree at least 2 parents for each uncertain terminal residue
 * - Root nodes of the sugar can not be substituents
 *
 * The "at least 2 parents" error is reported only once per subtree, either by
 * testUnderdeterminded or by this method.
 *
 * @param a_objSugar sugar to validate
 * @throws GlycoVisitorException if the traversal fails; a GlycoconjugateException is wrapped
 */
public void start(Sugar a_objSugar) throws GlycoVisitorException
{
    this.clear();
    try
    {
        this.m_objGlycoGraph = a_objSugar;
        if ( a_objSugar.getRootNodes().size() != 1 )
        {
            this.m_aErrorList.add("Sugar has more than one root residue.");
        }
        GlycoTraverser t_objTraverser = this.getTraverser(this);
        t_objTraverser.traverseGraph(a_objSugar);
        for (GlycoNode t_objRoot : a_objSugar.getRootNodes())
        {
            if ( this.m_visNodeType.isSubstituent(t_objRoot) )
            {
                this.m_aErrorList.add("A substituent can not be the root node auf an sugar.");
            }
        }
        for (UnderdeterminedSubTree t_objTree : a_objSugar.getUndeterminedSubTrees())
        {
            // nodes of the subtree are validated against the subtree itself
            this.m_objGlycoGraph = t_objTree;
            if ( t_objTree.getRootNodes().size() != 1 )
            {
                this.m_aErrorList.add("UnderdeterminedSubTree has more than one root residue.");
            }
            t_objTraverser.traverseGraph(t_objTree);
            this.testUnderdeterminded(t_objTree,a_objSugar);
            if ( t_objTree.getProbabilityLower() < 100.0 )
            {
                this.m_aErrorList.add("Sugar can not have a statistical distribution.");
            }
            // testUnderdeterminded already reports missing parents when the lower
            // probability is >= 100.0; only report the remaining cases here to
            // avoid a duplicated error message.
            if ( t_objTree.getParents().size() < 2 && !( t_objTree.getProbabilityLower() >= 100.0 ) )
            {
                this.m_aErrorList.add("Each uncertain terminal block needs at least 2 parent residues.");
            }
        }
    }
    catch (GlycoconjugateException e)
    {
        throw new GlycoVisitorException(e.getMessage(),e);
    }
}
/**
 * Validates an UnderdeterminedSubTree against the graph it is attached to.
 *
 * - UnderdeterminedSubTree parents must be part of the graph
 * - UnderdeterminedSubTree LowerProb <= UpperProb
 * - UnderdeterminedSubTree UpperProb > 100 forbidden
 * - at least 2 parents if LowerProb >= 100
 * - UnderdeterminedSubTree Connection - parent and child == null
 * - UnderdeterminedSubTree Connection - normal linkage position tests (duplication, UNKNOWN + KNOWN etc.)
 * - UnderdeterminedSubTree Connection test if LinkageType is valid for parent and lead in residue + in case of two monosaccharides if the connection is a valid glycosidic linkage
 *
 * Parent residues for which getSimpleGlycoNode returns null (alternative nodes,
 * see the TODO list of the class) are not tested any further.
 *
 * @param a_objTree subtree to validate
 * @param a_objGraph graph (sugar or repeat unit) the subtree is attached to
 * @throws GlycoVisitorException if the node type of a residue can not be determined
 * @throws GlycoconjugateException declared for the calls into the sugar object model
 */
private void testUnderdeterminded(UnderdeterminedSubTree a_objTree,GlycoGraph a_objGraph ) throws GlycoVisitorException, GlycoconjugateException
{
    for (GlycoNode t_objParentNode : a_objTree.getParents())
    {
        if ( !a_objGraph.containsNode(t_objParentNode) )
        {
            this.m_aErrorList.add("Parent node of UnderdeterminedSubTree is not part of the attached GlycoGraph.");
        }
    }
    if ( a_objTree.getProbabilityLower() > a_objTree.getProbabilityUpper() )
    {
        this.m_aErrorList.add("Lower border of probabilitic value for UnderdeterminedSubTree is larger than uper border.");
    }
    if ( a_objTree.getProbabilityUpper() > 100.0 )
    {
        this.m_aErrorList.add("A probabilitic value for UnderdeterminedSubTree larger than 100.0% is not possible.");
    }
    // a single parent is only accepted for statistical subtrees
    if ( a_objTree.getParents().size() < 2 && a_objTree.getProbabilityLower() >= 100.0 )
    {
        this.m_aErrorList.add("Each uncertain terminal block needs at least 2 parent residues.");
    }
    // connection test
    GlycoEdge t_objEdge = a_objTree.getConnection();
    if ( t_objEdge.getChild() != null || t_objEdge.getParent() != null )
    {
        this.m_aErrorList.add("Parent and Child for connection into UnderdetermindedSubTree must be null.");
    }
    if ( t_objEdge.getGlycosidicLinkages().size() == 0 )
    {
        this.m_aErrorList.add("Connection into UnderdetermindedSubTree contains no Linkages.");
    }
    // test linkage positions
    for (Linkage t_objLinkage : t_objEdge.getGlycosidicLinkages())
    {
        // child side
        ArrayList<Integer> t_aChildPositions = t_objLinkage.getChildLinkages();
        if ( t_aChildPositions.size() == 0 )
        {
            this.m_aErrorList.add("No child linkage position given in GlycoEdge.");
        }
        ArrayList<Integer> t_aSeen = new ArrayList<Integer>();
        for (Integer t_iPosition : t_aChildPositions)
        {
            if ( t_aSeen.contains(t_iPosition) )
            {
                this.m_aErrorList.add("Duplicated child linkage position in GlycoEdge.");
            }
            t_aSeen.add(t_iPosition);
        }
        if ( t_aChildPositions.size() > 1 && t_aChildPositions.contains(Linkage.UNKNOWN_POSITION) )
        {
            this.m_aErrorList.add("Unknown and defined positions in one GlycoEdge.");
        }
        // parent side
        ArrayList<Integer> t_aParentPositions = t_objLinkage.getParentLinkages();
        if ( t_aParentPositions.size() == 0 )
        {
            this.m_aErrorList.add("No parent linkage position given in GlycoEdge.");
        }
        t_aSeen = new ArrayList<Integer>();
        for (Integer t_iPosition : t_aParentPositions)
        {
            if ( t_aSeen.contains(t_iPosition) )
            {
                this.m_aErrorList.add("Duplicated parent linkage position in GlycoEdge.");
            }
            t_aSeen.add(t_iPosition);
        }
        if ( t_aParentPositions.size() > 1 && t_aParentPositions.contains(Linkage.UNKNOWN_POSITION) )
        {
            this.m_aErrorList.add("Unknown and defined positions in one GlycoEdge.");
        }
        // test linkage types - child
        // stays null unless the start residue is a monosaccharide with a valid linkage type
        LinkageType t_objLinkTypeChild = null;
        if ( a_objTree.getRootNodes().size() == 1 )
        {
            // otherwise this test do not make sense
            GlycoNode t_objNode = a_objTree.getRootNodes().get(0);
            if ( this.m_visNodeType.isMonosaccharide(t_objNode) )
            {
                if ( t_objLinkage.getChildLinkageType() == LinkageType.NONMONOSACCHARID
                        || t_objLinkage.getChildLinkageType() == LinkageType.UNVALIDATED )
                {
                    this.m_aErrorList.add("LinkageType for Start residue of UnderdetermindedSubTree is not allowed (for Monosaccharide).");
                }
                else
                {
                    t_objLinkTypeChild = t_objLinkage.getChildLinkageType();
                }
            }
            else if ( this.m_visNodeType.isSubstituent(t_objNode) )
            {
                if ( t_objLinkage.getChildLinkageType() != LinkageType.NONMONOSACCHARID )
                {
                    this.m_aErrorList.add("LinkageType of start residue of UnderdetermindedSubTree is not allowed (for Substituent).");
                }
            }
            else if ( this.m_visNodeType.isNonMonosaccharide(t_objNode) )
            {
                if ( t_objLinkage.getChildLinkageType() != LinkageType.NONMONOSACCHARID )
                {
                    this.m_aErrorList.add("LinkageType of start residue of UnderdetermindedSubTree is not allowed (for NonMonosaccharide).");
                }
            }
        }
        // test linkage types - parent
        boolean t_bContainMS = false;
        boolean t_bContainSubst = false;
        boolean t_bContainNonMS = false;
        for (GlycoNode t_objParentNode : a_objTree.getParents())
        {
            GlycoNode t_objParent = this.getSimpleGlycoNode(t_objParentNode,false);
            if ( t_objParent == null )
            {
                // no simple node available (alternative node): no further testing,
                // same handling as for the repeat linkage in visit(SugarUnitRepeat)
                continue;
            }
            if ( this.m_visNodeType.isMonosaccharide(t_objParent) )
            {
                if ( t_objLinkage.getParentLinkageType() == LinkageType.NONMONOSACCHARID
                        || t_objLinkage.getParentLinkageType() == LinkageType.UNVALIDATED )
                {
                    this.m_aErrorList.add("LinkageType for parent of UnderdetermindedSubTree is not allowed (for Monosaccharide).");
                }
                else
                {
                    // test for glycosidic linkage
                    LinkageType t_objLinkTypeParent = t_objLinkage.getParentLinkageType();
                    if ( t_objLinkTypeChild != null )
                    {
                        // both are monosaccharides
                        if ( t_objLinkTypeParent != LinkageType.UNKNOWN )
                        {
                            if ( t_objLinkTypeChild != LinkageType.DEOXY || t_objLinkTypeParent != LinkageType.H_AT_OH )
                            {
                                this.m_aErrorList.add("UnderdetermindedSubTree connection is not a glycosidic linkage (for two Monosaccharides).");
                            }
                        }
                    }
                }
                t_bContainMS = true;
            }
            else if ( this.m_visNodeType.isNonMonosaccharide(t_objParent) )
            {
                if ( t_objLinkage.getParentLinkageType() != LinkageType.NONMONOSACCHARID && t_objLinkage.getParentLinkageType() != LinkageType.UNKNOWN )
                {
                    this.m_aErrorList.add("LinkageType of parent residue of UnderdetermindedSubTree is not allowed (for NonMonosaccharide).");
                }
                t_bContainNonMS = true;
            }
            else if ( this.m_visNodeType.isSubstituent(t_objParent) )
            {
                if ( t_objLinkage.getParentLinkageType() != LinkageType.NONMONOSACCHARID && t_objLinkage.getParentLinkageType() != LinkageType.UNKNOWN )
                {
                    this.m_aErrorList.add("LinkageType of parent residue of UnderdetermindedSubTree is not allowed (for Substituent).");
                }
                t_bContainSubst = true;
            }
        }
        // mixed parent types require UNKNOWN, uniform parent types forbid it
        if ( t_bContainMS )
        {
            if ( t_bContainNonMS || t_bContainSubst )
            {
                if ( t_objLinkage.getParentLinkageType() != LinkageType.UNKNOWN )
                {
                    this.m_aErrorList.add("LinkageType of parent residue of UnderdetermindedSubTree must be UNKNOWN (for different types of parent residues).");
                }
            }
            else
            {
                if ( t_objLinkage.getParentLinkageType() == LinkageType.UNKNOWN )
                {
                    this.m_aErrorList.add("LinkageType of parent residue of UnderdetermindedSubTree can not be UNKNOWN (for same types of parent residues).");
                }
            }
        }
        else if ( t_bContainNonMS )
        {
            if ( t_bContainSubst )
            {
                if ( t_objLinkage.getParentLinkageType() != LinkageType.UNKNOWN )
                {
                    this.m_aErrorList.add("LinkageType of parent residue of UnderdetermindedSubTree must be UNKNOWN (for different types of parent residues).");
                }
            }
            else
            {
                if ( t_objLinkage.getParentLinkageType() == LinkageType.UNKNOWN )
                {
                    this.m_aErrorList.add("LinkageType of parent residue of UnderdetermindedSubTree can not be UNKNOWN (for same types of parent residues).");
                }
            }
        }
        else if ( t_bContainSubst )
        {
            if ( t_objLinkage.getParentLinkageType() == LinkageType.UNKNOWN )
            {
                this.m_aErrorList.add("LinkageType of parent residue of UnderdetermindedSubTree can not be UNKNOWN (for same types of parent residues).");
            }
        }
    }
    // TODO : this.testMS(); for underdetermined start & parents because of the special linkages
}
/**
* - normal test of GlycoGraph
* - if min == max repeat count they must be larger than 6 or UNKNOWN
* - min and max repeat count may be larger than 0 (beside UNKNOWN)
* - min repeat count may be smaller than max repeat count (beside UNKNOWN)
* - test internal repeat linkage (nodes are part of the repeat, linkagetype correct)
* - test linkage type of linkage into repeat
* - test linkage type of linkage from repeat
* - UnderdeterminedSubTree only one root residue
* - UnderdeterminedSubTree parents must be part of repeat unit
* - UnderdeterminedSubTree LowerProb <= UpperProb
* - UnderdeterminedSubTree UpperProb > 100 forbidden
* - UnderdeterminedSubTree at least 2 parents for each uncertain terminal residue (for statistical one parent is possible)
*/
public void visit(SugarUnitRepeat a_objRepeat) throws GlycoVisitorException
{
try
{
GlycoGraph t_objGraph = this.m_objGlycoGraph;
this.m_objGlycoGraph = a_objRepeat;
if ( a_objRepeat.getRootNodes().size() != 1 )
{
this.m_aErrorList.add("SugarUnitRepeat has more than one root residue.");
}
GlycoTraverser t_objTraverser = this.getTraverser(this);
t_objTraverser.traverseGraph(a_objRepeat);
// repeat count
if ( a_objRepeat.getMinRepeatCount() == a_objRepeat.getMaxRepeatCount() )
{
if ( a_objRepeat.getMinRepeatCount() < 7 && a_objRepeat.getMinRepeatCount() != SugarUnitRepeat.UNKNOWN )
{
this.m_aErrorList.add("SugarUnitRepeat with repeat count less than 7 are not allowed.");
}
}
else
{
if ( a_objRepeat.getMinRepeatCount() > a_objRepeat.getMaxRepeatCount() && a_objRepeat.getMaxRepeatCount() != SugarUnitRepeat.UNKNOWN )
{
this.m_aErrorList.add("Min repeat count of SugarUnitRepeat must be smaller than max repeat count.");
}
if ( a_objRepeat.getMinRepeatCount() < 1 && a_objRepeat.getMinRepeatCount() != SugarUnitRepeat.UNKNOWN )
{
this.m_aErrorList.add("Negative min repeat count of SugarUnitRepeat is not allowed.");
}
if ( a_objRepeat.getMaxRepeatCount() < 1 && a_objRepeat.getMaxRepeatCount() != SugarUnitRepeat.UNKNOWN )
{
this.m_aErrorList.add("Negative max repeat count of SugarUnitRepeat is not allowed.");
}
}
// test internal linkage
GlycoEdge t_objInternal = a_objRepeat.getRepeatLinkage();
this.testLinkageArray(t_objInternal);
// internal nodes part of repeat?
if ( t_objInternal.getChild() == null || t_objInternal.getParent() == null )
{
this.m_aErrorList.add("Child or parent residue of internal repeat linkage is null.");
}
else
{
if ( !a_objRepeat.containsNode(t_objInternal.getChild()) )
{
this.m_aErrorList.add("Child of repeat linkage is not part of the repeat unit.");
}
if ( !a_objRepeat.getRootNodes().contains(t_objInternal.getChild()) )
{
this.m_aErrorList.add("Child of repeat linkage is not part of the root nodes of the repeat unit.");
}
if ( !a_objRepeat.containsNode(t_objInternal.getParent()) )
{
this.m_aErrorList.add("Parent of repeat linkage is not part of the repeat unit.");
}
GlycoNode t_objParent = this.getSimpleGlycoNode(t_objInternal.getParent(),false);
GlycoNode t_objChild = this.getSimpleGlycoNode(t_objInternal.getChild(),true);
// internal linkagetypes
for (Iterator<Linkage> t_iterLinkages = t_objInternal.getGlycosidicLinkages().iterator(); t_iterLinkages.hasNext();)
{
Linkage t_objLinkage = t_iterLinkages.next();
if ( t_objParent != null )
{
if ( this.m_visNodeType.isMonosaccharide(t_objParent) )
{
if ( t_objLinkage.getParentLinkageType() == LinkageType.NONMONOSACCHARID ||
t_objLinkage.getParentLinkageType() == LinkageType.UNVALIDATED )
{
this.m_aErrorList.add("Wrong linkage type in internal repeat unit (for Monosaccharide).");
}
}
else if ( this.m_visNodeType.isNonMonosaccharide(t_objParent) )
{
if ( t_objLinkage.getParentLinkageType() != LinkageType.NONMONOSACCHARID )
{
this.m_aErrorList.add("Wrong linkage type in internal repeat unit (for NonMonosaccharide).");
}
}
else if ( this.m_visNodeType.isSubstituent(t_objParent) )
{
if ( t_objLinkage.getParentLinkageType() != LinkageType.NONMONOSACCHARID )
{
this.m_aErrorList.add("Wrong linkage type in internal repeat unit (for Substituent).");
}
}
else if ( this.m_visNodeType.isSugarUnitCyclic(t_objParent) )
{
this.m_aErrorList.add("Cyclic unit can not be the parent residue in a repeat linkage.");
}
}
if ( t_objChild != null )
{
if ( this.m_visNodeType.isMonosaccharide(t_objChild) )
{
if ( t_objLinkage.getChildLinkageType() == LinkageType.NONMONOSACCHARID ||
t_objLinkage.getChildLinkageType() == LinkageType.UNVALIDATED )
{
this.m_aErrorList.add("Wrong linkage type in internal repeat unit (for Monosaccharide).");
}
}
else if ( this.m_visNodeType.isNonMonosaccharide(t_objChild) )
{
if ( t_objLinkage.getChildLinkageType() != LinkageType.NONMONOSACCHARID )
{
this.m_aErrorList.add("Wrong linkage type in internal repeat unit (for NonMonosaccharide).");
}
}
else if ( this.m_visNodeType.isSubstituent(t_objChild) )
{
if ( t_objLinkage.getChildLinkageType() != LinkageType.NONMONOSACCHARID )
{
this.m_aErrorList.add("Wrong linkage type in internal repeat unit (for Substituent).");
}
}
else if ( this.m_visNodeType.isSugarUnitCyclic(t_objChild) )
{
this.m_aErrorList.add("Cyclic unit can not be the parent residue in a repeat linkage.");
}
}
if ( t_objChild != null && t_objParent != null )
{
if ( this.m_visNodeType.isMonosaccharide(t_objChild) && this.m_visNodeType.isMonosaccharide(t_objParent) )
{
if ( t_objLinkage.getParentLinkageType() != LinkageType.H_AT_OH || t_objLinkage.getChildLinkageType() != LinkageType.DEOXY )
{
this.m_aErrorList.add("Repeatlinkage is not a glycosidic linkage.");
}
}
}
for (Iterator<Integer> t_iterChilds = t_objLinkage.getChildLinkages().iterator(); t_iterChilds.hasNext();)
{
Integer t_iPos = t_iterChilds.next();
if ( t_iPos < 1 && t_iPos != Linkage.UNKNOWN_POSITION )
{
this.m_aErrorList.add("Linkage position smaller than 1 are not allowed.");
}
}
for (Iterator<Integer> t_iterChilds = t_objLinkage.getParentLinkages().iterator(); t_iterChilds.hasNext();)
{
Integer t_iPos = t_iterChilds.next();
if ( t_iPos < 1 && t_iPos != Linkage.UNKNOWN_POSITION )
{
this.m_aErrorList.add("Linkage position smaller than 1 are not allowed.");
}
}
}
// linkage type into repeat
GlycoEdge t_objEdge= a_objRepeat.getParentEdge();
if ( t_objEdge != null )
{
for (Iterator<Linkage> t_iterLinkages = t_objEdge.getGlycosidicLinkages().iterator(); t_iterLinkages.hasNext();)
{
Linkage t_objLinkage = t_iterLinkages.next();
// MUST BE NONMONOSACCHARIDE
if ( t_objLinkage.getChildLinkageType() != LinkageType.NONMONOSACCHARID )
{
this.m_aErrorList.add("Wrong linkage type in linkage to repeat unit.");
}
for (Iterator<Integer> t_iterPos = t_objLinkage.getChildLinkages().iterator(); t_iterPos.hasNext();)
{
Integer t_iPos = t_iterPos.next();
if ( t_iPos < 1 && t_iPos != Linkage.UNKNOWN_POSITION )
{
this.m_aErrorList.add("Linkage position smaller than 1 are not allowed.");
}
}
// if ( t_objChild != null )
// {
// if ( this.m_visNodeType.isMonosaccharide(t_objChild) )
// {
// if ( t_objLinkage.getChildLinkageType() == LinkageType.NONMONOSACCHARID ||
// t_objLinkage.getChildLinkageType() == LinkageType.UNVALIDATED )
// {
// this.m_aErrorList.add("Wrong linkage type in internal repeat unit (for Monosaccharide).");
// }
// }
// else if ( this.m_visNodeType.isNonMonosaccharide(t_objChild) )
// {
// if ( t_objLinkage.getChildLinkageType() != LinkageType.NONMONOSACCHARID )
// {
// this.m_aErrorList.add("Wrong linkage type in internal repeat unit (for NonMonosaccharide).");
// }
// }
// else if ( this.m_visNodeType.isSubstituent(t_objChild) )
// {
// if ( t_objLinkage.getChildLinkageType() != LinkageType.NONMONOSACCHARID )
// {
// this.m_aErrorList.add("Wrong linkage type in internal repeat unit (for Substituent).");
// }
// }
// else if ( this.m_visNodeType.isSugarUnitCyclic(t_objChild) )
// {
// this.m_aErrorList.add("Cyclic unit can not be the parent residue in a repeat linkage.");
// }
// }
}
// TODO: test if linkage is possible
if ( t_objEdge.getGlycosidicLinkages().size() != a_objRepeat.getRepeatLinkage().getGlycosidicLinkages().size() )
{
this.m_aErrorList.add("Number of linkages into repeat and internal repeat linkages does not match.");
}
}
// linkage type from repeat
for (Iterator<GlycoEdge> t_iterChilds = a_objRepeat.getChildEdges().iterator(); t_iterChilds.hasNext();)
{
t_objEdge = t_iterChilds.next();
for (Iterator<Linkage> t_iterLinkages = t_objEdge.getGlycosidicLinkages().iterator(); t_iterLinkages.hasNext();)
{
Linkage t_objLinkage = t_iterLinkages.next();
// MUST BE NONMONOSACCHARIDE
if ( t_objLinkage.getParentLinkageType() != LinkageType.NONMONOSACCHARID )
{
this.m_aErrorList.add("Wrong linkage type in linkage from repeat unit.");
}
for (Iterator<Integer> t_iterPos = t_objLinkage.getParentLinkages().iterator(); t_iterPos.hasNext();)
{
Integer t_iPos = t_iterPos.next();
if ( t_iPos < 1 && t_iPos != Linkage.UNKNOWN_POSITION )
{
this.m_aErrorList.add("Linkage position smaller than 1 are not allowed.");
}
}
// if ( t_objParent != null )
// {
// if ( this.m_visNodeType.isMonosaccharide(t_objParent) )
// {
// if ( t_objLinkage.getParentLinkageType() == LinkageType.NONMONOSACCHARID ||
// t_objLinkage.getParentLinkageType() == LinkageType.UNVALIDATED )
// {
// this.m_aErrorList.add("Wrong linkage type in internal repeat unit (for Monosaccharide).");
// }
// }
// else if ( this.m_visNodeType.isNonMonosaccharide(t_objParent) )
// {
// if ( t_objLinkage.getParentLinkageType() != LinkageType.NONMONOSACCHARID )
// {
// this.m_aErrorList.add("Wrong linkage type in internal repeat unit (for NonMonosaccharide).");
// }
// }
// else if ( this.m_visNodeType.isSubstituent(t_objParent) )
// {
// if ( t_objLinkage.getParentLinkageType() != LinkageType.NONMONOSACCHARID )
// {
// this.m_aErrorList.add("Wrong linkage type in internal repeat unit (for Substituent).");
// }
// }
// else if ( this.m_visNodeType.isSugarUnitCyclic(t_objParent) )
// {
// this.m_aErrorList.add("Cyclic unit can not be the parent residue in a repeat linkage.");
// }
// }
}
if ( t_objEdge.getGlycosidicLinkages().size() != a_objRepeat.getRepeatLinkage().getGlycosidicLinkages().size() )
{
this.m_aErrorList.add("Number of linkages out of repeat and internal repeat linkages does not match.");
}
}
// TODO : test repeat in and out residue if linkage is possible
}
for (Iterator<UnderdeterminedSubTree> t_iterSubTree = a_objRepeat.getUndeterminedSubTrees().iterator(); t_iterSubTree.hasNext();)
{
UnderdeterminedSubTree t_objTree = t_iterSubTree.next();
this.m_objGlycoGraph = t_objTree;
if ( t_objTree.getRootNodes().size() != 1 )
{
this.m_aErrorList.add("UnderdeterminedSubTree has more than one root residue.");
}
t_objTraverser.traverseGraph(t_objTree);
this.testUnderdeterminded(t_objTree, a_objRepeat);
}
this.m_objGlycoGraph = t_objGraph;
}
catch (GlycoconjugateException e)
{
throw new GlycoVisitorException(e.getMessage(),e);
}
}
/**
 * Resolves a residue to the node that is used for linkage type tests.
 * <ul>
 * <li>alternative unit: <code>null</code> is returned</li>
 * <li>cyclic unit: the cyclic start node is resolved recursively</li>
 * <li>every other node (including a repeat unit): the node itself is returned</li>
 * </ul>
 *
 * @param a_objResidue residue to resolve
 * @param a_bRepeatIn only handed on to the recursive call; it does not influence the result
 * @return the resolved node or <code>null</code> for an alternative unit
 * @throws GlycoVisitorException propagated from the node type visitor
 */
private GlycoNode getSimpleGlycoNode(GlycoNode a_objResidue, boolean a_bRepeatIn) throws GlycoVisitorException
{
    final int t_iKind = this.m_visNodeType.getNodeType(a_objResidue);
    if ( t_iKind == GlycoVisitorNodeType.ALTERNATIVE )
    {
        return null;
    }
    if ( t_iKind == GlycoVisitorNodeType.CYCLIC )
    {
        GlycoNode t_objStart = this.m_visNodeType.getSugarUnitCyclic(a_objResidue).getCyclicStart();
        return this.getSimpleGlycoNode(t_objStart, a_bRepeatIn);
    }
    // A repeat unit is returned as it is. An earlier, disabled implementation
    // resolved it to the child/parent of its internal repeat linkage instead.
    return a_objResidue;
}
/**
 * Validates an alternative unit and records every problem in the error list.
 * <p>
 * Each alternative graph is traversed with this visitor and its lead in / lead out
 * definition is compared with the parent edge and the child edges of the unit.
 * Afterwards the linkage types of the parent edge and of each child edge are tested:
 * if all alternatives connect through the same kind of node the edge is handed to
 * <code>testLinkageType</code>, otherwise the linkage type has to be UNKNOWN.
 * <p>
 * A missing lead in / lead out node is reported once and then left out of the node type
 * comparison instead of being passed on as <code>null</code>.
 *
 * @param a_objAlternative alternative unit to validate
 * @throws GlycoVisitorException propagated from the traverser or from the node type visitor
 */
public void visit(SugarUnitAlternative a_objAlternative) throws GlycoVisitorException
{
    GlycoTraverser t_objTraverser = this.getTraverser(this);
    if ( a_objAlternative.getAlternatives().size() < 2 )
    {
        this.m_aErrorList.add("SugarUnitAlternative must have at least two alternative units.");
    }
    for (GlycoGraphAlternative t_objAltGraph : a_objAlternative.getAlternatives())
    {
        t_objTraverser.traverseGraph(t_objAltGraph);
        // test lead in
        if ( a_objAlternative.getParentEdge() == null )
        {
            if ( t_objAltGraph.getLeadInNode() != null )
            {
                this.m_aErrorList.add("SugarUnitAlternative without parent edge can not have lead in nodes.");
            }
        }
        else
        {
            if ( t_objAltGraph.getLeadInNode() == null )
            {
                this.m_aErrorList.add("SugarUnitAlternative with parent edge has to have lead in nodes.");
            }
        }
        // test lead out nodes
        for (GlycoEdge t_objChildEdge : a_objAlternative.getChildEdges())
        {
            GlycoNode t_objChild = t_objChildEdge.getChild();
            if ( !t_objAltGraph.getLeadOutNodeToNode().containsKey(t_objChild) )
            {
                this.m_aErrorList.add("Child node is missing in lead out node definition.");
            }
        }
        if ( t_objAltGraph.getLeadOutNodeToNode().size() != a_objAlternative.getChildEdges().size() )
        {
            this.m_aErrorList.add("Number of lead out nodes for a alternative tree and number of child residues for a Alternative unit must be identical.");
        }
    }
    // test linkages into alternative
    GlycoEdge t_objParentEdge = a_objAlternative.getParentEdge();
    if ( t_objParentEdge != null )
    {
        GlycoNode t_objLead = null;
        boolean t_bAllSame = true;
        for (GlycoGraphAlternative t_objAltGraph : a_objAlternative.getAlternatives())
        {
            GlycoNode t_objLeadIn = t_objAltGraph.getLeadInNode();
            if ( t_objLeadIn == null )
            {
                // already reported above; there is no node whose type could be compared
                continue;
            }
            if ( t_objLead == null )
            {
                t_objLead = t_objLeadIn;
            }
            else if ( this.m_visNodeType.getNodeType(t_objLead) != this.m_visNodeType.getNodeType(t_objLeadIn) )
            {
                t_bAllSame = false;
            }
        }
        if ( t_objLead == null )
        {
            // no alternative defines a lead in node: nothing to test the linkage against
        }
        else if ( t_bAllSame )
        {
            // NOTE(review): a true result is treated as "violation found" here;
            // confirm this against the return contract of testLinkageType.
            if ( this.testLinkageType(t_objParentEdge.getParent(), t_objParentEdge, t_objLead) )
            {
                this.m_aErrorList.add("Lead in linkage of alternative trees have the false linkage type.");
            }
        }
        else
        {
            for (Linkage t_objLinkage : t_objParentEdge.getGlycosidicLinkages())
            {
                if ( t_objLinkage.getChildLinkageType() != LinkageType.UNKNOWN )
                {
                    this.m_aErrorList.add("For alternative trees with diferent types of lead in node the linkage type of the parent linkage has to be UNKNOWN.");
                }
            }
        }
    }
    // test linkages from alternative
    for (GlycoEdge t_objEdge : a_objAlternative.getChildEdges())
    {
        GlycoNode t_objLead = null;
        boolean t_bAllSame = true;
        for (GlycoGraphAlternative t_objAltGraph : a_objAlternative.getAlternatives())
        {
            GlycoNode t_objLeadOut = t_objAltGraph.getLeadOutNodeToNode().get(t_objEdge.getChild());
            if ( t_objLeadOut == null )
            {
                // missing lead out definition, already reported above
                continue;
            }
            if ( t_objLead == null )
            {
                t_objLead = t_objLeadOut;
            }
            else if ( this.m_visNodeType.getNodeType(t_objLead) != this.m_visNodeType.getNodeType(t_objLeadOut) )
            {
                t_bAllSame = false;
            }
        }
        if ( t_objLead == null )
        {
            // no alternative defines a lead out node for this child edge
            continue;
        }
        if ( t_bAllSame )
        {
            // NOTE(review): see the remark on the lead in test regarding the meaning of the result
            if ( this.testLinkageType(t_objLead, t_objEdge, t_objEdge.getChild()) )
            {
                this.m_aErrorList.add("Lead out linkage of alternative trees have the false linkage type.");
            }
        }
        else
        {
            for (Linkage t_objLinkage : t_objEdge.getGlycosidicLinkages())
            {
                if ( t_objLinkage.getParentLinkageType() != LinkageType.UNKNOWN )
                {
                    this.m_aErrorList.add("For alternative trees with different types of lead out node, the linkage type of the corresponding child linkage has to be UNKNOWN.");
                }
            }
        }
    }
}
/**
 * Tests the linkage types of every linkage of an edge against the kind of residue on
 * each side of the edge and records each violation in the error list.
 * <p>
 * Both residues are first resolved with <code>getSimpleGlycoNode</code>; a side that
 * resolves to <code>null</code> is not tested. For a linkage between two monosaccharides
 * the parent side has to be H_AT_OH and the child side DEOXY.
 * <p>
 * Every linkage of the edge is tested, also when one side is a repeat unit.
 *
 * @param a_objParent residue on the parent side of the edge
 * @param a_objEdge edge whose linkages are tested
 * @param a_objChild residue on the child side of the edge
 * @return <code>true</code> if at least one violation was recorded, <code>false</code>
 *         if all linkages passed; the callers in this class add their own error message
 *         on <code>true</code>
 * @throws GlycoVisitorException propagated from the node type visitor
 */
private boolean testLinkageType(GlycoNode a_objParent, GlycoEdge a_objEdge, GlycoNode a_objChild) throws GlycoVisitorException
{
    GlycoNode t_objParent = this.getSimpleGlycoNode(a_objParent, false);
    GlycoNode t_objChild = this.getSimpleGlycoNode(a_objChild , true );
    boolean t_bViolation = false;
    for (Linkage t_objLinkage : a_objEdge.getGlycosidicLinkages())
    {
        if ( this.testLinkageTypeForNode(t_objParent, t_objLinkage.getParentLinkageType()) )
        {
            t_bViolation = true;
        }
        if ( this.testLinkageTypeForNode(t_objChild, t_objLinkage.getChildLinkageType()) )
        {
            t_bViolation = true;
        }
        if ( t_objChild != null && t_objParent != null
                && this.m_visNodeType.isMonosaccharide(t_objChild)
                && this.m_visNodeType.isMonosaccharide(t_objParent) )
        {
            if ( t_objLinkage.getParentLinkageType() != LinkageType.H_AT_OH || t_objLinkage.getChildLinkageType() != LinkageType.DEOXY )
            {
                this.m_aErrorList.add("Linkage is not a glycosidic linkage.");
                t_bViolation = true;
            }
        }
    }
    return t_bViolation;
}

/**
 * Tests one side of a linkage: a monosaccharide must not use NONMONOSACCHARID or
 * UNVALIDATED, while a non-monosaccharide, a substituent and a repeat unit must use
 * NONMONOSACCHARID. A violation is added to the error list.
 *
 * @param a_objNode resolved residue on this side of the linkage, may be <code>null</code>
 * @param a_enumType linkage type on this side of the linkage
 * @return <code>true</code> if a violation was recorded
 * @throws GlycoVisitorException propagated from the node type visitor
 */
private boolean testLinkageTypeForNode(GlycoNode a_objNode, LinkageType a_enumType) throws GlycoVisitorException
{
    if ( a_objNode == null )
    {
        return false;
    }
    String t_strError = null;
    if ( this.m_visNodeType.isMonosaccharide(a_objNode) )
    {
        if ( a_enumType == LinkageType.NONMONOSACCHARID || a_enumType == LinkageType.UNVALIDATED )
        {
            t_strError = "Wrong linkage type in linkage (for Monosaccharide).";
        }
    }
    else if ( this.m_visNodeType.isNonMonosaccharide(a_objNode) )
    {
        if ( a_enumType != LinkageType.NONMONOSACCHARID )
        {
            t_strError = "Wrong linkage type in linkage (for NonMonosaccharide).";
        }
    }
    else if ( this.m_visNodeType.isSubstituent(a_objNode) )
    {
        if ( a_enumType != LinkageType.NONMONOSACCHARID )
        {
            t_strError = "Wrong linkage type in linkage (for Substituent).";
        }
    }
    else if ( this.m_visNodeType.isSugarUnitRepeat(a_objNode) )
    {
        if ( a_enumType != LinkageType.NONMONOSACCHARID )
        {
            t_strError = "Wrong linkage type in linkage (for Repeat).";
        }
    }
    if ( t_strError == null )
    {
        return false;
    }
    this.m_aErrorList.add(t_strError);
    return true;
}
/**
 * Validates a single monosaccharide and records every problem in the error list.
 * <p>
 * The checks run in this order: mandatory properties (anomer, superclass, basetype list),
 * modification positions and combinations, basetype order, agreement of the superclass
 * with basetypes and modifications, ring positions, anomer versus ring closure, attach
 * positions of all linkages, linkage direction of the child edges and the linkage types
 * of the parent edge.
 * <p>
 * If the superclass or the basetype list is missing, this is reported and the remaining
 * checks are skipped because all of them depend on these values.
 *
 * @param a_objMonosaccharid monosaccharide to validate
 * @throws GlycoVisitorException propagated from validateDirection and testLinkageType
 */
public void visit(Monosaccharide a_objMonosaccharid) throws GlycoVisitorException
{
    // mandatory properties
    if ( a_objMonosaccharid.getAnomer() == null )
    {
        this.m_aErrorList.add("Anomer null not valid.");
    }
    if ( a_objMonosaccharid.getSuperclass() == null )
    {
        this.m_aErrorList.add("Superclass null not valid.");
    }
    if ( a_objMonosaccharid.getBaseType() == null )
    {
        this.m_aErrorList.add("Basetype List null not valid.");
    }
    if ( a_objMonosaccharid.getSuperclass() == null || a_objMonosaccharid.getBaseType() == null )
    {
        // the problems are recorded; everything below would dereference the missing values
        return;
    }
    int t_iChainLength = a_objMonosaccharid.getSuperclass().getCAtomCount();
    if ( a_objMonosaccharid.getSuperclass() == Superclass.SUG )
    {
        // NOTE(review): presumably SUG stands for a sugar of unspecified size, hence the
        // generous upper bound - confirm against Superclass.
        t_iChainLength = 100;
    }
    for (Modification t_objModi : a_objMonosaccharid.getModification())
    {
        int t_iPosOne = t_objModi.getPositionOne();
        if ( t_iChainLength < t_iPosOne )
        {
            this.m_aErrorList.add("Modification is out of C-bounds");
        }
        if ( t_objModi.hasPositionTwo() && t_iChainLength < t_objModi.getPositionTwo() )
        {
            this.m_aErrorList.add("Modification is out of C-bounds");
        }
        // aldi only at C1
        if ( t_objModi.getModificationType() == ModificationType.ALDI && t_iPosOne != 1 )
        {
            this.m_aErrorList.add("Alditol is only allowed for C1");
            // NOTE(review): the ring start test only runs for a misplaced alditol; it looks
            // as if it was meant for every alditol - behaviour kept, please confirm.
            if ( a_objMonosaccharid.getRingStart() == 1 )
            {
                this.m_aErrorList.add("C1 cannot be ringstart in alditols");
            }
        }
        // deoxy not on C1
        if ( t_objModi.getModificationType() == ModificationType.DEOXY && t_iPosOne == 1 )
        {
            this.m_aErrorList.add("Deoxy on C1 impossible");
        }
        // acidic only at terminal C
        if ( t_objModi.getModificationType() == ModificationType.ACID
                && t_iPosOne != 1 && t_iPosOne != t_iChainLength )
        {
            this.m_aErrorList.add("Acidic functions only at terminal C");
        }
        // TODO Enx en can only be in conjunction with d and aldi.
        // Other conjunctions are not valid
        if ( t_objModi.getModificationType() == ModificationType.UNKNOWN_DOUBLEBOND ||
                t_objModi.getModificationType() == ModificationType.DOUBLEBOND )
        {
            // two separate passes keep the order of the recorded messages
            for (Modification t_objOther : a_objMonosaccharid.getModification())
            {
                int t_iOther = t_objOther.getPositionOne();
                if ( t_iOther == t_iPosOne && t_objOther != t_objModi
                        && t_objOther.getModificationType() != ModificationType.DEOXY
                        && t_objOther.getModificationType() != ModificationType.ALDI )
                {
                    this.m_aErrorList.add("Double bonds cannot have other modifications than Deoxy or aldi");
                }
            }
            for (Modification t_objOther : a_objMonosaccharid.getModification())
            {
                int t_iOther = t_objOther.getPositionOne();
                if ( t_iOther == t_iPosOne + 1 && t_objOther != t_objModi
                        && t_objOther.getModificationType() != ModificationType.DEOXY )
                {
                    this.m_aErrorList.add("Double bonds cannot have other modifications than Deoxy at n+1");
                }
            }
        }
    }
    // TODO
    // check if basetype sequence is correct according to definition IUPAC (max size first):
    // every basetype after the first one has to have a stereo code of length 4
    for (int t_iIndex = 1; t_iIndex < a_objMonosaccharid.getBaseType().size(); t_iIndex++)
    {
        BaseType t_objBaseType = a_objMonosaccharid.getBaseType().get(t_iIndex);
        if ( t_objBaseType.getStereoCode().length() != 4 )
        {
            this.m_aErrorList.add("Basetype order is not according to IUPAC");
        }
    }
    // check if superclass corresponds to Basetype/modifications pattern
    // impossible for jokers like hep, oct, non etc
    if ( a_objMonosaccharid.getBaseType().size() != 0 )
    {
        int t_iSuperclass = a_objMonosaccharid.getSuperclass().getCAtomCount();
        HashMap<Integer,String> t_hCounted = new HashMap<Integer,String>();
        int t_iTheoretical = 0;
        for (BaseType t_objBaseType : a_objMonosaccharid.getBaseType())
        {
            t_iTheoretical += t_objBaseType.getStereoCode().length();
        }
        t_iTheoretical += 2; // Basetype length indicated
        for (Modification t_objModi : a_objMonosaccharid.getModification())
        {
            String t_strName = t_objModi.getName();
            int t_iPosOne = t_objModi.getPositionOne();
            // deoxys and ketos, counted once per non terminal position
            if ( ("d".equals(t_strName) || "keto".equals(t_strName))
                    && t_iPosOne != 1 && t_iPosOne != t_iSuperclass )
            {
                if ( !t_hCounted.containsKey(t_iPosOne) )
                {
                    t_hCounted.put(t_iPosOne, t_strName);
                    t_iTheoretical++;
                }
            }
            // en - cases
            if ( "enx".equals(t_strName) || "en".equals(t_strName) )
            {
                // scan for possible deoxys around
                int t_iDeoxyCount = 0;
                Integer t_iPosTwo = t_objModi.getPositionTwo();
                for (Modification t_objOther : a_objMonosaccharid.getModification())
                {
                    if ( !"d".equals(t_objOther.getName()) )
                    {
                        continue;
                    }
                    int t_iOther = t_objOther.getPositionOne();
                    // deoxy at pos1
                    if ( t_iOther == t_iPosOne )
                    {
                        t_iDeoxyCount++;
                    }
                    // deoxy at pos2 (only if the double bond defines a second position)
                    if ( t_iPosTwo != null && t_iOther == t_iPosTwo.intValue() )
                    {
                        t_iDeoxyCount++;
                    }
                }
                // Add 2 for en, reduce number for named deoxys within double bond
                t_iTheoretical = t_iTheoretical + 2 - t_iDeoxyCount;
            }
        }
        if ( t_iTheoretical != t_iSuperclass )
        {
            this.m_aErrorList.add("Error on superclass definition"+t_iTheoretical+"===="+t_iSuperclass);
        }
    }
    // ringStart < ringEnd and not >C-atom count
    if ( a_objMonosaccharid.getRingEnd() > t_iChainLength || a_objMonosaccharid.getRingStart() > t_iChainLength )
    {
        this.m_aErrorList.add("Ring end out of C-backbone");
    }
    // check if ring size is valid and corresponds to Carbonyl-function(s).
    // TODO
    if ( a_objMonosaccharid.getRingEnd() > 0 && a_objMonosaccharid.getRingStart() > 0 )
    {
        // get all keto functions
        ArrayList<Integer> t_aKeto = new ArrayList<Integer>();
        for (Modification t_objModi : a_objMonosaccharid.getModification())
        {
            if ( t_objModi.getModificationType() == ModificationType.KETO )
            {
                t_aKeto.add(t_objModi.getPositionOne());
            }
        }
        if ( !(t_aKeto.contains(a_objMonosaccharid.getRingStart()) || a_objMonosaccharid.getRingStart() == 1) )
        {
            this.m_aErrorList.add("Ring has to start at a carbonyl function");
        }
        int t_iRingSize = a_objMonosaccharid.getRingEnd() - a_objMonosaccharid.getRingStart();
        if ( a_objMonosaccharid.getRingStart() + t_iRingSize > a_objMonosaccharid.getSuperclass().getCAtomCount() )
        {
            this.m_aErrorList.add("Ring size exceeds backbone");
        }
    }
    // anomer settings open ring?
    if ( a_objMonosaccharid.getAnomer() == Anomer.OpenChain )
    {
        if ( a_objMonosaccharid.getRingStart() != Monosaccharide.OPEN_CHAIN || a_objMonosaccharid.getRingEnd() != Monosaccharide.OPEN_CHAIN )
        {
            this.m_aErrorList.add("Open chain has no ring closure");
        }
    }
    // NOTE(review): this repeats the test above for the case that both ring positions are
    // set, so the message is recorded twice then - kept as it is.
    if ( a_objMonosaccharid.getRingStart() != Monosaccharide.OPEN_CHAIN && a_objMonosaccharid.getRingEnd() != Monosaccharide.OPEN_CHAIN )
    {
        if ( a_objMonosaccharid.getAnomer() == Anomer.OpenChain )
        {
            this.m_aErrorList.add("Open chain has no ring closure");
        }
    }
    // Collect all linkages around the monosaccharide and test whether their positions and
    // linkage types are possible. The linkage types of ms-ms linkages are tested elsewhere.
    // Index = backbone position; true = the OH group / the H atom is still available.
    ArrayList<Boolean> t_aOH = new ArrayList<Boolean>();
    ArrayList<Boolean> t_aH = new ArrayList<Boolean>();
    for (int t_iCounter = 0; t_iCounter <= t_iChainLength; t_iCounter++)
    {
        t_aH.add(true);
        t_aOH.add(true);
    }
    // run over modification
    for (Modification t_objModi : a_objMonosaccharid.getModification())
    {
        int t_iPosOne = t_objModi.getPositionOne();
        if ( t_iPosOne <= 0 )
        {
            continue;
        }
        int t_iPosTwo = 0;
        if ( t_objModi.getPositionTwo() != null && t_objModi.getPositionTwo() > 0 )
        {
            t_iPosTwo = t_objModi.getPositionTwo();
        }
        if ( t_iPosTwo > t_iChainLength || t_iPosOne > t_iChainLength )
        {
            this.m_aErrorList.add("Modification postition out of chain length.");
        }
        else if ( t_objModi.getModificationType() == ModificationType.ACID )
        {
            t_aH.set(t_iPosOne, false);
        }
        else if ( t_objModi.getModificationType() == ModificationType.DEOXY )
        {
            t_aOH.set(t_iPosOne, false);
        }
        else if ( t_objModi.getModificationType() == ModificationType.KETO )
        {
            t_aH.set(t_iPosOne, false);
            if ( t_iPosOne != a_objMonosaccharid.getRingStart() && a_objMonosaccharid.getRingStart() != Monosaccharide.UNKNOWN_RING )
            {
                t_aOH.set(t_iPosOne, false);
            }
        }
        else if ( t_objModi.getModificationType() == ModificationType.TRIPLEBOND )
        {
            t_aH.set(t_iPosOne, false);
            t_aOH.set(t_iPosOne, false);
        }
    }
    // first pass: linkages with exactly one position reserve their position
    if ( a_objMonosaccharid.getParentEdge() != null )
    {
        for (Linkage t_objLinkage : a_objMonosaccharid.getParentEdge().getGlycosidicLinkages())
        {
            this.validateAttachPositions(t_objLinkage, true, false, t_iChainLength, t_aOH, t_aH);
        }
    }
    for (GlycoEdge t_objEdge : a_objMonosaccharid.getChildEdges())
    {
        for (Linkage t_objLinkage : t_objEdge.getGlycosidicLinkages())
        {
            this.validateAttachPositions(t_objLinkage, false, false, t_iChainLength, t_aOH, t_aH);
        }
    }
    // second pass: test whether the positions of alternative linkages are still available
    if ( a_objMonosaccharid.getParentEdge() != null )
    {
        for (Linkage t_objLinkage : a_objMonosaccharid.getParentEdge().getGlycosidicLinkages())
        {
            this.validateAttachPositions(t_objLinkage, true, true, t_iChainLength, t_aOH, t_aH);
        }
    }
    for (GlycoEdge t_objEdge : a_objMonosaccharid.getChildEdges())
    {
        for (Linkage t_objLinkage : t_objEdge.getGlycosidicLinkages())
        {
            this.validateAttachPositions(t_objLinkage, false, true, t_iChainLength, t_aOH, t_aH);
        }
    }
    // test whether the direction of the linkages is correct
    for (GlycoEdge t_objEdge : a_objMonosaccharid.getChildEdges())
    {
        this.validateDirection(t_objEdge);
    }
    // test if parent edge is a ms-ms linkage
    if ( a_objMonosaccharid.getParentEdge() != null )
    {
        this.testLinkageType(a_objMonosaccharid.getParentNode(), a_objMonosaccharid.getParentEdge(), a_objMonosaccharid);
    }
}

/**
 * Tests the positions at which one linkage attaches to a monosaccharide.
 * <p>
 * For the parent edge of the monosaccharide the child side of the linkage is used, for a
 * child edge the parent side. In the first pass (<code>a_bAlternative == false</code>)
 * only linkages with exactly one position are handled and a free position is marked as
 * used. In the second pass only linkages with more than one position are handled and the
 * positions are tested without being marked.
 *
 * @param a_objLinkage linkage to test
 * @param a_bParentEdge <code>true</code> if the linkage belongs to the parent edge of the
 *            monosaccharide, <code>false</code> if it belongs to one of its child edges
 * @param a_bAlternative <code>true</code> for the pass over linkages with alternative positions
 * @param a_iChainLength highest allowed backbone position
 * @param a_aOH availability of the OH group per position; updated in the first pass
 * @param a_aH availability of the H atom per position; updated in the first pass
 */
private void validateAttachPositions(Linkage a_objLinkage, boolean a_bParentEdge, boolean a_bAlternative,
        int a_iChainLength, ArrayList<Boolean> a_aOH, ArrayList<Boolean> a_aH)
{
    LinkageType t_enumType;
    Iterator<Integer> t_iterPos;
    int t_iCount;
    String t_strEdge;
    if ( a_bParentEdge )
    {
        t_enumType = a_objLinkage.getChildLinkageType();
        t_iCount = a_objLinkage.getChildLinkages().size();
        t_iterPos = a_objLinkage.getChildLinkages().iterator();
        t_strEdge = "parent";
    }
    else
    {
        t_enumType = a_objLinkage.getParentLinkageType();
        t_iCount = a_objLinkage.getParentLinkages().size();
        t_iterPos = a_objLinkage.getParentLinkages().iterator();
        t_strEdge = "child";
    }
    if ( a_bAlternative ? t_iCount <= 1 : t_iCount != 1 )
    {
        // this linkage belongs to the other pass (or has no position at all)
        return;
    }
    String t_strPrefix = ( a_bAlternative ? "Alternative attache position" : "Attache position" )
            + " of monosaccharide " + t_strEdge + " edge ";
    while ( t_iterPos.hasNext() )
    {
        int t_iPos = t_iterPos.next();
        if ( t_iPos <= 0 )
        {
            if ( t_iPos != Linkage.UNKNOWN_POSITION )
            {
                this.m_aErrorList.add("Linkage positions smaller than 1 are not allowed.");
            }
            continue;
        }
        if ( t_iPos > a_iChainLength )
        {
            this.m_aErrorList.add(t_strPrefix + "is out of chain length.");
            continue;
        }
        // DEOXY and H_AT_OH use the OH group of the position, H_LOSE uses its H atom
        ArrayList<Boolean> t_aSlots = null;
        String t_strType = null;
        if ( t_enumType == LinkageType.DEOXY )
        {
            t_aSlots = a_aOH;
            t_strType = "DEOXY";
        }
        else if ( t_enumType == LinkageType.H_AT_OH )
        {
            t_aSlots = a_aOH;
            t_strType = "H_AT_OH";
        }
        else if ( t_enumType == LinkageType.H_LOSE )
        {
            t_aSlots = a_aH;
            t_strType = "H_LOSE";
        }
        if ( t_aSlots != null )
        {
            if ( !t_aSlots.get(t_iPos) )
            {
                this.m_aErrorList.add(t_strPrefix + "is not possible (" + t_strType + ").");
            }
            else if ( !a_bAlternative )
            {
                t_aSlots.set(t_iPos, false);
            }
        }
        else if ( t_enumType == LinkageType.NONMONOSACCHARID )
        {
            this.m_aErrorList.add("Nonmonosaccharide linkage tpyes are not allowed in monosaccharide " + t_strEdge + " edge.");
        }
        else if ( t_enumType == LinkageType.UNVALIDATED )
        {
            this.m_aErrorList.add("Unvalidated linkage tpyes are not allowed in monosaccharide " + t_strEdge + " edge.");
        }
    }
}
/**
 * Tests the direction of an edge by means of the carbonyl positions of the connected
 * monosaccharide (the positions of its KETO modifications, or position 1 if it has none).
 * <ul>
 * <li>child is a monosaccharide: at least one child position of a linkage has to be a
 * carbonyl position of the child, otherwise an error is recorded</li>
 * <li>otherwise, parent is a monosaccharide: no parent position of a linkage may be a
 * carbonyl position of the parent, otherwise an error is recorded</li>
 * <li>neither is a monosaccharide: the edge is accepted</li>
 * </ul>
 *
 * @param t_edge edge to test
 * @return <code>true</code> if the edge is accepted, <code>false</code> if an error was recorded
 * @throws GlycoVisitorException propagated from the node type visitor
 */
private boolean validateDirection(GlycoEdge t_edge) throws GlycoVisitorException
{
    GlycoVisitorNodeType t_visType = new GlycoVisitorNodeType();
    if ( t_visType.isMonosaccharide(t_edge.getChild()) )
    {
        ArrayList<Integer> t_aCarbonyl = this.collectCarbonylPositions(t_visType.getMonosaccharide(t_edge.getChild()));
        for (Linkage t_objLinkage : t_edge.getGlycosidicLinkages())
        {
            for (Integer t_iPosition : t_objLinkage.getChildLinkages())
            {
                if ( t_aCarbonyl.contains(t_iPosition) )
                {
                    return true;
                }
            }
        }
        this.m_aErrorList.add("Child parent relationship (direction of linkages) broken");
        return false;
    }
    if ( !t_visType.isMonosaccharide(t_edge.getParent()) )
    {
        return true;
    }
    ArrayList<Integer> t_aCarbonyl = this.collectCarbonylPositions(t_visType.getMonosaccharide(t_edge.getParent()));
    for (Linkage t_objLinkage : t_edge.getGlycosidicLinkages())
    {
        for (Integer t_iPosition : t_objLinkage.getParentLinkages())
        {
            if ( t_aCarbonyl.contains(t_iPosition) )
            {
                this.m_aErrorList.add("Parent child relationship (direction of linkages) broken.");
                return false;
            }
        }
    }
    return true;
}

/**
 * Collects the first positions of all KETO modifications of a monosaccharide; if there
 * is none, the list contains only position 1.
 *
 * @param a_objMonosaccharide monosaccharide whose modifications are scanned
 * @return list of carbonyl positions, never empty
 */
private ArrayList<Integer> collectCarbonylPositions(Monosaccharide a_objMonosaccharide)
{
    ArrayList<Integer> t_aPositions = new ArrayList<Integer>();
    for (Modification t_objModification : a_objMonosaccharide.getModification())
    {
        if ( t_objModification.getModificationType() == ModificationType.KETO )
        {
            t_aPositions.add(t_objModification.getPositionOne());
        }
    }
    if ( t_aPositions.isEmpty() )
    {
        t_aPositions.add(1);
    }
    return t_aPositions;
}
/**
 * Resets the visitor for another run by emptying the error list, the warning list
 * and the collection held in m_aEdge.
 */
public void clear()
{
    m_aErrorList.clear();
    m_aWarningList.clear();
    m_aEdge.clear();
}
}