/*
 * EuroCarbDB, a framework for carbohydrate bioinformatics
 *
 * Copyright (c) 2006-2009, Eurocarb project, or third-party contributors as
 * indicated by the @author tags or express copyright attribution
 * statements applied by the authors.
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 * A copy of this license accompanies this distribution in the file LICENSE.txt.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
 * for more details.
 *
 * Last commit: $Rev: 1210 $ by $Author: glycoslave $ on $Date:: 2009-06-12 #$
 */
/**
 * @author Alessio Ceroni (a.ceroni@imperial.ac.uk)
 */

package org.eurocarbdb.application.glycoworkbench.plugin.reporting;

import org.eurocarbdb.application.glycoworkbench.plugin.grammar.*;
import org.eurocarbdb.application.glycoworkbench.plugin.*;
import org.eurocarbdb.application.glycoworkbench.*;
import org.eurocarbdb.application.glycanbuilder.*;

import java.util.*;

/**
 * Data model for a report comparing glycan profiles between two groups of
 * annotated scans: peak intensities are collected per structure, normalized
 * per scan, optionally deconvoluted into structural components
 * (monosaccharides, disaccharides, cores or terminals), and finally rescaled
 * by row or by table.
 */
public class ProfilesComparisonReportDocument {

    public static class Row {
        public Glycan structure;
        public String name;
        public double mz_ratio;
        public double[] intensities_firstgroup;
        public double[] intensities_secondgroup;

        public Row(Glycan g, String n, int fg_size, int sg_size) {
            structure = g.clone();
            name = n;
            mz_ratio = g.computeMZ();

            intensities_firstgroup = new double[fg_size];
            intensities_secondgroup = new double[sg_size];
            Arrays.fill(intensities_firstgroup,0.);
            Arrays.fill(intensities_secondgroup,0.);
        }

        public Row(FragmentEntry fe, int fg_size, int sg_size) {
            this(fe.fragment,fe.name,fg_size,sg_size);
        }

        public double getColumn(int ind) {
            if( ind<intensities_firstgroup.length )
                return intensities_firstgroup[ind];
            return intensities_secondgroup[ind-intensities_firstgroup.length];
        }

        public int getFirstGroupSize() {
            return intensities_firstgroup.length;
        }

        public int getSecondGroupSize() {
            return intensities_secondgroup.length;
        }

        public void addIntensities(Row other) {
            addIntensities(other,1.);
        }

        public void addIntensities(Row other, double weight) {
            int fg_size = Math.min(this.getFirstGroupSize(),other.getFirstGroupSize());
            for( int i=0; i<fg_size; i++ )
                this.intensities_firstgroup[i] += weight * other.intensities_firstgroup[i];

            int sg_size = Math.min(this.getSecondGroupSize(),other.getSecondGroupSize());
            for( int i=0; i<sg_size; i++ )
                this.intensities_secondgroup[i] += weight * other.intensities_secondgroup[i];
        }
    }

    private ProfilesComparisonReportOptions theOptions;
    private Grammar theGrammar;

    private Vector<String> names;
    private Vector<String> namesFirstGroup;
    private Vector<String> namesSecondGroup;
    private Vector<Row> rows;

    public ProfilesComparisonReportDocument(Vector<Scan> firstGroup, Vector<Scan> secondGroup,
            Map<Scan,String> scanNameMap, Grammar grammar,
            ProfilesComparisonReportOptions options) throws Exception {

        theOptions = options;
        theGrammar = grammar;

        // collect data
        rows = collectData(firstGroup,secondGroup);

        // collect names
        namesFirstGroup = collectNames(firstGroup,scanNameMap);
        namesSecondGroup = collectNames(secondGroup,scanNameMap);
        names = new Union<String>(namesFirstGroup).and(namesSecondGroup);

        // normalize
        normalize(rows,options);

        // deconvolute
        rows = deconvolute(rows,grammar,options);

        // normalize by row
        if( options.NORMALIZEBYROW )
            normalizeByRow(rows);
        else
            normalizeByTable(rows);
    }

    public Vector<String> getNames() {
        return names;
    }

    public Vector<String> getNamesFirstGroup() {
        return namesFirstGroup;
    }

    public Vector<String> getNamesSecondGroup() {
        return namesSecondGroup;
    }

    public Vector<Row> getRows() {
        return rows;
    }

    public int getNoRows() {
        return rows.size();
    }

    public int getNoColumns() {
        if( rows.size()==0 )
            return 0;
        int fg_size = rows.get(0).intensities_firstgroup.length;
        int sg_size = rows.get(0).intensities_secondgroup.length;
        return fg_size+sg_size;
    }

    public int getNoColumnsFirstGroup() {
        if( rows.size()==0 )
            return 0;
        return rows.get(0).intensities_firstgroup.length;
    }

    public int getNoColumnsSecondGroup() {
        if( rows.size()==0 )
            return 0;
        return rows.get(0).intensities_secondgroup.length;
    }

    public int size() {
        return rows.size();
    }

    // Build one row per annotated fragment entry, accumulating the peak
    // intensities of each scan in the corresponding column.
    private static Vector<Row> collectData(Vector<Scan> firstGroup, Vector<Scan> secondGroup) {
        int fg_size = (firstGroup!=null) ?firstGroup.size() :0;
        int sg_size = (secondGroup!=null) ?secondGroup.size() :0;

        TreeMap<FragmentEntry,Row> ret = new TreeMap<FragmentEntry,Row>();

        // collect data for first group
        for( int i=0; i<fg_size; i++ ) {
            Scan s = firstGroup.get(i);
            PeakAnnotationCollection pac = s.getAnnotatedPeakList().getPeakAnnotationCollection(0);
            for( PeakAnnotation pa : pac.getPeakAnnotations() ) {
                if( pa.isAnnotated() ) {
                    Row row = ret.get(pa.getAnnotation().getFragmentEntry());
                    if( row==null ) {
                        row = new Row(pa.getAnnotation().getFragmentEntry(),fg_size,sg_size);
                        row.name = GlycanRenderer.makeCompositionTextPlain(row.structure.getComposition(false));
                        ret.put(pa.getAnnotation().getFragmentEntry(),row);
                    }
                    row.intensities_firstgroup[i] += pa.getPeak().getIntensity();
                }
            }
        }

        // collect data for second group
        for( int i=0; i<sg_size; i++ ) {
            Scan s = secondGroup.get(i);
            PeakAnnotationCollection pac = s.getAnnotatedPeakList().getPeakAnnotationCollection(0);
            for( PeakAnnotation pa : pac.getPeakAnnotations() ) {
                if( pa.isAnnotated() ) {
                    Row row = ret.get(pa.getAnnotation().getFragmentEntry());
                    if( row==null ) {
                        row = new Row(pa.getAnnotation().getFragmentEntry(),fg_size,sg_size);
                        row.name = GlycanRenderer.makeCompositionTextPlain(row.structure.getComposition(false));
                        ret.put(pa.getAnnotation().getFragmentEntry(),row);
                    }
                    row.intensities_secondgroup[i] += pa.getPeak().getIntensity();
                }
            }
        }

        return new Vector<Row>(ret.values());
    }

    private static Vector<String> collectNames(Vector<Scan> group, Map<Scan,String> scanNameMap) {
        Vector<String> ret = new Vector<String>();
        if( group!=null ) {
            for( Scan s : group )
                ret.add(scanNameMap.get(s));
        }
        return ret;
    }

    // Normalize each scan (column) by its base peak, sum or average intensity,
    // depending on the selected normalization option.
    private static void normalize(Vector<Row> rows, ProfilesComparisonReportOptions options) {
        if( rows.size()==0 )
            return;

        int fg_size = rows.get(0).intensities_firstgroup.length;
        for( int i=0; i<fg_size; i++ ) {
            double div = 0.;
            for( Row row : rows ) {
                if( options.NORMALIZATION == options.BASEPEAK )
                    div = Math.max(div,row.intensities_firstgroup[i]);
                else if( options.NORMALIZATION == options.SUM )
                    div += row.intensities_firstgroup[i];
                else if( options.NORMALIZATION == options.AVERAGE )
                    div += row.intensities_firstgroup[i]/(double)rows.size();
            }
            for( Row row : rows )
                row.intensities_firstgroup[i] /= div;
        }

        int sg_size = rows.get(0).intensities_secondgroup.length;
        for( int i=0; i<sg_size; i++ ) {
            double div = 0.;
            for( Row row : rows ) {
                if( options.NORMALIZATION == options.BASEPEAK )
                    div = Math.max(div,row.intensities_secondgroup[i]);
                else if( options.NORMALIZATION == options.SUM )
                    div += row.intensities_secondgroup[i];
                else if( options.NORMALIZATION == options.AVERAGE )
                    div += row.intensities_secondgroup[i]/(double)rows.size();
            }
            for( Row row : rows )
                row.intensities_secondgroup[i] /= div;
        }
    }

    // Rescale each row to [-1,1] around the midpoint between its minimum and
    // maximum intensity.
    private static void normalizeByRow(Vector<Row> rows) {
        if( rows.size()==0 )
            return;

        int fg_size = rows.get(0).intensities_firstgroup.length;
        int sg_size = rows.get(0).intensities_secondgroup.length;

        for( Row row : rows ) {
            double min = 0.;
            double max = 0.;
            for( int i=0; i<fg_size; i++ ) {
                max = Math.max(row.intensities_firstgroup[i],max);
                min = Math.min(row.intensities_firstgroup[i],min);
            }
            for( int i=0; i<sg_size; i++ ) {
                max = Math.max(row.intensities_secondgroup[i],max);
                min = Math.min(row.intensities_secondgroup[i],min);
            }

            double center = (max+min)/2;
            double deviation = (max-min)/2;

            for( int i=0; i<fg_size; i++ )
                row.intensities_firstgroup[i] = (row.intensities_firstgroup[i]-center)/deviation;
            for( int i=0; i<sg_size; i++ )
                row.intensities_secondgroup[i] = (row.intensities_secondgroup[i]-center)/deviation;
        }
    }

    // Rescale the whole table by its largest absolute intensity.
    private static void normalizeByTable(Vector<Row> rows) {
        if( rows.size()==0 )
            return;

        int fg_size = rows.get(0).intensities_firstgroup.length;
        int sg_size = rows.get(0).intensities_secondgroup.length;

        double max = 0.;
        for( Row row : rows ) {
            for( int i=0; i<fg_size; i++ )
                max = Math.max(Math.abs(row.intensities_firstgroup[i]),max);
            for( int i=0; i<sg_size; i++ )
                max = Math.max(Math.abs(row.intensities_secondgroup[i]),max);
        }

        for( Row row : rows ) {
            for( int i=0; i<fg_size; i++ )
                row.intensities_firstgroup[i] /= max;
            for( int i=0; i<sg_size; i++ )
                row.intensities_secondgroup[i] /= max;
        }
    }

    // Redistribute the intensities of each structure over its structural
    // components, as selected by the deconvolution option.
    private static Vector<Row> deconvolute(Vector<Row> rows, Grammar grammar, ProfilesComparisonReportOptions options) throws Exception {
        if( rows.size()==0 || options.DECONVOLUTION == ProfilesComparisonReportOptions.NONE )
            return rows;

        int fg_size = rows.get(0).getFirstGroupSize();
        int sg_size = rows.get(0).getSecondGroupSize();

        TreeMap<FragmentEntry,Row> ret = new TreeMap<FragmentEntry,Row>();

        // place uncertain antennae in fuzzy structures
        if( options.DECONVOLUTION == ProfilesComparisonReportOptions.DISACCHARIDES ||
            options.DECONVOLUTION == ProfilesComparisonReportOptions.TERMINALS )
            rows = resolveFuzzyStructures(rows,grammar);

        for( Row r : rows ) {
            Map<FragmentEntry,Integer> components;
            if( options.DECONVOLUTION == ProfilesComparisonReportOptions.MONOSACCHARIDES )
                components = getMonosaccharides(r.structure);
            else if( options.DECONVOLUTION == ProfilesComparisonReportOptions.DISACCHARIDES )
                components = getDisaccharides(r.structure);
            else if( options.DECONVOLUTION == ProfilesComparisonReportOptions.CORES )
                components = getCore(r.structure);
            else if( options.DECONVOLUTION == ProfilesComparisonReportOptions.TERMINALS )
                components = getTerminals(r.structure);
            else
                throw new Exception("Unknown deconvolution type");

            for( Map.Entry<FragmentEntry,Integer> e : components.entrySet() ) {
                Row dest = ret.get(e.getKey());
                if( dest==null ) {
                    dest = new Row(e.getKey(),fg_size,sg_size);
                    ret.put(e.getKey(),dest);
                }
                dest.addIntensities(r,e.getValue());
            }
        }

        return new Vector<Row>(ret.values());
    }

    // Replace each fuzzy structure by the antenna placements accepted by a
    // grammar built from the non-fuzzy structures, splitting its intensities
    // evenly among them.
    private static Vector<Row> resolveFuzzyStructures(Vector<Row> rows, Grammar grammar) throws Exception {
        if( rows.size()==0 )
            return rows;

        int fg_size = rows.get(0).getFirstGroupSize();
        int sg_size = rows.get(0).getSecondGroupSize();

        // create grammar from non-fuzzy structure if necessary
        //if( grammar == null ) {
        Vector<Glycan> nonfuzzy_structures = new Vector<Glycan>();
        for( Row r : rows ) {
            if( !r.structure.isFuzzy() )
                nonfuzzy_structures.add(r.structure);
        }

        GrammarOptions options = new GrammarOptions();
        options.ADD_LINKAGE_INFO = false;
        options.MAX_LEVEL = 2;
        options.ADD_UNCLES = false;
        options.USE_SEEDS = false;
        options.TAG_CORES = false;

        grammar = Grammar.createGrammar(nonfuzzy_structures,options);
        //}

        // collect rows
        TreeMap<Glycan,Row> ret = new TreeMap<Glycan,Row>();
        for( Row r : rows ) {
            if( !r.structure.isFuzzy() ) {
                ret.put(r.structure,r);
            }
            else {
                if( r.structure.getNoAntennae()>3 ) {
                    continue;
                }

                // resolve fuzzy structure
                Vector<Glycan> all = r.structure.placeAntennae();
                Vector<Glycan> resolved = new Vector<Glycan>();
                for( Glycan g : all ) {
                    if( grammar.accept(g) )
                        resolved.add(g);
                }
                if( resolved.size()==0 )
                    System.err.println("Cannot resolve some fuzzy structures");
                    //throw new Exception("Cannot resolve some fuzzy structures");

                // distribute intensities
                double weight = 1./(double)resolved.size();
                for( Glycan g : resolved ) {
                    Row dest = ret.get(g);
                    if( dest==null ) {
                        String name = GlycanRenderer.makeCompositionTextPlain(g.getComposition(false));
                        dest = new Row(g,name,fg_size,sg_size);
                        ret.put(g,dest);
                    }
                    dest.addIntensities(r,weight);
                }
            }
        }

        return new Vector<Row>(ret.values());
    }

    private static void add(Map<FragmentEntry,Integer> ret, Glycan g, String name, int quantity) {
        FragmentEntry fe = new FragmentEntry(g,name);
        Integer old_count = ret.get(fe);
        if( old_count==null )
            ret.put(fe,quantity);
        else
            ret.put(fe,old_count+quantity);
    }

    // Count the monosaccharides (and substituents) in the structure.
    private static Map<FragmentEntry,Integer> getMonosaccharides(Glycan structure) {
        HashMap<FragmentEntry,Integer> ret = new HashMap<FragmentEntry,Integer>();
        getMonosaccharides(ret,structure.getRoot());
        return ret;
    }

    private static void getMonosaccharides(Map<FragmentEntry,Integer> ret, Residue node) {
        if( node.isSaccharide() || node.isSubstituent() ) {
            // make structure
            Residue r = node.cloneResidue();
            r.setPreferredPlacement(null);

            Glycan g = new Glycan(r,true,new MassOptions());
            String name = node.getTypeName();
            add(ret,g,name,1);
        }

        for( Linkage l : node.getChildrenLinkages() )
            getMonosaccharides(ret,l.getChildResidue());
    }

    // Count the disaccharides (parent-child residue pairs) in the structure.
    private static Map<FragmentEntry,Integer> getDisaccharides(Glycan structure) throws Exception {
        if( structure.isFuzzy() )
            throw new Exception("Cannot get disaccharides from fuzzy structures");

        HashMap<FragmentEntry,Integer> ret = new HashMap<FragmentEntry,Integer>();
        getDisaccharides(ret,structure.getRoot());
        return ret;
    }

    private static void getDisaccharides(Map<FragmentEntry,Integer> ret, Residue node) {
        Residue parent = node.getParent();
        if( parent!=null && ((parent.isSaccharide() || parent.isSubstituent()) &&
                             (node.isSaccharide() || node.isSubstituent())) ) {
            // make structure
            Residue rp = parent.cloneResidue();
            rp.setPreferredPlacement(null);

            Residue rc = node.cloneResidue();
            rc.setPreferredPlacement(null);

            rp.addChild(rc,node.getParentLinkage().getBonds());

            Glycan g = new Glycan(rp,true,new MassOptions());
            String name = rc.getTypeName() + node.getParentLinkage().toIupac() + rp.getTypeName();
            add(ret,g,name,1);
        }

        for( Linkage l : node.getChildrenLinkages() )
            getDisaccharides(ret,l.getChildResidue());
    }

    // Return the heaviest core type from the dictionary that is contained in
    // the structure.
    private static Map<FragmentEntry,Integer> getCore(Glycan structure) throws Exception {
        HashMap<FragmentEntry,Integer> ret = new HashMap<FragmentEntry,Integer>();

        // sort cores by mass descending
        GWSParser parser = new GWSParser();
        Vector<FragmentEntry> cores_ordered = new Vector<FragmentEntry>();
        for( CoreType ct : CoreDictionary.getCores() ) {
            Glycan core = new Glycan(ct.newCore(),true,new MassOptions());
            cores_ordered.add(new FragmentEntry(core,ct.getDescription()));
        }

        Collections.sort(cores_ordered, new Comparator<FragmentEntry>() {
            public int compare(FragmentEntry f1, FragmentEntry f2) {
                if( f1.mz_ratio>f2.mz_ratio )
                    return -1;
                if( f1.mz_ratio<f2.mz_ratio )
                    return 1;
                return 0;
            }
        });

        // match structures with all cores
        for( FragmentEntry core : cores_ordered ) {
            if( structure.contains(core.fragment,true,false) ) {
                ret.put(core,1);
                return ret;
            }
        }

        return ret;
    }

    // Count the occurrences of each terminal type from the dictionary.
    private static Map<FragmentEntry,Integer> getTerminals(Glycan structure) throws Exception {
        HashMap<FragmentEntry,Integer> ret = new HashMap<FragmentEntry,Integer>();

        // sort terminals by mass descending
        GWSParser parser = new GWSParser();
        Vector<FragmentEntry> terminals_ordered = new Vector<FragmentEntry>();
        for( TerminalType tt : TerminalDictionary.getTerminals() ) {
            Glycan terminal = new Glycan(tt.newTerminal(),true,new MassOptions());
            terminals_ordered.add(new FragmentEntry(terminal,tt.getDescription()));
        }

        Collections.sort(terminals_ordered, new Comparator<FragmentEntry>() {
            public int compare(FragmentEntry f1, FragmentEntry f2) {
                if( f1.mz_ratio>f2.mz_ratio )
                    return -1;
                if( f1.mz_ratio<f2.mz_ratio )
                    return 1;
                return 0;
            }
        });

        // match structures with all terminals
        for( FragmentEntry terminal : terminals_ordered ) {
            int count = structure.count(terminal.fragment,false,true);
            if( count>0 )
                ret.put(terminal,count);
        }

        return ret;
    }
}
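// ---------------------------------------------------------------------------
// Usage sketch (illustration only, not part of the original source). It shows
// how a caller might build and read a comparison report from two groups of
// annotated scans. The scan groups and the scan-to-name map are assumptions:
// in GlycoWorkbench they come from the workspace, and the options object from
// the reporting plugin's options dialog (a no-argument constructor for
// ProfilesComparisonReportOptions is assumed here for brevity). Passing null
// for the grammar relies on resolveFuzzyStructures() rebuilding one from the
// non-fuzzy structures, as the code above does.
//
//   Vector<Scan> firstGroup  = ...;   // annotated scans of the first group
//   Vector<Scan> secondGroup = ...;   // annotated scans of the second group
//   Map<Scan,String> scanNames = ...; // display name of each scan
//
//   ProfilesComparisonReportOptions opts = new ProfilesComparisonReportOptions();
//   ProfilesComparisonReportDocument doc =
//       new ProfilesComparisonReportDocument(firstGroup,secondGroup,scanNames,null,opts);
//
//   // one row per structure (or per deconvoluted component), one column per scan
//   for( ProfilesComparisonReportDocument.Row row : doc.getRows() ) {
//       StringBuilder line = new StringBuilder(row.name);
//       for( int c=0; c<doc.getNoColumns(); c++ )
//           line.append('\t').append(row.getColumn(c));
//       System.out.println(line);
//   }
// ---------------------------------------------------------------------------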