/* * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fhcrc.cpl.toolbox.proteomics.feature; import org.fhcrc.cpl.toolbox.datastructure.Pair; import org.fhcrc.cpl.toolbox.datastructure.Tree2D; import java.util.Arrays; import java.util.ArrayList; import java.util.Iterator; import java.util.Collections; import java.io.File; import java.awt.*; /** * Created by IntelliJ IDEA. * User: mbellew * Date: May 23, 2005 * Time: 11:30:21 AM */ public class AnalyzeICAT { public static int DEFAULT_MAX_LABEL_COUNT = 3; protected static ArrayList prePopulatedTags = null; public static class IsotopicLabel { //an identifier for this label. Human-readable protected String name = ""; protected float light; // weight of base tag protected float heavy; // delta weight (e.g. 9.03 for icat) protected char residue = ' '; // resiude labeled; ' ' means label affects non-residue (e.g. C or N terminus) protected int maxLabelCount; // maximum number of labels to consider per peptide public IsotopicLabel(String name, float light, float heavy, char residue, int maxLabelCount) { this(light, heavy, residue, maxLabelCount); setName(name); } public IsotopicLabel(float light, float heavy, char residue, int maxLabelCount) { setLight(light); setHeavy(heavy); setResidue(residue); setMaxLabelCount(maxLabelCount); } /** * Parse an isotopic label string. An example string representation is "100.0+4.05#3@C" to * be parsed as floating point light tag weight, plus symbol, delta weight for the heavy * label, number symbol, maximum number of labels to consider, at sign, residue to be * labeled. * * If the maximum number of labels to consider is left off, it defaults to 3. If supplied, * this must be positive. * If the residue to be labeled is left off, it defaults to a blank to indicate that the * labeling scheme a part of the peptide other than a specific residue (e.g. the C or N terminus). */ public IsotopicLabel(String s) { float light, heavy; char residue = ' '; int maxLabelCount = DEFAULT_MAX_LABEL_COUNT; int plus = s.indexOf("+"); int hash = s.indexOf("#"); int at = s.indexOf("@"); if (plus == -1) throw new IllegalArgumentException("Malformed label string; expected light tag weight + delta mass: " + s); if (hash != -1 && hash < plus) throw new IllegalArgumentException("Malformed label string; '#' must follow '+': " + s); if (at != -1 && (at < plus || at < hash)) throw new IllegalArgumentException("Malformed label string; '@' must follow '+' and '#': " + s); light = Float.parseFloat(s.substring(0,plus)); if ( -1 != hash ) heavy = Float.parseFloat(s.substring(plus+1,hash)); else if ( -1 != at ) heavy = Float.parseFloat(s.substring(plus+1,at)); else heavy = Float.parseFloat(s.substring(plus+1)); if ( -1 != hash ) if ( -1 != at ) maxLabelCount = Integer.parseInt(s.substring(hash+1,at)); else maxLabelCount = Integer.parseInt(s.substring(hash+1)); if ( -1 != at ) residue = s.charAt(at+1); setLight(light); setHeavy(heavy); setResidue(residue); setMaxLabelCount(maxLabelCount); } public String toString() { return "" + getLight() + "+" + getHeavy() + (getMaxLabelCount() == DEFAULT_MAX_LABEL_COUNT ? "" : "#" + getMaxLabelCount()) + (getResidue() == ' ' ? "" : "@" + getResidue()); } public String getName() { return name; } private void setName(String name) { this.name = name; } public float getLight() { return light; } private void setLight(float light) { if (light < 0.f) throw new IllegalArgumentException("Light tag weight must be non-negative"); this.light = light; } public float getHeavy() { return heavy; } private void setHeavy(float heavy) { if (heavy <= 0.f) throw new IllegalArgumentException("Heavy tag weight must be positive"); this.heavy = heavy; } public char getResidue() { return residue; } private void setResidue(char residue) { if ( residue != ' ' && (residue < 'A' || residue > 'Y' ) ) throw new IllegalArgumentException("Illegal residue character '" + residue + "'"); this.residue = residue; } public int getMaxLabelCount() { return maxLabelCount; } private void setMaxLabelCount(int maxLabelCount) { if ( maxLabelCount <= 0 ) throw new IllegalArgumentException("maxLabelCount must be at least 1"); this.maxLabelCount = maxLabelCount; } } //dhmay changing, consolidating default into the one arraylist // public static IsotopicLabel icatLabel = new IsotopicLabel( 227.1263F, 9.0297F, 'C', DEFAULT_MAX_LABEL_COUNT); // NOTE: I computed these myself (mbellew) public static IsotopicLabel icatLabel = (IsotopicLabel) getPrePopulatedLabels().get(0); //constants used to specify how mass tolerance should be calculated public static final int DELTA_MASS_ABSOLUTE = 0; public static final int DELTA_MASS_PPM = 1; //defaults for mass and time tolerances public static final float DEFAULT_DELTA_MASS = 0.2F; public static final float DEFAULT_DELTA_TIME = 10.0F; public static final int DEFAULT_DELTA_MASS_TYPE = DELTA_MASS_ABSOLUTE; /** * Utility method to calculate the absolute mass tolerance, given a mass tolerance * parameter that may be absolute or relative * @param centerMass * @param deltaMass * @param deltaMassType * @return */ protected static float calculateAbsoluteDeltaMass(float centerMass, float deltaMass, int deltaMassType) { if (deltaMassType == DELTA_MASS_ABSOLUTE) return deltaMass; //deltamass must be in ppm return (deltaMass * centerMass) / 1000000; } /** * Uses default values for mass and time tolerance * @param featuresIN * @param label * @return */ public static ArrayList analyze1(Feature[] featuresIN, IsotopicLabel label) { return analyze1(featuresIN, label, DEFAULT_DELTA_MASS, DELTA_MASS_ABSOLUTE, DEFAULT_DELTA_TIME); } /** * can be run on deconvoluted or non-deconvoluted feature set * @param featuresIN * @param label * @param deltaMass * @param deltaMassType absolute or ppm * @param deltaTime * @return */ public static ArrayList analyze1(Feature[] featuresIN, IsotopicLabel label, float deltaMass, int deltaMassType, float deltaTime) { Feature[] features = (Feature[])featuresIN.clone(); Arrays.sort(features, Spectrum.comparePeakMzAsc); Tree2D tree = new Tree2D(); for (int i = 0; i < features.length; i++) { Feature feature = features[i]; feature.excluded = false; tree.add(feature.mass, feature.time, feature); } ArrayList pairs = new ArrayList(); ArrayList heavies = new ArrayList(); for (int i = 0; i < features.length; i++) { Feature light = features[i]; if (light.excluded) continue; float absoluteDeltaMass = calculateAbsoluteDeltaMass(light.mass, deltaMass, deltaMassType); tree.getPoints(light.mass + label.getHeavy()-absoluteDeltaMass, light.time-deltaTime, light.mass+label.getHeavy()+absoluteDeltaMass, light.time+deltaTime, heavies); if (heavies.isEmpty()) continue; Feature heavy = null; for (Iterator it = heavies.iterator(); it.hasNext();) { Feature next = (Feature)it.next(); if (next.charge != light.charge) continue; heavy = next; } if (null == heavy) continue; heavy.excluded = true; Pair pair = new Pair(light, heavy); pairs.add(pair); } return pairs; // (Pair[])pairs.toArray(new Pair[0]); } /** * Uses default values for mass and time tolerance * @param featuresIN * @param label * @return */ public static ArrayList analyze(Feature[] featuresIN, IsotopicLabel label) { return analyze(featuresIN, label, DEFAULT_DELTA_MASS, DELTA_MASS_ABSOLUTE, DEFAULT_DELTA_TIME); } /**In some way this seems a lot like deconvolute (FeatureSet.deconvolute()). * However, in that case we can group features by mass. In this case, we don't * know the adjusted mass until after we align (we need to know the number of * modifications). * * @param featuresIN * @param label * @param deltaMass * @param deltaMassType absolute or ppm * @param deltaTime * @return */ public static ArrayList analyze(Feature[] featuresIN, IsotopicLabel label, float deltaMass, int deltaMassType, float deltaTime) { Feature[] features = (Feature[])featuresIN.clone(); Arrays.sort(features, Spectrum.comparePeakIntensityDesc); Tree2D tree = new Tree2D(); for (int i = 0; i < features.length; i++) { Feature feature = features[i]; feature.excluded = false; tree.add(feature.mass, feature.time, feature); } ArrayList pairs = new ArrayList(); ArrayList candidates = new ArrayList(); ArrayList list = new ArrayList(); for (int i = 0; i < features.length; i++) { Feature anchor = features[i]; if (anchor.excluded) continue; float mass = anchor.mass; float time = anchor.time; int charge = anchor.charge; // match by charge, if data is deconvoluted then all charge==1 // we don't know if we have heavy or light candidates.clear(); for (int count=-label.maxLabelCount ; count<=label.maxLabelCount ; count++) { if (count == 0) continue; //deltaMass may be specified in daltons or in ppm. If ppm, need to calculate //the absolute deltaMass to check around this mass float absoluteDeltaMass = calculateAbsoluteDeltaMass(mass, deltaMass, deltaMassType); tree.getPoints(mass + count*label.getHeavy()-absoluteDeltaMass, time-deltaTime, mass+count*label.getHeavy()+absoluteDeltaMass, time+deltaTime, list); if (list.isEmpty()) continue; Feature candidate = null; for (Iterator it = list.iterator(); it.hasNext();) { Feature next = (Feature)it.next(); if (next.charge != charge) continue; if (next.excluded) continue; if (null == candidate || next.totalIntensity > candidate.totalIntensity) candidate = next; } if (null != candidate) candidates.add(candidate); } if (candidates.isEmpty()) continue; candidates.add(anchor); Collections.sort(candidates, Spectrum.comparePeakMassAsc); // 99% of the time there is exactly one pair Pair pair = null; if (candidates.size() == 2) { pair = new Pair(candidates.get(0), candidates.get(1)); } else { // what range of values do we have Feature first = (Feature)candidates.get(0); Feature last = (Feature)candidates.get(candidates.size()-1); int range = Math.round((last.mass-first.mass)/label.getHeavy()); assert(range+1 >= candidates.size()); // slot them into position int anchorIndex=0; Feature[] slots = new Feature[range+1]; for (int f=0 ; f<candidates.size() ; f++) { Feature feature = (Feature)candidates.get(f); // System.out.println("" + (feature.mass-anchor.mass) + "\t" + feature.toString()); int s = Math.round((feature.mass-first.mass)/label.getHeavy()); slots[s] = feature; if (feature == anchor) anchorIndex = f; } // If we had peptide identifications, this would be different, // we're just guessing here... // OK, now what. Maybe they pair up nicely if (range+1 == candidates.size() && candidates.size()%2==0) { int i1 = (anchorIndex/2)*2; int i2 = i1+1; pair = new Pair(slots[i1],slots[i2]); assert anchor == pair.first || anchor == pair.second; } else { // OK they don't pair up nicely, there's no guarantee that our anchor peak is the best match here. // I'm going to use a completely arbitrary distance score to find the best match // if it doesn't contain the anchor throw them out and try again int remaining = candidates.size(); while (remaining > 1) { // find best pair, with some hokey distance measure // This case doesn't happen often, therefore this is not tested very much either int bestA = -1, bestB = -1; double bestDist = Double.MAX_VALUE; for (int a=0 ; a<range ; a++) { if (slots[a] == null) continue; for (int b=a+1 ; b<= range; b++) { if (slots[b] == null) continue; Feature f = slots[a]; Feature g = slots[b]; double dist = Math.pow(a-b,2) + Math.pow(f.time-g.time,2) + Math.pow(Math.log(f.totalIntensity)-Math.log(g.totalIntensity),2); if (dist < bestDist) { bestDist = dist; bestA = a; bestB = b; } } } if (slots[bestA] == anchor || slots[bestB] == anchor) { pair = new Pair(slots[bestA],slots[bestB]); break; } slots[bestA] = null; slots[bestB] = null; remaining-=2; } } } if (null == pair) { anchor.excluded = true; } else { Feature f = (Feature)pair.first; Feature g = (Feature)pair.second; f.excluded = true; g.excluded = true; if (g.mass < f.mass) { pair.first = g; pair.second = f; } pairs.add(pair); } } return pairs; } public static void main(String[] args) throws Exception { //MSRun run = MSRun.load("E:\\mzxml\\3protmix\\032505KD013_ICAT_3mix.mzXML"); FeatureSet fs = new FeatureSet(new File("E:\\mzxml\\3protmix\\032505KD013_ICAT_3mix.peptides.tsv"), Color.BLACK); ArrayList list = analyze(fs.getFeatures(), icatLabel); System.out.println("mz\tscan\tmass\ttotalIntensity\tdeltaMasss\ta/a+b"); for (int i = 0; i < list.size(); i++) { Pair pair = (Pair)list.get(i); Feature a = (Feature)pair.first; Feature b = (Feature)pair.second; System.out.println("" + a.mz + "\t" + a.scan + "\t" + a.mass + "\t" + (a.totalIntensity) + "\t" + (b.mass-a.mass) + "\t" + (a.totalIntensity / b.totalIntensity)); } } /** * Returns an ArrayList of pre-populated IsotopicLabels. * If the list isn't yet populated, populate it here * * @return an arraylist of pre-populated IsotopicLabels */ public static ArrayList getPrePopulatedLabels() { if (prePopulatedTags == null) { prePopulatedTags = new ArrayList(); prePopulatedTags.add(new IsotopicLabel("Cleavable ICAT",227.1263F,9.0297F,'C', DEFAULT_MAX_LABEL_COUNT)); prePopulatedTags.add(new IsotopicLabel("O18",0.0F,4.0085F,' ',1)); prePopulatedTags.add(new IsotopicLabel("Silac",0.0F,6.0201F,'K', DEFAULT_MAX_LABEL_COUNT)); prePopulatedTags.add(new IsotopicLabel("N-terminal",100.4F,4.0313F, 'K',DEFAULT_MAX_LABEL_COUNT)); prePopulatedTags.add(new IsotopicLabel("Acrylamide (D0/D3)",71.03657F,3.0188F,'C', DEFAULT_MAX_LABEL_COUNT)); } return prePopulatedTags; } }