/* * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fhcrc.cpl.viewer.ms2; import org.fhcrc.cpl.toolbox.statistics.BasicStatistics; import org.apache.log4j.Logger; import org.fhcrc.cpl.toolbox.ApplicationContext; import org.fhcrc.cpl.toolbox.commandline.CommandLineModuleExecutionException; import java.io.*; import java.util.*; /** * This class doesn't do parsing of ProtXML files itself. It uses GeneMappingReader for that. * * This is for utilities to work with the output of GeneMappingReader */ public class GeneMappingUtilities { protected static Logger _log = Logger.getLogger(GeneMappingUtilities.class); public static Map<String,List<String>> loadIPIGeneMap(File proteinGeneMapFile) throws CommandLineModuleExecutionException { BufferedReader br = null; Map<String,List<String>> ipiGeneMap = new HashMap<String,List<String>>(); try { br = new BufferedReader(new FileReader(proteinGeneMapFile)); String line = null; while ((line = br.readLine()) != null) { String[] words = line.split("\t"); String ipi = words[0]; if (words.length < 2 || words[1] == null || words[1].length() < 1) continue; String geneListString = words[1]; String[] geneArray = geneListString.split("//"); List<String> geneList = new ArrayList<String>(); for (String gene : geneArray) geneList.add(gene); ipiGeneMap.put(ipi, geneList); } ApplicationContext.setMessage("Loaded " + ipiGeneMap.size() + " genes from lookup file"); } catch (Exception e) { throw new CommandLineModuleExecutionException("Failed to load gene file: ", e); } finally { if (br != null) try {br.close();} catch (Exception e) {} } return ipiGeneMap; } public static class InfoForGene { protected List<Double> ratios; protected List<Double> intensities1; protected List<Double> intensities2; protected Set<String> proteins; protected List<String> peptides; protected String symbol; public InfoForGene(String symbol, List<String> peptides, List<Double> ratios, List<Double> intensities1, List<Double> intensities2, Set<String> proteins) { this.symbol = symbol; this.ratios = ratios; this.proteins = proteins; this.peptides = peptides; this.intensities1 = intensities1; this.intensities2 = intensities2; } public InfoForGene(String symbol) { this(symbol, new ArrayList<String>(), new ArrayList<Double>(), new ArrayList<Double>(), new ArrayList<Double>(), new HashSet<String>() ); } public List<Double> getRatios() { return ratios; } public void setRatios(List<Double> ratios) { this.ratios = ratios; } public List<Double> getIntensities1() { return intensities1; } public void setIntensities1(List<Double> intensities1) { this.intensities1 = intensities1; } public List<Double> getIntensities2() { return intensities2; } public void setIntensities2(List<Double> intensities2) { this.intensities2 = intensities2; } public Set<String> getProteins() { return proteins; } public void setProteins(Set<String> proteins) { this.proteins = proteins; } public String getSymbol() { return symbol; } public void setSymbol(String symbol) { this.symbol = symbol; } public List<String> getPeptides() { return peptides; } public void setPeptides(List<String> peptides) { this.peptides = peptides; } public String toString() { List<Double> ratiosWithoutZeroes = new ArrayList<Double>(); boolean hasNumeratorNonzero = false; //System.err.println("ratios: " + getRatios().size() + ", i1: " + intensities1.size() + ", i2: " + intensities2.size()); for (int i=0; i<getRatios().size(); i++) { //System.err.println("\ti1: " + intensities1.get(i)); if (intensities1.get(i) > 0) { hasNumeratorNonzero = true; if (intensities2.get(i) > 0) ratiosWithoutZeroes.add(ratios.get(i)); } } if (ratiosWithoutZeroes.size() == 0) { if (hasNumeratorNonzero) ratiosWithoutZeroes.add(20.0); else ratiosWithoutZeroes.add(0.0); } int numNumeratorPeptides = 0; int numDenomenatorPeptides = 0; int numNumerDenomPeptides = 0; for (int i=0; i<ratios.size(); i++) { boolean numer = false; boolean denom = false; if (intensities1.get(i) > 0) numer = true; if (intensities2.get(i) > 0) denom = true; if (numer) { numNumeratorPeptides++; if (denom) numNumerDenomPeptides++; } if (denom) numDenomenatorPeptides++; } double[] ratiosForMean = new double[ratios.size()]; for(int i=0; i<ratios.size(); i++) if (ratios.get(i) == 0) ratiosForMean[i] = 0.0001; else ratiosForMean[i] = ratios.get(i); double meanRatio = BasicStatistics.geometricMean(ratiosForMean); return getSymbol() + "\t" + meanRatio + "\t" + BasicStatistics.geometricMean(ratiosWithoutZeroes) + "\t" + numNumeratorPeptides + "\t" + numDenomenatorPeptides + "\t" + numNumerDenomPeptides + "\t" + BasicStatistics.standardDeviation(ratiosWithoutZeroes) + "\t" + getProteins().size() + "\t" + (BasicStatistics.mean(getIntensities1()) + BasicStatistics.mean(getIntensities2())); } public static void writeGeneRatioFile(Collection<InfoForGene> infosForGenes, File outFile) throws CommandLineModuleExecutionException { PrintWriter outPW = null; try { outPW = new PrintWriter(outFile); outPW.println("gene\tratio\tratio_no_zeroes\tnum_numerator_peptides\tnum_denominator_peptides\tnum_num_denom_peptides\trationozero_std_dev\tnum_proteins\tmean_sum_int1int2"); for (InfoForGene infoForGene : infosForGenes) { outPW.println(infoForGene.toString()); outPW.flush(); } } catch(Exception e) { throw new CommandLineModuleExecutionException(e); } finally { if (outPW != null) outPW.close(); } } } }