/* * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fhcrc.cpl.viewer.qa; import org.fhcrc.cpl.toolbox.filehandler.TabLoader; import org.fhcrc.cpl.toolbox.ApplicationContext; import org.fhcrc.cpl.toolbox.proteomics.filehandler.ProtXmlReader; import org.fhcrc.cpl.toolbox.proteomics.filehandler.ProteinGroup; import org.apache.log4j.Logger; import javax.xml.stream.XMLStreamException; import java.io.*; import java.util.*; import java.util.List; /** * test */ public class QAUtilities { protected static Logger _log = Logger.getLogger(QAUtilities.class); public static void createAllProtText(File allProtXmlFile, File protGeneFile, File outFile, float minProteinProphet) throws IOException, XMLStreamException { Map<String,List<String>> ipiGeneListMap = QAUtilities.loadIpiGeneListMap(protGeneFile); ApplicationContext.setMessage("Gene map loaded. " + ipiGeneListMap.size() + " proteins with genes"); String headerLine = "*Group\tGroup_Probability\tProtein_Probability\tL2H_Mean\tL2H_StdDev\tRatio_Peps\tH2L_Mean\tH2L_StdDev\tnum_Indistinguishable_Proteins\tIndistinguishable_Proteins\tnum_Genes\tGene_Symbol\tPeptides"; PrintWriter pw = new PrintWriter(outFile); pw.println(headerLine); ProtXmlReader protXmlReader = new ProtXmlReader(allProtXmlFile); ProtXmlReader.ProteinGroupIterator groupIterator = protXmlReader.iterator(); while (groupIterator.hasNext()) { ProteinGroup proteinGroup = groupIterator.next(); List<ProtXmlReader.Protein> proteins = proteinGroup.getProteins(); for (int i=0; i<proteins.size(); i++) { ProtXmlReader.Protein protein = proteins.get(i); if (minProteinProphet > 0 && protein.getProbability() < minProteinProphet) continue; //For proteins after the first, "group" number is actually group number with _# after it, where _# is //the count StringBuffer lineBuf = new StringBuffer("" + proteinGroup.getGroupNumber()); if (i>0) lineBuf.append("_" + i); lineBuf.append("\t" + proteinGroup.getGroupProbability()); lineBuf.append("\t" + protein.getProbability()); ProtXmlReader.QuantitationRatio ratio = protein.getQuantitationRatio(); if (ratio == null) lineBuf.append("\t-666\t-666\t-666\t-666\t-666"); else { lineBuf.append("\t" + ratio.getRatioMean()); lineBuf.append("\t" + ratio.getRatioStandardDev()); lineBuf.append("\t" + ratio.getRatioNumberPeptides()); lineBuf.append("\t" + ratio.getHeavy2lightRatioMean()); lineBuf.append("\t" + ratio.getHeavy2lightRatioStandardDev()); } StringBuffer indistProteinNamesBuf = new StringBuffer(""); List<String> indistProteinNames = protein.getIndistinguishableProteinNames(); if (indistProteinNames == null) indistProteinNames = new ArrayList<String>(1); indistProteinNames.add(protein.getProteinName()); for (int j=0; j<indistProteinNames.size(); j++) { if (j>0) indistProteinNamesBuf.append(";"); indistProteinNamesBuf.append(indistProteinNames.get(j)); } lineBuf.append("\t" + indistProteinNames.size()); lineBuf.append("\t" + indistProteinNamesBuf); //num_genes, gene_symbol Set<String> genesAllIndistProteins = new HashSet<String>(); for (String indistProteinName : indistProteinNames) { List<String> geneList = ipiGeneListMap.get(indistProteinName); if (geneList != null) genesAllIndistProteins.addAll(geneList); } lineBuf.append("\t" + genesAllIndistProteins.size()); StringBuffer geneSymbolBuf = new StringBuffer(""); boolean firstGene = true; for (String gene : genesAllIndistProteins) { if (!firstGene) geneSymbolBuf.append(";"); geneSymbolBuf.append(gene); firstGene = false; } if (geneSymbolBuf.length() == 0) geneSymbolBuf.append("NA"); lineBuf.append("\t" + geneSymbolBuf); StringBuffer peptidesBuf = new StringBuffer(""); List<ProtXmlReader.Peptide> peptides = protein.getPeptides(); int numWrittenPeptides = 0; Set<String> peptidesThisProtein = new HashSet<String>(); for (ProtXmlReader.Peptide peptide : peptides) { if (peptide.isContributingEvidence() && peptide.isNondegenerateEvidence()) { if (numWrittenPeptides>0) peptidesBuf.append(";"); peptidesBuf.append(peptide.getPeptideSequence()); peptidesThisProtein.add(peptide.getPeptideSequence()); numWrittenPeptides++; } } if (peptidesBuf.length() == 0) peptidesBuf.append("NA"); lineBuf.append("\t" + peptidesBuf); pw.println(lineBuf); pw.flush(); } } pw.flush(); pw.close(); } public static Map<String,List<String>> loadGeneIpiListMap(File protGeneFile) throws IOException { Map<String,List<String>> geneIpiListMap = new HashMap<String,List<String>>(); ApplicationContext.setMessage("Loading gene mapping file..."); TabLoader tabLoader = new TabLoader(protGeneFile); tabLoader.setColumns(new TabLoader.ColumnDescriptor[] { new TabLoader.ColumnDescriptor("protein", String.class), new TabLoader.ColumnDescriptor("genes", String.class)}); TabLoader.TabLoaderIterator iter = tabLoader.iterator(); while (iter.hasNext()) { Map rowMap = (Map) iter.next(); String genes = (String) rowMap.get("genes"); if (genes == null) continue; String[] geneArray = genes.toString().split("//"); for (String gene : geneArray) { List<String> proteins = geneIpiListMap.get(gene); if (proteins == null) { proteins = new ArrayList<String>(); geneIpiListMap.put(gene, proteins); } String protein = (String) rowMap.get("protein"); proteins.add(protein); } } return geneIpiListMap; } public static Map<String,List<String>> loadIpiGeneListMap(File protGeneFile) throws IOException { Map<String,List<String>> ipiGeneListMap = new HashMap<String,List<String>>(); ApplicationContext.setMessage("Loading gene mapping file..."); TabLoader tabLoader = new TabLoader(protGeneFile); tabLoader.setColumns(new TabLoader.ColumnDescriptor[] { new TabLoader.ColumnDescriptor("protein", String.class), new TabLoader.ColumnDescriptor("genes", String.class)}); TabLoader.TabLoaderIterator iter = tabLoader.iterator(); while (iter.hasNext()) { Map rowMap = (Map) iter.next(); String protein = (String) rowMap.get("protein"); if (protein != null) { Object genesStringObj = rowMap.get("genes"); if (genesStringObj == null) continue; String[] geneArray = genesStringObj.toString().split("//"); List<String> geneList = new ArrayList<String>(); for (String gene : geneArray) { if (gene != null && gene.length() > 0) geneList.add(gene); } if (!geneList.isEmpty()) ipiGeneListMap.put(protein, geneList); } } return ipiGeneListMap; } }