/*
 * Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.fhcrc.cpl.toolbox.proteomics.feature.filehandler;

import org.fhcrc.cpl.toolbox.proteomics.feature.FeatureSet;
import org.fhcrc.cpl.toolbox.proteomics.feature.Feature;
import org.fhcrc.cpl.toolbox.proteomics.feature.AnalyzeICAT;
import org.fhcrc.cpl.toolbox.proteomics.feature.FeaturePepXmlWriter;
import org.fhcrc.cpl.toolbox.proteomics.feature.extraInfo.MS2ExtraInfoDef;
import org.fhcrc.cpl.toolbox.proteomics.feature.extraInfo.IsotopicLabelExtraInfoDef;
import org.fhcrc.cpl.toolbox.filehandler.TempFileManager;
import org.fhcrc.cpl.toolbox.filehandler.SimpleXMLStreamReader;
import org.fhcrc.cpl.toolbox.ApplicationContext;
import org.fhcrc.cpl.toolbox.proteomics.PeptideGenerator;
import org.fhcrc.cpl.toolbox.proteomics.MS2Modification;
import org.fhcrc.cpl.toolbox.proteomics.ModifiedAminoAcid;
import org.fhcrc.cpl.toolbox.proteomics.QuantitationUtilities;
import org.fhcrc.cpl.toolbox.proteomics.filehandler.*;
import org.apache.log4j.Logger;

import javax.xml.stream.XMLStreamException;
import java.io.*;
import java.util.List;
import java.util.ArrayList;
import java.util.Iterator;

/**
 * File handler that loads FeatureSets from pepXML files and saves FeatureSets as pepXML.
 * Each fraction in a pepXML file is loaded as one FeatureSet; each peptide in a fraction
 * becomes one Feature.
 */
public class PepXMLFeatureFileHandler extends BaseFeatureSetFileHandler
        implements FeatureSetFileHandler
{
    static Logger _log = Logger.getLogger(PepXMLFeatureFileHandler.class);

    //index handed to FeaturePepXmlWriter.setFirstSpectrumQueryIndex() when saving
    protected int firstSpectrumQueryIndex = 1;

    //internal codes for the quantitation algorithm detected in a pepXML file
    public static final int QUANT_ALGORITHM_Q3 = 0;
    public static final int QUANT_ALGORITHM_XPRESS = 1;

    public static final String FILE_TYPE_NAME = "PEPXML";

    protected static PepXMLFeatureFileHandler singletonInstance = null;

    //search engine name handed to FeaturePepXmlWriter.set_searchEngine() when saving
    protected String _searchEngine = BasePepXmlWriter.SEARCH_ENGINE_XTANDEM_COMET;

    /**
     * Lazily create and return the shared handler instance.
     * NOTE(review): the lazy initialization is unsynchronized.
     * @return the shared instance
     */
    public static PepXMLFeatureFileHandler getSingletonInstance()
    {
        if (singletonInstance == null)
            singletonInstance = new PepXMLFeatureFileHandler();
        return singletonInstance;
    }

    /**
     * Load all FeatureSets, one per fraction, from a pepXML file
     * @param file pepXML file to read
     * @return the FeatureSets, in the order the fraction iterator yields them
     * @throws IOException if the file can't be read or parsed
     */
    public List<FeatureSet> loadAllFeatureSets(File file)
            throws IOException
    {
        PepXMLFeatureSetIterator fsi = new PepXMLFeatureSetIterator(file);
        List<FeatureSet> result = new ArrayList<FeatureSet>();
        while (fsi.hasNext())
            result.add(fsi.next());
        return result;
    }

    /**
     * Load first FeatureSet in a file. next() is called without checking hasNext(), so
     * behavior for a file with no fractions is whatever the fraction iterator does then.
     * @param file pepXML file to read
     * @return the FeatureSet built from the first fraction
     * @throws IOException if the file can't be read or parsed
     */
    public FeatureSet loadFeatureSet(File file)
            throws IOException
    {
        PepXMLFeatureSetIterator fsi = new PepXMLFeatureSetIterator(file);
        return fsi.next();
    }

    /**
     * Get an iterator on the FeatureSets in a pepXML file. Each call to next() converts
     * one fraction into a FeatureSet.
     */
    public static class PepXMLFeatureSetIterator implements Iterator<FeatureSet>
    {
        protected PepXmlLoader pepXmlLoader;
        PepXmlLoader.FractionIterator fractionIterator;
        protected File sourceFile;

        /**
         * Open the pepXML file and position on its fractions
         * @param file pepXML file to read
         * @throws IOException on I/O failure, or wrapping any XMLStreamException
         */
        public PepXMLFeatureSetIterator(File file)
                throws IOException
        {
            try
            {
                sourceFile = file;
                pepXmlLoader = new PepXmlLoader(sourceFile, _log);
                _log.debug("Instantiated PepXmlLoader");
                fractionIterator = pepXmlLoader.getFractionIterator();
                _log.debug("Got FractionIterator");
            }
            catch (XMLStreamException xmlse)
            {
                _log.error("XML Stream Failure while loading XML file", xmlse);
                //keep the original exception as the cause so the stack trace isn't lost
                throw new IOException("XML Stream Failure while loading XML file. Message: " +
                        xmlse.getMessage(), xmlse);
            }
        }

        /**
         * Removal is not supported; this is a silent no-op.
         */
        public void remove()
        {
            //do nothing
        }

        /**
         * Build a FeatureSet from the next fraction: FeatureSet-level properties are set
         * first, then the features are loaded.
         * @return the new FeatureSet
         */
        public FeatureSet next()
        {
            _log.debug("Accessing next fraction");
            FeatureSet newFeatureSet = new FeatureSet();
            newFeatureSet.setSourceFile(sourceFile);
            newFeatureSet.addExtraInformationType(MS2ExtraInfoDef.getSingletonInstance());

            PepXmlLoader.PepXmlFraction fraction = fractionIterator.next();
            PepXMLFeatureFileHandler handler = PepXMLFeatureFileHandler.getSingletonInstance();
            handler.setFeatureSetPropertiesFromFraction(fraction, newFeatureSet);

            Feature[] features =
                    handler.getFeaturesFromPepXmlFraction(fraction, pepXmlLoader, newFeatureSet)
                            .toArray(new Feature[0]);
            newFeatureSet.setFeatures(features);
            return newFeatureSet;
        }

        public boolean hasNext()
        {
            return fractionIterator.hasNext();
        }
    }

    /**
     * Copy fraction-level search information onto a FeatureSet: modifications, search
     * database path, max internal cleavages, min termini and base name.
     *
     * Modifications whose amino acid is "n" or "c" are not passed to
     * MS2ExtraInfoDef.updateMS2ModMassOrDiff(); all modifications are passed to
     * correctMS2ModMasses() and stored on the FeatureSet.
     * @param fraction fraction to read from
     * @param featureSet FeatureSet to update
     */
    protected void setFeatureSetPropertiesFromFraction(PepXmlLoader.PepXmlFraction fraction,
                                                       FeatureSet featureSet)
    {
        //toArray() never returns null, so no null check is needed on the array
        MS2Modification[] modifications =
                fraction.getModifications().toArray(new MS2Modification[0]);
        for (MS2Modification ms2Mod : modifications)
        {
            String aminoAcid = ms2Mod.getAminoAcid();
            if (!aminoAcid.equals("n") && !aminoAcid.equals("c"))
                MS2ExtraInfoDef.updateMS2ModMassOrDiff(ms2Mod);
        }
        MS2ExtraInfoDef.correctMS2ModMasses(modifications);
        _log.debug("\tloaded " + modifications.length + " MS2 modifications");
        MS2ExtraInfoDef.setFeatureSetModifications(featureSet, modifications);

        String databasePath = fraction.getDatabaseLocalPath();
        if (databasePath != null)
            MS2ExtraInfoDef.setFeatureSetSearchDatabasePath(featureSet, databasePath);

        int maxCleavages = fraction.getSearchConstraintMaxInternalCleavages();
        if (maxCleavages > 0)
            MS2ExtraInfoDef.setFeatureSetSearchConstraintMaxIntCleavages(featureSet, maxCleavages);

        //This used to read getSearchConstraintMaxInternalCleavages() a second time, which
        //stored the max-cleavages value as min termini.
        int minTermini = fraction.getSearchConstraintMinTermini();
        if (minTermini > 0)
            MS2ExtraInfoDef.setFeatureSetSearchConstraintMinTermini(featureSet, minTermini);

        String baseName = fraction.getDataBasename();
        if (baseName != null)
            MS2ExtraInfoDef.setFeatureSetBaseName(featureSet, baseName);

        _log.debug("\tmin termini=" + minTermini + ", max cleavages=" + maxCleavages);
    }

    /**
     * Build a complete FeatureSet (features plus fraction-level properties) from one fraction.
     * Unlike PepXMLFeatureSetIterator.next(), this does not set the source file or register
     * the MS2 extra information type.
     * @param fraction fraction to convert
     * @param pepXmlLoader loader the fraction came from; supplies quantitation summaries
     * @return the new FeatureSet
     */
    public FeatureSet createFeatureSetFromPepXMLFraction(PepXmlLoader.PepXmlFraction fraction,
                                                         PepXmlLoader pepXmlLoader)
    {
        FeatureSet result = new FeatureSet();
        List<Feature> featureList = getFeaturesFromPepXmlFraction(fraction, pepXmlLoader, result);
        result.setFeatures(featureList.toArray(new Feature[featureList.size()]));
        setFeatureSetPropertiesFromFraction(fraction, result);
        return result;
    }

    /**
     * get a list of all features contained in a PepXmlFraction.
     * Exposing this for a tool that just pulls one fraction from a pepXml file.
     *
     * If the loader reports exactly one quantitation summary, the isotopic-label extra
     * information type and the quantitation algorithm are recorded on resultFeatureSet and
     * quantitation results are attached to each feature. Failures while reading the summary
     * are reported through ApplicationContext.setMessage() and loading continues.
     * @param fraction fraction whose peptides are converted
     * @param pepXmlLoader loader the fraction came from; supplies quantitation summaries
     * @param resultFeatureSet FeatureSet that receives the quantitation extra info type and algorithm
     * @return one Feature per peptide for which a non-null Feature was created
     */
    public List<Feature> getFeaturesFromPepXmlFraction(PepXmlLoader.PepXmlFraction fraction,
                                                       PepXmlLoader pepXmlLoader,
                                                       FeatureSet resultFeatureSet)
    {
        _log.debug("getFeaturesFromPepXmlFraction 1");
        boolean hasQuant = false;
        int quantAlgorithmType = -1;
        AnalyzeICAT.IsotopicLabel quantIsotopicLabel = null;

        List<RelativeQuantAnalysisSummary> quantSummaryList = pepXmlLoader.getQuantSummaries();
        if (quantSummaryList != null && quantSummaryList.size() == 1)
        {
            _log.debug("Has quantitation summary. Determining type...");
            try
            {
                RelativeQuantAnalysisSummary quantSummary = quantSummaryList.get(0);
                String massDiffValueString = quantSummary.getMassDiff();

                //set before the details are parsed, so the catch block can tell a partial
                //load from a total failure
                hasQuant = true;
                resultFeatureSet.addExtraInformationType(
                        IsotopicLabelExtraInfoDef.getSingletonInstance());

                String quantAlgorithmName = quantSummary.getAnalysisAlgorithm();
                if (quantAlgorithmName.contains(Q3AnalysisSummary.analysisType))
                {
                    quantAlgorithmType = QUANT_ALGORITHM_Q3;
                    IsotopicLabelExtraInfoDef.setFeatureSetAlgorithm(resultFeatureSet,
                            QuantitationUtilities.ALGORITHM_Q3);
                }
                else if (quantAlgorithmName.contains(XPressAnalysisSummary.analysisType))
                {
                    quantAlgorithmType = QUANT_ALGORITHM_XPRESS;
                    IsotopicLabelExtraInfoDef.setFeatureSetAlgorithm(resultFeatureSet,
                            QuantitationUtilities.ALGORITHM_XPRESS);
                }
                _log.debug("Successfully got basic quantitation information. Algorithm: " +
                        (quantAlgorithmType == QUANT_ALGORITHM_Q3 ? "Q3" : "XPress"));

                double quantMassDiff = 0;
                //Q3 seems to stick the labeled residue in the mass diff string
                try
                {
                    quantMassDiff = Float.parseFloat(massDiffValueString);
                }
                catch (Exception e)
                {
                    //fall back to whatever follows the first comma
                    massDiffValueString =
                            massDiffValueString.substring(massDiffValueString.indexOf(',') + 1);
                    quantMassDiff = Float.parseFloat(massDiffValueString);
                }

                char quantLabeledResidue = quantSummary.getLabeledResidues().charAt(0);
                if (quantLabeledResidue == 'n')
                {
                    //TODO: handle n-terminal labels better
                    ApplicationContext.setMessage(
                            "Warning: n-terminal label declared. We don't handle that very well yet");
                    quantLabeledResidue = ' ';
                }
                //the mass table is indexed directly by the residue character
                double lightMass = PeptideGenerator.getMasses(true)[quantLabeledResidue];
                quantIsotopicLabel = new AnalyzeICAT.IsotopicLabel((float) lightMass,
                        (float) (lightMass + quantMassDiff), quantLabeledResidue, 3);
            }
            catch (Exception e)
            {
                if (hasQuant)
                    ApplicationContext.setMessage("WARNING: quantitation information loaded, but couldn't get details of label. Error: " + e.getMessage());
                else
                    ApplicationContext.setMessage("WARNING: Error loading pepXML quantitation information. Quantitation will be unavailable for this file. Error: " + e.getMessage());
                _log.error("Error loading pepXML quantitation information", e);
            }
        }

        PepXmlLoader.PeptideIterator pi = fraction.getPeptideIterator();
        List<Feature> featuresList = new ArrayList<Feature>();
        while (pi.hasNext())
        {
            PepXmlLoader.PepXmlPeptide peptide = pi.next();
            Feature feature = createFeatureFromPepXmlPeptide(peptide, hasQuant,
                    quantAlgorithmType, quantIsotopicLabel);
            if (feature != null)
                featuresList.add(feature);
        }
        return featuresList;
    }

    /**
     * Convert one pepXML peptide into a Feature carrying MS2 information (peptide, proteins,
     * modifications, search scores, flanking amino acids, number of enzymatic ends,
     * PeptideProphet results) and, when hasQuant is true, Q3 or XPress quantitation results.
     * @param peptide peptide to convert
     * @param hasQuant whether quantitation results should be looked for
     * @param quantAlgorithmType QUANT_ALGORITHM_Q3 or QUANT_ALGORITHM_XPRESS; other values
     *        attach no quantitation
     * @param quantIsotopicLabel label stored on the feature when a quantitation result is found
     * @return the new Feature
     */
    public Feature createFeatureFromPepXmlPeptide(PepXmlLoader.PepXmlPeptide peptide,
                                                  boolean hasQuant, int quantAlgorithmType,
                                                  AnalyzeICAT.IsotopicLabel quantIsotopicLabel)
    {
        //wrap each non-null modified amino acid in its own single-element list
        ModifiedAminoAcid[] modArray = peptide.getModifiedAminoAcids();
        List<ModifiedAminoAcid>[] modificationListArray = null;
        if (modArray != null)
        {
            //necessary cast
            modificationListArray = (List<ModifiedAminoAcid>[]) new ArrayList[modArray.length];
            for (int i = 0; i < modArray.length; i++)
            {
                if (modArray[i] != null)
                {
                    modificationListArray[i] = new ArrayList<ModifiedAminoAcid>();
                    modificationListArray[i].add(modArray[i]);
                }
            }
        }

        String peptideSequence = peptide.getTrimmedPeptide();
        List<String> peptideList = new ArrayList<String>(1);
        peptideList.add(peptideSequence);

        //primary protein first, then the alternatives
        List<String> proteinList = new ArrayList<String>();
        proteinList.add(peptide.getProtein());
        proteinList.addAll(peptide.getAlternativeProteins());

        List<Integer> altProteinNTTs = peptide.getAlternativeProteinNTTs();

        Feature currentFeature = MS2ExtraInfoDef.createMS2Feature(
                peptide.getScan(), (float) peptide.getCalculatedNeutralMass(),
                peptide.getCharge(), peptideList, proteinList, modificationListArray);
        if (peptide.getNTerminalModMass() != 0)
            MS2ExtraInfoDef.setNtermModMass(currentFeature, peptide.getNTerminalModMass());
        //This used to call setNtermModMass(), which overwrote the N-terminal value with the
        //C-terminal one.
        if (peptide.getCTerminalModMass() != 0)
            MS2ExtraInfoDef.setCtermModMass(currentFeature, peptide.getCTerminalModMass());

        MS2ExtraInfoDef.setAltProteinNTTs(currentFeature, altProteinNTTs);
        MS2ExtraInfoDef.setDeltaMass(currentFeature, peptide.getDeltaMass());
        MS2ExtraInfoDef.setSearchScores(currentFeature, peptide.getScores());

        //only the first character of each flanking amino acid string is stored; empty
        //strings are skipped rather than allowed to throw from charAt(0)
        String prevAA = peptide.getPrevAA();
        String nextAA = peptide.getNextAA();
        if (prevAA != null && prevAA.length() > 0)
            MS2ExtraInfoDef.setPrevAminoAcid(currentFeature, prevAA.charAt(0));
        if (nextAA != null && nextAA.length() > 0)
            MS2ExtraInfoDef.setNextAminoAcid(currentFeature, nextAA.charAt(0));

        int numTrypticEnds = peptide.getNumTolTerm();
        //Try to load NTT from pepXML file, but if it wasn't there, try to recalculate it.
        //-1 is a sentinel for "wasn't there"
        if (numTrypticEnds < 0)
        {
            //WARNING WARNING WARNING!!!
            //This behavior is trypsin-specific. If another enzyme is used, number of enzymatic
            //ends will be set incorrectly.

            //start counting from zero, not from the negative sentinel
            numTrypticEnds = 0;

            //check for start of protein sequence or trypsin digestion at start of peptide
            //(remember proline)
            boolean atProteinStart = prevAA != null && prevAA.startsWith("-");
            boolean trypticStart = prevAA != null &&
                    (prevAA.startsWith("K") || prevAA.startsWith("R")) &&
                    !peptideSequence.startsWith("P");
            if (atProteinStart || trypticStart)
                numTrypticEnds++;

            //check for end of protein sequence or trypsin digestion at end of peptide
            boolean atProteinEnd = nextAA != null && nextAA.startsWith("-");
            boolean trypticEnd = nextAA != null && !nextAA.startsWith("P") &&
                    (peptideSequence.endsWith("K") || peptideSequence.endsWith("R"));
            if (atProteinEnd || trypticEnd)
                numTrypticEnds++;
        }
        MS2ExtraInfoDef.setNumEnzymaticEnds(currentFeature, numTrypticEnds);

        //If retention time is available, grab it. If not, time will have its
        //default value (0). It's possible to have a mix of set and unset values
        //in the same FeatureSet.
        if (peptide.getRetentionTime() != null)
            currentFeature.setTime(peptide.getRetentionTime().floatValue());

        //analysis results:

        //peptideProphet: pprophet
        PeptideProphetHandler.PeptideProphetResult ppar = peptide.getPeptideProphetResult();
        if (null != ppar)
        {
            MS2ExtraInfoDef.setPeptideProphet(currentFeature, ppar.getProbability());
            MS2ExtraInfoDef.setFval(currentFeature, ppar.getProphetFval());
            String allNttProb = ppar.getAllNttProb();
            if (allNttProb != null)
                MS2ExtraInfoDef.setAllNttProb(currentFeature, allNttProb);
        }

        if (hasQuant)
        {
            boolean foundResult = false;
            switch (quantAlgorithmType)
            {
                case QUANT_ALGORITHM_Q3:
                    Q3Handler.Q3Result q3ar = peptide.getQ3Result();
                    if (null != q3ar)
                    {
                        IsotopicLabelExtraInfoDef.setRatio(currentFeature, q3ar.getDecimalRatio());
                        IsotopicLabelExtraInfoDef.setHeavyIntensity(currentFeature, q3ar.getHeavyArea());
                        IsotopicLabelExtraInfoDef.setLightIntensity(currentFeature, q3ar.getLightArea());
                        //the light area doubles as both intensity and total intensity
                        currentFeature.setIntensity(q3ar.getLightArea());
                        currentFeature.setTotalIntensity(q3ar.getLightArea());
                        IsotopicLabelExtraInfoDef.setLightMass(currentFeature, q3ar.getLightMass());
                        IsotopicLabelExtraInfoDef.setHeavyMass(currentFeature, q3ar.getHeavyMass());
                        IsotopicLabelExtraInfoDef.setLightFirstScan(currentFeature, q3ar.getLightFirstscan());
                        IsotopicLabelExtraInfoDef.setLightLastScan(currentFeature, q3ar.getLightLastscan());
                        IsotopicLabelExtraInfoDef.setHeavyFirstScan(currentFeature, q3ar.getHeavyFirstscan());
                        IsotopicLabelExtraInfoDef.setHeavyLastScan(currentFeature, q3ar.getHeavyLastscan());
                        foundResult = true;
                    }
                    break;
                case QUANT_ALGORITHM_XPRESS:
                    XPressHandler.XPressResult xpar = peptide.getXPressResult();
                    if (null != xpar)
                    {
                        //This is a hack. XPress returns an intensity that's the sum of
                        //all the intensity values for one peak over its elution profile.
                        //Really that doesn't correspond exactly to our notion of intensity,
                        //or of total intensity. But I'm setting both.
                        currentFeature.setTotalIntensity(xpar.getLightArea());
                        currentFeature.setIntensity(xpar.getLightArea());

                        IsotopicLabelExtraInfoDef.setRatio(currentFeature, xpar.getDecimalRatio());
                        IsotopicLabelExtraInfoDef.setHeavyIntensity(currentFeature, xpar.getHeavyArea());
                        IsotopicLabelExtraInfoDef.setLightIntensity(currentFeature, xpar.getLightArea());
                        IsotopicLabelExtraInfoDef.setLightFirstScan(currentFeature, xpar.getLightFirstscan());
                        IsotopicLabelExtraInfoDef.setLightLastScan(currentFeature, xpar.getLightLastscan());
                        IsotopicLabelExtraInfoDef.setHeavyFirstScan(currentFeature, xpar.getHeavyFirstscan());
                        IsotopicLabelExtraInfoDef.setHeavyLastScan(currentFeature, xpar.getHeavyLastscan());
                        IsotopicLabelExtraInfoDef.setHeavyMass(currentFeature, xpar.getHeavyMass());
                        IsotopicLabelExtraInfoDef.setLightMass(currentFeature, xpar.getLightMass());
                        foundResult = true;
                    }
                    break;
            }
            //the label is attached only when this peptide actually has a quantitation result
            if (foundResult)
            {
                IsotopicLabelExtraInfoDef.setLabel(currentFeature, quantIsotopicLabel);
            }
        }

        //TODO: This is arbitrary. Is it excusable??
        //pepxml files don't have intensity, unless xpress results are present.
        //Some software features, like align,
        //require intensity strictly >100 to work correctly
        if (currentFeature.getIntensity() <= 0)
            currentFeature.setIntensity(200);

        return currentFeature;
    }

    /**
     * Save a single FeatureSet as a pepXML file. The base name written is the output file
     * name up to (not including) its first '.'.
     * @param featureSet FeatureSet to save
     * @param outFile file to write
     * @throws IOException if writing fails; the underlying exception is kept as the cause
     */
    public void saveFeatureSet(FeatureSet featureSet, File outFile)
            throws IOException
    {
        FeaturePepXmlWriter pepXmlWriter = new FeaturePepXmlWriter(featureSet);
        String baseName = outFile.getName();
        if (baseName.contains("."))
            baseName = baseName.substring(0, baseName.indexOf("."));
        pepXmlWriter.setBaseName(baseName);
        pepXmlWriter.setFirstSpectrumQueryIndex(firstSpectrumQueryIndex);
        pepXmlWriter.set_searchEngine(_searchEngine);

        try
        {
            pepXmlWriter.write(outFile);
        }
        catch (Exception e)
        {
            _log.error("Failed to save pepXML", e);
            //Report the failure to the caller instead of swallowing it, consistent with
            //saveFeatureSets() and combinePepXmlFiles()
            throw new IOException("Failed to save pepXML file, " + e.getMessage(), e);
        }
    }

    /**
     * Concatenate several pepXML files into one. Everything before the first
     * msms_run_summary line is kept only from the first file, and the line containing
     * /msms_pipeline_analysis (and anything after it) is kept only from the last file.
     * A bare &lt;msms_run_summary&gt; line is rewritten to carry a base_name attribute
     * derived from the input file name.
     * @param pepXmlFiles input files, in output order
     * @param outFile file to write
     * @throws IOException if reading or writing fails; the underlying exception is kept as the cause
     */
    public void combinePepXmlFiles(List<File> pepXmlFiles, File outFile)
            throws IOException
    {
        PrintWriter outPW = null;
        _log.debug("Combining " + pepXmlFiles.size() + " pepXML files into one file...");
        try
        {
            outPW = new PrintWriter(outFile);
            int lastIndex = pepXmlFiles.size() - 1;

            for (int i = 0; i < pepXmlFiles.size(); i++)
            {
                File pepXmlFile = pepXmlFiles.get(i);
                _log.debug("\tProcessing file " + pepXmlFile.getAbsolutePath());

                BufferedReader br = new BufferedReader(new FileReader(pepXmlFile));
                try
                {
                    String line = null;
                    boolean pastHeader = false;
                    boolean reachedFooter = false;
                    while ((line = br.readLine()) != null)
                    {
                        if (line.contains("/msms_pipeline_analysis"))
                            reachedFooter = true;
                        else if (!pastHeader && line.contains("msms_run_summary"))
                        {
                            //if no base_name given
                            if (line.contains("<msms_run_summary>"))
                            {
                                //NOTE(review): this keeps the text AFTER the first '.',
                                //whereas saveFeatureSet() keeps the text before it --
                                //confirm which is intended
                                String baseName = pepXmlFile.getName();
                                if (baseName.contains(".") &&
                                        baseName.length() > baseName.indexOf(".") + 1)
                                    baseName = baseName.substring(baseName.indexOf(".") + 1);
                                line = "<msms_run_summary base_name=\"" + baseName + "\">";
                            }
                            pastHeader = true;
                        }

                        if ((i == 0 || pastHeader) && (!reachedFooter || i == lastIndex))
                            outPW.println(line);
                        //for every file but the last, stop at the footer
                        if (reachedFooter && i < lastIndex)
                            break;
                    }
                }
                finally
                {
                    //release the input file handle even if reading fails
                    br.close();
                }
                outPW.flush();
                _log.debug("Saved pepXML file " + outFile.getAbsolutePath());
            }
        }
        catch (Exception e)
        {
            _log.error("Failed to save pepXML", e);
            throw new IOException("Failed to save pepXML file, " + e.getMessage(), e);
        }
        finally
        {
            if (outPW != null)
                outPW.close();
        }
    }

    /**
     * Write an "all.pep.xml" file: each FeatureSet is written to its own temp file, then the
     * temp files are combined with combinePepXmlFiles(). Temp files registered for this
     * handler are deleted afterwards whether or not the save succeeded.
     * @param featureSets FeatureSets to save, in output order
     * @param outFile file to write
     * @throws IOException if any step fails; the underlying exception is kept as the cause
     */
    public void saveFeatureSets(List<FeatureSet> featureSets, File outFile)
            throws IOException
    {
        try
        {
            List<File> tempFiles = new ArrayList<File>();
            for (int i = 0; i < featureSets.size(); i++)
            {
                FeatureSet featureSet = featureSets.get(i);
                FeaturePepXmlWriter pepXmlWriter = new FeaturePepXmlWriter(featureSet);
                pepXmlWriter.setFirstSpectrumQueryIndex(firstSpectrumQueryIndex);
                pepXmlWriter.set_searchEngine(_searchEngine);

                File tempFile =
                        TempFileManager.createTempFile("saveFeatureSet" + i + ".tmp", this);
                pepXmlWriter.write(tempFile);
                tempFiles.add(tempFile);
            }
            combinePepXmlFiles(tempFiles, outFile);
        }
        catch (Exception e)
        {
            _log.error("Failed to save pepXML", e);
            throw new IOException(e.getMessage(), e);
        }
        finally
        {
            TempFileManager.deleteTempFiles(this);
        }
    }

    /**
     * Save a FeatureSet to a PrintWriter. Not implemented for pepXML.
     * @param featureSet ignored
     * @param out ignored
     * @throws IllegalArgumentException always
     */
    public void saveFeatureSet(FeatureSet featureSet, PrintWriter out)
    {
        throw new IllegalArgumentException(
                "This version of saveFeatureSet not implemented in PepXMLFeatureFileHandler");
    }

    /**
     * Can this type of file handler handle this specific file? True when the file is XML
     * and its first start element is msms_pipeline_analysis (compared case-insensitively).
     *
     * @param file file to inspect
     * @return true if the file looks like pepXML
     * @throws IOException on I/O failure, or wrapping any XMLStreamException
     */
    public boolean canHandleFile(File file)
            throws IOException
    {
        if (!isXMLFile(file))
        {
            _log.debug("canHandleFile, File is not XML");
            return false;
        }
        _log.debug("canHandleFile, File is XML...");

        FileInputStream fis = null;
        try
        {
            fis = new FileInputStream(file);
            SimpleXMLStreamReader parser = new SimpleXMLStreamReader(fis);
            //advance to the first start element
            while (!parser.isStartElement())
                parser.next();
            String startElementName = parser.getLocalName();

            //check that the first element is an msms_pipeline_analysis. I'm pretty
            //sure that this is required by the pepXML spec, but whether it is or not,
            //if we run into files where this doesn't hold true, will need to change.
            if (!"msms_pipeline_analysis".equalsIgnoreCase(startElementName))
            {
                _log.debug("canHandleFile, First element is not msms_pipeline_analysis... it's " +
                        startElementName);
                return false;
            }
        }
        catch (XMLStreamException xse)
        {
            _log.debug("canHandleFile, throwing exception with message " + xse.getMessage());
            throw new IOException(xse.getMessage(), xse);
        }
        finally
        {
            if (fis != null)
                fis.close();
        }

        _log.debug("canHandleFile, returning true!");
        return true;
    }

    public int getFirstSpectrumQueryIndex()
    {
        return firstSpectrumQueryIndex;
    }

    public void setFirstSpectrumQueryIndex(int firstSpectrumQueryIndex)
    {
        this.firstSpectrumQueryIndex = firstSpectrumQueryIndex;
    }

    public String getSearchEngine()
    {
        return _searchEngine;
    }

    public void setSearchEngine(String _searchEngine)
    {
        this._searchEngine = _searchEngine;
    }
}