/*
* Copyright (c) 2003-2012 Fred Hutchinson Cancer Research Center
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.fhcrc.cpl.toolbox.proteomics.feature.filehandler;
import org.fhcrc.cpl.toolbox.proteomics.feature.FeatureSet;
import org.fhcrc.cpl.toolbox.proteomics.feature.AnalyzeICAT;
import org.fhcrc.cpl.toolbox.proteomics.feature.Spectrum;
import org.fhcrc.cpl.toolbox.proteomics.feature.extraInfo.MS2ExtraInfoDef;
import org.fhcrc.cpl.toolbox.proteomics.feature.extraInfo.IsotopicLabelExtraInfoDef;
import org.fhcrc.cpl.toolbox.ApplicationContext;
import org.fhcrc.cpl.toolbox.filehandler.SimpleXMLStreamReader;
import org.fhcrc.cpl.toolbox.proteomics.MS2Modification;
import org.fhcrc.cpl.toolbox.proteomics.ModifiedAminoAcid;
import org.apache.log4j.Logger;
import org.apache.xmlbeans.XmlException;
import org.systemsbiology.apmlparser.v2.*;
import org.systemsbiology.apmlparser.v2.datatype.*;
import org.systemsbiology.apmlparser.v2.datatype.Feature;
import javax.xml.stream.XMLStreamException;
import java.io.*;
import java.util.*;
/**
* File handler for APML feature files: loads APML peak lists into msInspect
* FeatureSets and saves FeatureSets back out as APML
*/
public class APMLFeatureFileHandler extends BaseFeatureSetFileHandler
implements FeatureSetFileHandler
{
//Class-wide log4j logger
static Logger _log = Logger.getLogger(APMLFeatureFileHandler.class);
//Name identifying the file type this handler deals with
public static final String FILE_TYPE_NAME = "APML";
//NOTE(review): not referenced anywhere else in this class -- confirm whether subclasses or the base class use it
protected int minFeaturesCount = 0;
//TODO: get rid of these when implementing APML 2.0
//These are dummy values to assign to all features because APML 1.0 doesn't store them
//NOTE(review): neither dummy value is referenced by the methods of this class
protected static final float DUMMY_KL_SCORE = 2;
protected static final int DUMMY_NUM_PEAKS = 2;
//standardize quality score names for MS1 features
//These names are written by createAPMLFeature() and matched by createMsInspectFeatureFromAPMLFeature()
public static final String APML_QUALITY_SCORE_NAME_KL = "KL";
public static final String APML_QUALITY_SCORE_ACCMASS = "accurate_mass_flag";
public static final String APML_QUALITY_SCORE_SUMSQUARESDIST = "sum_squares_dist";
//standardize cluster type descriptions
//Marks the cluster list that holds light/heavy feature pairs for isotopic-label quantitation
public static final String APML_CLUSTERS_DESCRIPTION_LABELED_QUANT = "labeled_quantitation";
//Software parameter names
//Stored on the "msinspect" software entry: the feature-finding algorithm and the isotopic label
public static final String APML_SOFTWARE_PARAM_FEATURE_STRATEGY = "feature_strategy";
public static final String APML_SOFTWARE_PARAM_QUANT_LABEL = "quantitation_label";
//TODO: standardize these search score names with Mi-Youn
//Search score names, as appropriate for an APML file
public static final String APML_SEARCH_SCORE_NAME_PEPTIDE_PROPHET = "PeptideProphet";
//Lazily created shared instance, handed out by getSingletonInstance()
protected static APMLFeatureFileHandler singletonInstance = null;
/**
 * Return the shared handler instance, creating it on first use.
 * @return the single shared APMLFeatureFileHandler
 */
public static APMLFeatureFileHandler getSingletonInstance()
{
    if (singletonInstance != null)
    {
        return singletonInstance;
    }
    //first request: build the instance and remember it for later callers
    singletonInstance = new APMLFeatureFileHandler();
    return singletonInstance;
}
/**
 * Load a FeatureSet from an APML peak-list file.
 *
 * Only the first peak list in the file is used. If the "msinspect" software entry declares a
 * quantitation label, the file must also contain a labeled-quantitation cluster list. Each
 * cluster in that list must have exactly two members and is treated as a light/heavy pair
 * (lower m/z = light). Heavy members are not returned as features of their own; instead their
 * mass and intensity are recorded in the isotopic-label properties of the light feature.
 *
 * @param file APML file to read
 * @return the loaded FeatureSet
 * @throws IOException if the file can't be parsed, is not a peak-list file, contains no peak
 * list, declares quantitation without labeled pairs, or has a labeled cluster that is not a pair
 */
public FeatureSet loadFeatureSet(File file)
        throws IOException
{
    _log.debug("loadFeatureSet begin");
    DefaultAPMLReaderListener apmlReaderListener = new DefaultAPMLReaderListener();
    APMLReader apmlReader = new APMLReader(apmlReaderListener);
    apmlReader.setValidate(true);
    apmlReader.setReadSingleScanPeaks(false);
    try
    {
        apmlReader.read(file);
    }
    catch (Exception e)
    {
        //keep the original exception as the cause so its stack trace reaches the caller
        throw new IOException("Failed to load APML from file " + file.getAbsolutePath() +
                ", type=" + e.getClass().getName() +
                ", message=" + e.getMessage(), e);
    }
    if (apmlReaderListener.getDataType() != APMLReaderListener.DATA_TYPE_PEAK_LISTS)
    {
        throw new IOException("Tried to load a non-peak-list APML file. Quitting");
    }

    FeatureSet result = new FeatureSet();

    //file-level metadata: processing date, feature-finding strategy, quantitation label
    boolean hasQuantitation = false;
    AnalyzeICAT.IsotopicLabel quantLabel = null;
    DataProcessing apmlDataProcessing = apmlReaderListener.getDataProcessing();
    if (apmlDataProcessing != null)
    {
        if (apmlDataProcessing.getProcessingDate() != null)
            result.setProperty("date", apmlDataProcessing.getProcessingDate().toString());
        List<DataProcessing.Software> apmlSoftwares = apmlDataProcessing.getSoftwares();
        if (apmlSoftwares != null)
        {
            for (DataProcessing.Software apmlSoftware : apmlSoftwares)
            {
                if (!"msinspect".equals(apmlSoftware.getName()))
                    continue;
                String strategy =
                        apmlSoftware.getDataProcessingParam(APML_SOFTWARE_PARAM_FEATURE_STRATEGY);
                if (strategy != null)
                    result.setProperty("algorithm", strategy);
                String quantLabelString =
                        apmlSoftware.getDataProcessingParam(APML_SOFTWARE_PARAM_QUANT_LABEL);
                if (quantLabelString != null)
                {
                    _log.debug("Found software parameter indicating quantitation");
                    hasQuantitation = true;
                    quantLabel = new AnalyzeICAT.IsotopicLabel(quantLabelString);
                    _log.debug("\tisotopic label: " + quantLabel);
                    result.addExtraInformationType(IsotopicLabelExtraInfoDef.getSingletonInstance());
                }
            }
            //TODO: other params?
        }
    }

    List<DefaultPeakListListener> apmlPeakListListeners = apmlReaderListener.getPeakListListeners();
    if (apmlPeakListListeners == null || apmlPeakListListeners.isEmpty())
    {
        throw new IOException("APML file " + file.getAbsolutePath() +
                " contains no peak (feature) list");
    }
    if (apmlPeakListListeners.size() > 1)
    {
        ApplicationContext.infoMessage("WARNING: loading an APML file with more " +
                "than one peak (feature) list. All peak lists after the first " +
                "will be ignored");
    }
    DefaultPeakListListener peakListListener = apmlPeakListListeners.get(0);
    if (peakListListener.getSource() != null)
        result.setSourceFile(new File(peakListListener.getSource()));

    //light feature -> heavy feature, or null if the file has no labeled-quantitation clusters
    Map<org.systemsbiology.apmlparser.v2.datatype.Feature, org.systemsbiology.apmlparser.v2.datatype.Feature>
            lightHeavyFeatureMap = findLabeledQuantPairs(apmlReaderListener.getClustersListeners());
    boolean hasLabeledPairs = lightHeavyFeatureMap != null;
    if (hasQuantitation && !hasLabeledPairs)
        throw new IOException("File has quantitation, but no labeled pairs found. Quitting");

    //heavy partners, kept in a set so the per-feature membership test below doesn't scan the map
    Set<org.systemsbiology.apmlparser.v2.datatype.Feature> heavyFeatures =
            new HashSet<org.systemsbiology.apmlparser.v2.datatype.Feature>();
    if (hasLabeledPairs)
        heavyFeatures.addAll(lightHeavyFeatureMap.values());

    _log.debug("loadFeatureSet loading features");
    List<org.fhcrc.cpl.toolbox.proteomics.feature.Feature> msInspectFeatures =
            new ArrayList<org.fhcrc.cpl.toolbox.proteomics.feature.Feature>(
                    peakListListener.getFeatures().size());
    //FeatureSet-level stuff to keep track of
    boolean hasPpids = false;
    List<MS2Modification> allMS2Mods = new ArrayList<MS2Modification>();

    for (org.systemsbiology.apmlparser.v2.datatype.Feature apmlFeature : peakListListener.getFeatures())
    {
        //heavy members of labeled pairs are represented through their light partner only
        if (heavyFeatures.contains(apmlFeature))
            continue;

        org.fhcrc.cpl.toolbox.proteomics.feature.Feature msInspectFeature =
                createMsInspectFeatureFromAPMLFeature(apmlFeature);
        msInspectFeatures.add(msInspectFeature);

        if (MS2ExtraInfoDef.getPeptideList(msInspectFeature) != null)
        {
            hasPpids = true;
            addNewMS2Modifications(msInspectFeature, allMS2Mods);
        }

        //hasQuantitation implies the pair map exists: the opposite case threw above
        if (hasQuantitation && lightHeavyFeatureMap.containsKey(apmlFeature))
        {
            org.systemsbiology.apmlparser.v2.datatype.Feature heavyFeature =
                    lightHeavyFeatureMap.get(apmlFeature);
            float massDiff = heavyFeature.getCoord().getMass() - apmlFeature.getCoord().getMass();
            float labelMassDiff = quantLabel.getHeavy() - quantLabel.getLight();
            //round rather than truncate: float noise can make an exact multiple come out
            //fractionally below the integer
            int numLabels = Math.round(massDiff / labelMassDiff);
            IsotopicLabelExtraInfoDef.setLabel(msInspectFeature, quantLabel);
            IsotopicLabelExtraInfoDef.setLabelCount(msInspectFeature, numLabels);

            float heavyIntensity = heavyFeature.getCoord().getApexIntensity();
            float lightIntensity = apmlFeature.getCoord().getApexIntensity();
            float ratio = lightIntensity / heavyIntensity;
            IsotopicLabelExtraInfoDef.setRatio(msInspectFeature, ratio);
            IsotopicLabelExtraInfoDef.setLightIntensity(msInspectFeature, lightIntensity);
            IsotopicLabelExtraInfoDef.setHeavyIntensity(msInspectFeature, heavyIntensity);
            IsotopicLabelExtraInfoDef.setHeavyMass(msInspectFeature, heavyFeature.getCoord().getMass());
        }
    }

    result.setFeatures(msInspectFeatures.toArray(
            new org.fhcrc.cpl.toolbox.proteomics.feature.Feature[msInspectFeatures.size()]));
    _log.debug("Loaded " + result.getFeatures().length + " features from APML file.");

    if (hasPpids)
    {
        result.addExtraInformationType(MS2ExtraInfoDef.getSingletonInstance());
    }
    if (!allMS2Mods.isEmpty())
    {
        MS2Modification[] modifications =
                allMS2Mods.toArray(new MS2Modification[allMS2Mods.size()]);
        _log.debug("\tloaded " + modifications.length + " MS2 modifications");
        MS2ExtraInfoDef.setFeatureSetModifications(result, modifications);
    }
    return result;
}

/**
 * Build the light-to-heavy feature map from the first cluster list described as labeled quantitation.
 * @param clustersListeners cluster lists read from the file; may be null or empty
 * @return map from light feature to heavy feature, or null if no labeled-quantitation list exists
 * @throws IOException if a labeled-quantitation cluster does not have exactly two members
 */
private static Map<org.systemsbiology.apmlparser.v2.datatype.Feature,
        org.systemsbiology.apmlparser.v2.datatype.Feature> findLabeledQuantPairs(
        List<BaseDefaultClustersListener> clustersListeners)
        throws IOException
{
    if (clustersListeners == null)
        return null;
    for (BaseDefaultClustersListener clustersListener : clustersListeners)
    {
        if (!APML_CLUSTERS_DESCRIPTION_LABELED_QUANT.equals(clustersListener.getDescription()))
            continue;
        Map<org.systemsbiology.apmlparser.v2.datatype.Feature,
                org.systemsbiology.apmlparser.v2.datatype.Feature> lightHeavyFeatureMap =
                new HashMap<org.systemsbiology.apmlparser.v2.datatype.Feature,
                        org.systemsbiology.apmlparser.v2.datatype.Feature>();
        DefaultFeatureClustersListener featureClustersListener =
                (DefaultFeatureClustersListener) clustersListener;
        for (FeatureCluster featureCluster : featureClustersListener.getFeatureClusters())
        {
            List<org.systemsbiology.apmlparser.v2.datatype.Feature> clusterFeatures =
                    featureCluster.getFeatures();
            if (clusterFeatures.size() != 2)
                throw new IOException("Labeled quantitation cluster had " + clusterFeatures.size() +
                        " members!");
            org.systemsbiology.apmlparser.v2.datatype.Feature first = clusterFeatures.get(0);
            org.systemsbiology.apmlparser.v2.datatype.Feature second = clusterFeatures.get(1);
            //the member with the lower m/z is the light one
            if (first.getCoord().getMz() < second.getCoord().getMz())
                lightHeavyFeatureMap.put(first, second);
            else
                lightHeavyFeatureMap.put(second, first);
        }
        //only the first labeled-quantitation cluster list is used
        return lightHeavyFeatureMap;
    }
    return null;
}

/**
 * Add to allMS2Mods every (amino acid, mass) modification on this feature's peptide that the list lacks.
 * @param msInspectFeature feature whose modified amino acids are inspected
 * @param allMS2Mods running list of distinct modifications; updated in place
 */
private static void addNewMS2Modifications(
        org.fhcrc.cpl.toolbox.proteomics.feature.Feature msInspectFeature,
        List<MS2Modification> allMS2Mods)
{
    List<ModifiedAminoAcid>[] modsThisPeptide =
            MS2ExtraInfoDef.getModifiedAminoAcids(msInspectFeature);
    if (modsThisPeptide == null)
        return;
    for (List<ModifiedAminoAcid> modList : modsThisPeptide)
    {
        if (modList == null)
            continue;
        for (ModifiedAminoAcid mod : modList)
        {
            //the mass is stored on MS2Modification as a float (see setMassDiff below), so
            //compare at float precision; comparing against the raw value could miss a match
            float modMass = (float) mod.getMass();
            boolean foundIt = false;
            for (MS2Modification ms2Mod : allMS2Mods)
            {
                if (ms2Mod.getAminoAcid().charAt(0) == mod.getAminoAcid() &&
                        ms2Mod.getMassDiff() == modMass)
                {
                    foundIt = true;
                    break;
                }
            }
            if (!foundIt)
            {
                MS2Modification ms2Mod = new MS2Modification();
                ms2Mod.setAminoAcid("" + mod.getAminoAcid());
                ms2Mod.setMassDiff(modMass);
                allMS2Mods.add(ms2Mod);
            }
        }
    }
}
/**
 * This method knows the mapping between APML feature fields and
 * msInspect feature fields.
 *
 * Copies coordinate information, multi-scan peaks, the known quality scores (KL,
 * accurate-mass flag, sum-squares distance), the annotation and, if present, the first
 * putative peptide id (sequence, modifications, first protein accession, PeptideProphet score).
 *
 * @param apmlFeature APML feature to convert
 * @return a new msInspect feature populated from apmlFeature
 */
public org.fhcrc.cpl.toolbox.proteomics.feature.Feature
createMsInspectFeatureFromAPMLFeature(
        org.systemsbiology.apmlparser.v2.datatype.Feature apmlFeature)
{
    org.fhcrc.cpl.toolbox.proteomics.feature.Feature result =
            new org.fhcrc.cpl.toolbox.proteomics.feature.Feature();

    //handle coordinate information
    Coordinate coord = apmlFeature.getCoord();
    result.setScan(coord.getApexScan());
    Coordinate.Range<Integer> apmlScanRange = coord.getScanRange();
    if (apmlScanRange != null)
    {
        result.setScanFirst(apmlScanRange.getMin());
        result.setScanLast(apmlScanRange.getMax());
    }
    else
    {
        //no scan range stored: collapse the range onto the apex scan
        result.setScanFirst(result.getScan());
        result.setScanLast(result.getScan());
    }
    //if APML scancount isn't set, default to 1 scan
    result.setScanCount(Math.max(coord.getScanCount(), 1));
    result.setMz(coord.getMz());
    result.setMass(coord.getMass());
    result.setCharge(coord.getCharge());
    //minTime and maxTime, and minMz and maxMz, are ignored
    result.setTime(coord.getRt());
    result.setIntensity(coord.getApexIntensity());
    result.setTotalIntensity(coord.getIntensity());
    result.setPeaks(apmlFeature.getNumPeaks());

    List<MultiScanPeak> multiScanPeaks = apmlFeature.getMultiScanPeaks();
    if (multiScanPeaks != null && !multiScanPeaks.isEmpty())
    {
        Spectrum.Peak[] msInspectPeaks = new Spectrum.Peak[multiScanPeaks.size()];
        for (int i = 0; i < multiScanPeaks.size(); i++)
        {
            Coordinate peakCoord = multiScanPeaks.get(i).getCoordinate();
            org.fhcrc.cpl.toolbox.proteomics.feature.Feature peakFeature =
                    new org.fhcrc.cpl.toolbox.proteomics.feature.Feature(
                            peakCoord.getApexScan(), peakCoord.getMz(),
                            peakCoord.getApexIntensity());
            peakFeature.setTotalIntensity(peakCoord.getIntensity());
            Coordinate.Range<Integer> peakScanRange = peakCoord.getScanRange();
            if (peakScanRange != null)
            {
                peakFeature.setScanFirst(peakScanRange.getMin());
                peakFeature.setScanLast(peakScanRange.getMax());
            }
            else
            {
                //same fallback as for the feature itself when no scan range is stored
                peakFeature.setScanFirst(peakFeature.getScan());
                peakFeature.setScanLast(peakFeature.getScan());
            }
            msInspectPeaks[i] = peakFeature;
        }
        result.comprised = msInspectPeaks;
        //the actual peak list overrides the peak count copied above
        result.peaks = msInspectPeaks.length;
    }

    List<org.systemsbiology.apmlparser.v2.datatype.Feature.QualityScore> apmlQualityScores =
            apmlFeature.getQualityScores();
    if (apmlQualityScores != null)
    {
        for (org.systemsbiology.apmlparser.v2.datatype.Feature.QualityScore apmlQualityScore : apmlQualityScores)
        {
            String scoreName = apmlQualityScore.getScoreName();
            String scoreValue = apmlQualityScore.getScoreValue();
            //quality scores with names other than these three are ignored
            if (APML_QUALITY_SCORE_NAME_KL.equals(scoreName))
                result.setKl(Float.parseFloat(scoreValue));
            else if (APML_QUALITY_SCORE_ACCMASS.equals(scoreName))
                result.setAccurateMZ(Boolean.parseBoolean(scoreValue));
            else if (APML_QUALITY_SCORE_SUMSQUARESDIST.equals(scoreName))
                result.setSumSquaresDist(Float.parseFloat(scoreValue));
        }
    }

    if (apmlFeature.getAnnotation() != null)
        result.setDescription(apmlFeature.getAnnotation());

    if (apmlFeature.getPpids() != null && apmlFeature.getPpidsSize() > 0)
    {
        //TODO: handle multiple ppids
        PutativePeptideId ppid = apmlFeature.getPpids().get(0);
        String peptideSequence = ppid.getPeptideSequence();
        MS2ExtraInfoDef.addPeptide(result, peptideSequence);

        //TODO: right now we don't handle multiple possible proteins per peptide
        List<String> proteinAccessionNum = ppid.getProteinAccessionNumbers();
        if (proteinAccessionNum != null && proteinAccessionNum.size() > 0)
            MS2ExtraInfoDef.addProtein(result, proteinAccessionNum.get(0));

        List<PutativePeptideId.SearchScore> ms2SearchScores = ppid.getMs2SearchScores();
        if (ms2SearchScores != null)
        {
            for (PutativePeptideId.SearchScore ms2SearchScore : ms2SearchScores)
            {
                if (APML_SEARCH_SCORE_NAME_PEPTIDE_PROPHET.equals(ms2SearchScore.getScoreName()))
                {
                    //todo: what if this doesn't parse? Should check type?
                    MS2ExtraInfoDef.setPeptideProphet(result,
                            Double.parseDouble(ms2SearchScore.getScoreValue()));
                }
            }
        }

        //Modifications are recorded once, per residue position, carrying the modification's
        //value (mass). An earlier duplicate of this step stored the position as the mass and
        //was then superseded by this one; it has been removed.
        List<Modification> apmlModifications = ppid.getModifications();
        if (apmlModifications != null && apmlModifications.size() > 0)
        {
            List<ModifiedAminoAcid>[] modListArray =
                    (List<ModifiedAminoAcid>[])
                            new List[peptideSequence.length()];
            for (Modification apmlMod : apmlModifications)
            {
                //TODO: we store these zero-based. How does APML store them?
                //TODO: assuming zero-based for now
                int position = apmlMod.getPosition();
                List<ModifiedAminoAcid> modListThisIndex = modListArray[position];
                if (modListThisIndex == null)
                {
                    modListThisIndex = new ArrayList<ModifiedAminoAcid>();
                    modListArray[position] = modListThisIndex;
                }
                modListThisIndex.add(
                        new ModifiedAminoAcid(peptideSequence.charAt(position), apmlMod.getValue()));
            }
            MS2ExtraInfoDef.setModifiedAminoAcids(result, modListArray);
        }
    }
    return result;
}
/**
 * Save a FeatureSet to an APML peak-list file.
 *
 * Every msInspect feature becomes one APML feature, with its constituent peaks written as
 * multi-scan peaks. If the FeatureSet carries isotopic-label information, each feature with a
 * ratio additionally gets a synthesized heavy partner feature, and the two are written as a
 * labeled-quantitation cluster; the label itself is stored as a software parameter.
 *
 * @param featureSet features to save
 * @param outFile file to write
 * @throws IOException if the FeatureSet declares quantitation but no feature has a ratio,
 * or if writing the APML file fails
 */
public void saveFeatureSet(FeatureSet featureSet, File outFile)
        throws IOException
{
    APMLWriter apmlWriter = new APMLWriter();
    DataProcessing dataProcessing = new DataProcessing();
    dataProcessing.setProcessingDate(new GregorianCalendar());
    DataProcessing.Software msInspectSoftware = new DataProcessing.Software();
    msInspectSoftware.setName("msinspect");
    msInspectSoftware.setType(DataProcessing.Software.TYPE_PEAK_PICKING);
    String featureFindingAlgorithm = (String) featureSet.getProperty("algorithm");
    if (featureFindingAlgorithm != null)
        msInspectSoftware.addDataProcessingParam(APML_SOFTWARE_PARAM_FEATURE_STRATEGY, featureFindingAlgorithm);
    dataProcessing.addSoftware(msInspectSoftware);

    boolean hasQuant = featureSet.hasExtraInformationType(IsotopicLabelExtraInfoDef.getSingletonInstance());

    org.fhcrc.cpl.toolbox.proteomics.feature.Feature[] msInspectFeatures =
            featureSet.getFeatures();
    List<org.systemsbiology.apmlparser.v2.datatype.Feature> apmlFeatureList =
            new ArrayList<org.systemsbiology.apmlparser.v2.datatype.Feature>(
                    msInspectFeatures.length);
    int currentFeatureId = 1;
    List<Cluster> quantClusters = new ArrayList<Cluster>();
    int currentClusterId = 1;
    boolean foundLabel = false;

    for (org.fhcrc.cpl.toolbox.proteomics.feature.Feature msInspectFeature : msInspectFeatures)
    {
        org.systemsbiology.apmlparser.v2.datatype.Feature apmlFeature =
                createAPMLFeature(msInspectFeature);
        apmlFeature.setId(currentFeatureId++);
        apmlFeatureList.add(apmlFeature);

        if (msInspectFeature.comprised != null)
        {
            for (Spectrum.Peak peak : msInspectFeature.comprised)
            {
                if (peak == null)
                    continue;
                org.fhcrc.cpl.toolbox.proteomics.feature.Feature peakFeature =
                        (org.fhcrc.cpl.toolbox.proteomics.feature.Feature) peak;
                Coordinate peakCoordinate = new Coordinate();
                peakCoordinate.setMz(peakFeature.getMz());
                peakCoordinate.setApexIntensity(peakFeature.getIntensity());
                peakCoordinate.setIntensity(peakFeature.getTotalIntensity());
                peakCoordinate.setScanRange(new Coordinate.Range<Integer>(
                        peakFeature.getScanFirst(), peakFeature.getScanLast()));
                peakCoordinate.setApexScan(peakFeature.getScan());

                //peak offset: the peak's m/z distance from the feature, scaled by charge and rounded
                float mzDiff = peak.getMz() - msInspectFeature.getMz();
                int peakOffset = Math.round(mzDiff * msInspectFeature.charge);
                apmlFeature.addMultiScanPeak(new MultiScanPeak(peakCoordinate, peakOffset));
            }
        }

        if (hasQuant && IsotopicLabelExtraInfoDef.hasRatio(msInspectFeature))
        {
            float ratio = (float) IsotopicLabelExtraInfoDef.getRatio(msInspectFeature);

            //create a heavy feature, initially identical to the light one
            org.systemsbiology.apmlparser.v2.datatype.Feature heavyFeature = createAPMLFeature(msInspectFeature);
            heavyFeature.setId(currentFeatureId++);
            apmlFeatureList.add(heavyFeature);

            float lightIntensity = (float) IsotopicLabelExtraInfoDef.getLightIntensity(msInspectFeature);
            float heavyIntensity = (float) IsotopicLabelExtraInfoDef.getHeavyIntensity(msInspectFeature);

            float labelMassDiff = IsotopicLabelExtraInfoDef.getLabel(msInspectFeature).getHeavy() -
                    IsotopicLabelExtraInfoDef.getLabel(msInspectFeature).getLight();
            //The heavy feature is heavier by one label difference per label. Mass and m/z must
            //both reflect the label count, because the loader derives the label count from the
            //light/heavy mass difference.
            float totalMassDiff = labelMassDiff * IsotopicLabelExtraInfoDef.getLabelCount(msInspectFeature);
            heavyFeature.getCoord().setMass(apmlFeature.getCoord().getMass() + totalMassDiff);
            float heavyMzDiff = totalMassDiff / msInspectFeature.getCharge();
            heavyFeature.getCoord().setMz(apmlFeature.getCoord().getMz() + heavyMzDiff);

            if (lightIntensity != 0 && heavyIntensity != 0)
            {
                apmlFeature.getCoord().setApexIntensity(lightIntensity);
                heavyFeature.getCoord().setApexIntensity(heavyIntensity);
            }
            else
            {
                //explicit intensities unavailable: derive the heavy intensity from the ratio
                heavyFeature.getCoord().setApexIntensity(msInspectFeature.getIntensity() / ratio);
            }

            FeatureCluster labelCluster = new FeatureCluster();
            labelCluster.addFeature(apmlFeature);
            labelCluster.addFeature(heavyFeature);
            labelCluster.setId(currentClusterId++);
            labelCluster.setClassification(APML_CLUSTERS_DESCRIPTION_LABELED_QUANT);
            quantClusters.add(labelCluster);

            if (!foundLabel)
            {
                //the label of the first quantitated feature is recorded for the whole file
                msInspectSoftware.addDataProcessingParam(APML_SOFTWARE_PARAM_QUANT_LABEL,
                        IsotopicLabelExtraInfoDef.getLabel(msInspectFeature).toString());
                foundLabel = true;
            }
        }
    }

    if (hasQuant && !foundLabel)
        throw new IOException("File seems to have quantitation, but no labeled features were found. Quitting");

    //the peak list's source: the FeatureSet's source file if known, otherwise the output file
    String filePath = outFile.getAbsolutePath();
    if (featureSet.getSourceFile() != null)
        filePath = featureSet.getSourceFile().getAbsolutePath();

    try
    {
        if (hasQuant)
            apmlWriter.writePeakListFile(outFile, dataProcessing, apmlFeatureList.iterator(),
                    filePath, apmlFeatureList.size(),
                    1, APML_CLUSTERS_DESCRIPTION_LABELED_QUANT, quantClusters);
        else
            apmlWriter.writePeakListFile(outFile, dataProcessing, apmlFeatureList.iterator(),
                    filePath, apmlFeatureList.size());
    }
    catch (XMLStreamException e)
    {
        //report the failure to the caller instead of returning as though the save succeeded
        throw new IOException("Failed to save APML to file " + outFile.getAbsolutePath() +
                ": " + e.getMessage(), e);
    }
    catch (XmlException xe)
    {
        throw new IOException("Failed to save APML to file " + outFile.getAbsolutePath() +
                ": " + xe.getMessage(), xe);
    }
}
/**
 * Convert an msInspect-style feature into an APML-style feature. This is the one place
 * that knows which msInspect field goes into which APML field.
 *
 * The result carries the coordinate information, three quality scores (KL, accurate-mass
 * flag, sum-squares distance), the description as annotation and, when the feature has a
 * peptide, a single putative peptide id with its PeptideProphet score and modifications.
 *
 * @param msInspectFeature feature to convert
 * @return a new APML feature (no id assigned)
 */
public org.systemsbiology.apmlparser.v2.datatype.Feature createAPMLFeature(
        org.fhcrc.cpl.toolbox.proteomics.feature.Feature msInspectFeature)
{
    org.systemsbiology.apmlparser.v2.datatype.Feature apmlFeature =
            new org.systemsbiology.apmlparser.v2.datatype.Feature();

    //coordinate block
    Coordinate coordinate = new Coordinate();
    coordinate.setApexScan(msInspectFeature.getScan());
    Coordinate.Range<Integer> firstToLastScan = new Coordinate.Range<Integer>(
            msInspectFeature.getScanFirst(), msInspectFeature.getScanLast());
    coordinate.setScanCount(msInspectFeature.getScanCount());
    coordinate.setScanRange(firstToLastScan);
    coordinate.setMz(msInspectFeature.getMz());
    coordinate.setMass(msInspectFeature.getMass());
    coordinate.setRt(msInspectFeature.getTime());
    coordinate.setApexIntensity(msInspectFeature.getIntensity());
    coordinate.setIntensity(msInspectFeature.getTotalIntensity());
    coordinate.setCharge(msInspectFeature.getCharge());
    apmlFeature.setCoord(coordinate);

    //quality scores, always written in this order: KL, accurate mass, sum-squares distance
    apmlFeature.addQualityScore(new Feature.QualityScore(
            APML_QUALITY_SCORE_NAME_KL,
            Float.toString(msInspectFeature.getKl()),
            Feature.QualityScore.TYPE_DECIMAL));
    apmlFeature.addQualityScore(new Feature.QualityScore(
            APML_QUALITY_SCORE_ACCMASS,
            Boolean.toString(msInspectFeature.isAccurateMZ()),
            Feature.QualityScore.TYPE_BOOLEAN));
    apmlFeature.addQualityScore(new Feature.QualityScore(
            APML_QUALITY_SCORE_SUMSQUARESDIST,
            Float.toString(msInspectFeature.getSumSquaresDist()),
            Feature.QualityScore.TYPE_DECIMAL));

    String description = msInspectFeature.getDescription();
    if (description != null)
        apmlFeature.setAnnotation(msInspectFeature.getDescription());

    //TODO: support multiple peptides per feature
    String firstPeptide = MS2ExtraInfoDef.getFirstPeptide(msInspectFeature);
    if (firstPeptide == null)
    {
        //no peptide identification: nothing more to map
        return apmlFeature;
    }

    PutativePeptideId peptideId = new PutativePeptideId();
    peptideId.setPeptideSequence(firstPeptide);

    if (MS2ExtraInfoDef.hasPeptideProphet(msInspectFeature))
    {
        String peptideProphetText = "" + MS2ExtraInfoDef.getPeptideProphet(msInspectFeature);
        peptideId.addMs2SearchScore(new PutativePeptideId.SearchScore(
                APML_SEARCH_SCORE_NAME_PEPTIDE_PROPHET, peptideProphetText));
    }

    //one APML modification per modified amino acid, positioned by its array index
    List<ModifiedAminoAcid>[] modsByPosition = MS2ExtraInfoDef.getModifiedAminoAcids(msInspectFeature);
    if (modsByPosition != null)
    {
        int position = 0;
        for (List<ModifiedAminoAcid> modsHere : modsByPosition)
        {
            if (modsHere != null)
            {
                for (ModifiedAminoAcid modifiedAA : modsHere)
                    peptideId.addModification(new Modification(position, (float) modifiedAA.getMass()));
            }
            position++;
        }
    }

    ArrayList<PutativePeptideId> peptideIds = new ArrayList<PutativePeptideId>(1);
    peptideIds.add(peptideId);
    apmlFeature.setPpids(peptideIds);
    return apmlFeature;
}
/**
 * Saving to a PrintWriter is not supported for APML; this overload always throws.
 * @param featureSet ignored
 * @param out ignored
 * @throws IllegalArgumentException always
 */
public void saveFeatureSet(FeatureSet featureSet, PrintWriter out)
{
    final String notImplementedMessage =
            "This version of saveFeatureSet not implemented in APMLFeatureFileHandler";
    throw new IllegalArgumentException(notImplementedMessage);
}
/**
 * Can this type of file handler handle this specific file?
 *
 * A file is accepted when it passes isXMLFile() and the first start element in the
 * document is named "apml" (case-insensitive).
 *
 * @param file file to inspect
 * @return true if the file looks like an APML document
 * @throws IOException if the file can't be opened or the XML can't be read up to its
 * first start element
 */
public boolean canHandleFile(File file)
        throws IOException
{
    if (!isXMLFile(file))
        return false;

    FileInputStream fis = new FileInputStream(file);
    try
    {
        SimpleXMLStreamReader parser = new SimpleXMLStreamReader(fis);
        //advance to the first start element, i.e. the document's root element
        while (!parser.isStartElement())
            parser.next();
        //an APML document is recognized by its root element being named "apml"
        return "apml".equalsIgnoreCase(parser.getLocalName());
    }
    catch (XMLStreamException xse)
    {
        //keep the parser exception as the cause so the failure can be diagnosed
        throw new IOException(xse.getMessage(), xse);
    }
    finally
    {
        fis.close();
    }
}
}