package com.compomics.util.experiment.io.identifications.idfilereaders;
import com.compomics.util.Util;
import com.compomics.util.experiment.biology.AminoAcidSequence;
import com.compomics.util.experiment.biology.Peptide;
import com.compomics.util.experiment.identification.Advocate;
import com.compomics.util.experiment.identification.spectrum_assumptions.PeptideAssumption;
import com.compomics.util.experiment.identification.identification_parameters.SearchParameters;
import com.compomics.util.experiment.identification.matches.ModificationMatch;
import com.compomics.util.experiment.identification.matches.SpectrumMatch;
import com.compomics.util.experiment.io.identifications.IdfileReader;
import com.compomics.util.experiment.massspectrometry.Charge;
import com.compomics.util.experiment.massspectrometry.Spectrum;
import com.compomics.util.experiment.personalization.ExperimentObject;
import com.compomics.util.preferences.SequenceMatchingPreferences;
import com.compomics.util.waiting.WaitingHandler;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URLDecoder;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import javax.xml.bind.JAXBException;
import uk.ac.ebi.pride.tools.braf.BufferedRandomAccessFile;
/**
 * This IdfileReader reads identifications from an MS Amanda csv result file.
 *
 * The file is expected to be tab separated, with an optional first line
 * starting with "#version: " followed by a header line naming the columns.
 *
 * @author Harald Barsnes
 */
public class MsAmandaIdfileReader extends ExperimentObject implements IdfileReader {

    /**
     * The prefix of the optional first line holding the MS Amanda version
     * number. Matched case insensitively.
     */
    private static final String VERSION_PREFIX = "#version: ";
    /**
     * The buffer size handed to the BufferedRandomAccessFile when reading the
     * csv file.
     */
    private static final int READ_BUFFER_SIZE = 1024 * 100;
    /**
     * The software name.
     */
    private String softwareName = "MS Amanda";
    /**
     * The software version, null if the file does not contain a version line.
     */
    private String softwareVersion = null;
    /**
     * The MS Amanda csv file.
     */
    private File msAmandaCsvFile;

    /**
     * Default constructor for the purpose of instantiation.
     */
    public MsAmandaIdfileReader() {
    }

    /**
     * Constructor for an MS Amanda csv result file reader.
     *
     * @param msAmandaCsvFile the MS Amanda csv file
     * @throws FileNotFoundException if a FileNotFoundException occurs
     * @throws IOException if an IOException occurs
     */
    public MsAmandaIdfileReader(File msAmandaCsvFile) throws FileNotFoundException, IOException {
        this(msAmandaCsvFile, null);
    }

    /**
     * Constructor for an MS Amanda csv result file reader. The first line of
     * the file is read right away in order to extract the version number.
     *
     * @param msAmandaCsvFile the MS Amanda csv file
     * @param waitingHandler the waiting handler (not used by this constructor)
     * @throws FileNotFoundException if a FileNotFoundException occurs
     * @throws IOException if an IOException occurs
     */
    public MsAmandaIdfileReader(File msAmandaCsvFile, WaitingHandler waitingHandler) throws FileNotFoundException, IOException {
        this.msAmandaCsvFile = msAmandaCsvFile;

        // get the ms amanda version number
        extractVersionNumber();
    }

    /**
     * Indicates whether the given line is an MS Amanda version line, i.e.,
     * starts with "#version: " in any letter case.
     *
     * @param line the line to check, can be null
     * @return true if the line is a version line
     */
    private static boolean isVersionLine(String line) {
        // regionMatches with ignoreCase compares character by character and
        // therefore does not depend on the default locale
        return line != null && line.regionMatches(true, 0, VERSION_PREFIX, 0, VERSION_PREFIX.length());
    }

    /**
     * Extracts the MS Amanda version number from the first line of the file.
     * The version is left untouched if the file is empty or does not start
     * with a version line. Requires MS Amanda version 1.0.0.3196 or newer.
     *
     * @throws IOException if an IOException occurs while reading the file
     */
    private void extractVersionNumber() throws IOException {

        BufferedRandomAccessFile reader = new BufferedRandomAccessFile(msAmandaCsvFile, "r", READ_BUFFER_SIZE);

        try {
            // readLine returns null for an empty file, which isVersionLine handles
            String firstLine = reader.readLine();

            if (isVersionLine(firstLine)) {
                softwareVersion = firstLine.substring(VERSION_PREFIX.length()).trim();
            }
        } finally {
            // always release the file handle, also when reading fails
            reader.close();
        }
    }

    @Override
    public String getExtension() {
        return ".ms-amanda.csv";
    }

    /**
     * Returns all spectrum matches found in the file. Delegates to the four
     * argument version without sequence matching preferences and with the
     * expansion of amino acid combinations enabled.
     */
    @Override
    public LinkedList<SpectrumMatch> getAllSpectrumMatches(WaitingHandler waitingHandler, SearchParameters searchParameters)
            throws IOException, IllegalArgumentException, SQLException, ClassNotFoundException, InterruptedException, JAXBException {
        return getAllSpectrumMatches(waitingHandler, searchParameters, null, true);
    }

    /**
     * Parses the csv file and returns one spectrum match per run of
     * consecutive lines sharing the same spectrum title, each line adding a
     * peptide assumption to the match. If the run is canceled via the waiting
     * handler the matches parsed so far are returned.
     *
     * @param waitingHandler the waiting handler used for progress display and
     * cancellation, can be null
     * @param searchParameters the search parameters (not used by this reader)
     * @param sequenceMatchingPreferences the sequence matching preferences
     * (not used by this reader)
     * @param expandAaCombinations if true, a peptide assumption is added for
     * every sequence returned by AminoAcidSequence.getCombinations when the
     * sequence contains amino acid combinations
     * @return the spectrum matches in file order
     * @throws IOException if an IOException occurs while reading the file
     * @throws IllegalArgumentException if the header line or a mandatory
     * column is missing, or if a modification cannot be parsed
     */
    @Override
    public LinkedList<SpectrumMatch> getAllSpectrumMatches(WaitingHandler waitingHandler, SearchParameters searchParameters,
            SequenceMatchingPreferences sequenceMatchingPreferences, boolean expandAaCombinations)
            throws IOException, IllegalArgumentException, SQLException, ClassNotFoundException, InterruptedException, JAXBException {

        LinkedList<SpectrumMatch> result = new LinkedList<SpectrumMatch>();

        BufferedRandomAccessFile reader = new BufferedRandomAccessFile(msAmandaCsvFile, "r", READ_BUFFER_SIZE);

        try {
            if (waitingHandler != null) {
                waitingHandler.resetSecondaryProgressCounter();
                waitingHandler.setMaxSecondaryProgressCounter(100);
            }

            long progressUnit = reader.length() / 100;

            // skip the version number if included, ms amanda version 1.0.0.3196 or newer
            String firstLine = reader.readLine();
            String headerString = isVersionLine(firstLine) ? reader.readLine() : firstLine;

            // an empty file, or a file with only the version line, has no header
            if (headerString == null) {
                throw new IllegalArgumentException("The header line is missing in the MS Amanda csv file. Please check the file!");
            }

            // parse the header line
            String[] headers = headerString.split("\t");

            int scanNumberIndex = -1, titleIndex = -1, sequenceIndex = -1, modificationsIndex = -1, proteinAccessionsIndex = -1,
                    amandaScoreIndex = -1, rankIndex = -1, mzIndex = -1, chargeIndex = -1, rtIndex = -1, filenameIndex = -1,
                    amandaWeightedProbabilityIndex = -1;

            // get the column index of the headers
            for (int i = 0; i < headers.length; i++) {
                String header = headers[i];

                if (header.equalsIgnoreCase("Scan Number")) {
                    scanNumberIndex = i;
                } else if (header.equalsIgnoreCase("Title")) {
                    titleIndex = i;
                } else if (header.equalsIgnoreCase("Sequence")) {
                    sequenceIndex = i;
                } else if (header.equalsIgnoreCase("Modifications")) {
                    modificationsIndex = i;
                } else if (header.equalsIgnoreCase("Protein Accessions")) {
                    proteinAccessionsIndex = i;
                } else if (header.equalsIgnoreCase("Amanda Score")) {
                    amandaScoreIndex = i;
                } else if (header.equalsIgnoreCase("Weighted Probability")) {
                    amandaWeightedProbabilityIndex = i;
                } else if (header.equalsIgnoreCase("Rank")) {
                    rankIndex = i;
                } else if (header.equalsIgnoreCase("m/z")) {
                    mzIndex = i;
                } else if (header.equalsIgnoreCase("Charge")) {
                    chargeIndex = i;
                } else if (header.equalsIgnoreCase("RT")) {
                    // not mandatory, as old csv files didn't have this one
                    rtIndex = i;
                } else if (header.equalsIgnoreCase("Filename")) {
                    filenameIndex = i;
                }
            }

            // check if all the required headers are found
            if (scanNumberIndex == -1 || titleIndex == -1 || sequenceIndex == -1 || modificationsIndex == -1
                    || proteinAccessionsIndex == -1 || amandaScoreIndex == -1 || rankIndex == -1
                    || mzIndex == -1 || chargeIndex == -1 || filenameIndex == -1) {
                throw new IllegalArgumentException("Mandatory columns are missing in the MS Amanda csv file. Please check the file!");
            }

            String line;
            String currentSpectrumTitle = null;
            SpectrumMatch currentMatch = null;

            // get the psms
            while ((line = reader.readLine()) != null) {

                // skip blank lines
                if (line.trim().isEmpty()) { // @TODO: make this more robust?
                    continue;
                }

                String[] elements = line.split("\t");

                // the scan number, protein accessions, m/z and retention time columns are not currently used
                // @TODO: if the retention time is used, values such as PT2700.460000S have to be escaped
                String spectrumTitle = elements[titleIndex];
                String peptideSequence = elements[sequenceIndex].toUpperCase();
                String modifications = elements[modificationsIndex].trim();

                // get the ms amanda score
                double msAmandaRawScore = Util.readDoubleAsString(elements[amandaScoreIndex]);

                // get the ms amanda e-value
                double msAmandaTransformedScore;
                if (amandaWeightedProbabilityIndex != -1) {
                    msAmandaTransformedScore = Util.readDoubleAsString(elements[amandaWeightedProbabilityIndex]);
                } else {
                    msAmandaTransformedScore = Math.pow(10, -msAmandaRawScore); // convert ms amanda score to e-value like
                }

                int rank = Integer.parseInt(elements[rankIndex]);
                int charge = Integer.parseInt(elements[chargeIndex]);
                String fileName = elements[filenameIndex];

                // remove any html from the title
                spectrumTitle = URLDecoder.decode(spectrumTitle, "utf-8");

                // set up the yet empty spectrum match, or add to the current match
                if (currentMatch == null || (currentSpectrumTitle != null && !currentSpectrumTitle.equalsIgnoreCase(spectrumTitle))) {

                    // add the previous match, if any
                    if (currentMatch != null) {
                        result.add(currentMatch);
                    }

                    currentMatch = new SpectrumMatch(Spectrum.getSpectrumKey(fileName, spectrumTitle));
                    currentSpectrumTitle = spectrumTitle;
                }

                // get the modifications
                ArrayList<ModificationMatch> utilitiesModifications = parseVariableModifications(modifications, peptideSequence);

                // create the peptide
                Peptide peptide = new Peptide(peptideSequence, utilitiesModifications, true);

                // set up the charge
                Charge peptideCharge = new Charge(Charge.PLUS, charge);

                // create the peptide assumption
                PeptideAssumption peptideAssumption = new PeptideAssumption(peptide, rank, Advocate.msAmanda.getIndex(), peptideCharge, msAmandaTransformedScore, Util.getFileName(msAmandaCsvFile));
                peptideAssumption.setRawScore(msAmandaRawScore);

                if (expandAaCombinations && AminoAcidSequence.hasCombination(peptideSequence)) {

                    ArrayList<ModificationMatch> previousModificationMatches = peptide.getModificationMatches();

                    for (StringBuilder expandedSequence : AminoAcidSequence.getCombinations(peptide.getSequence())) {

                        // a fresh list per expanded peptide, so that the peptides can never end up
                        // sharing (and accumulating modifications in) one and the same list instance
                        ArrayList<ModificationMatch> newModificationMatches = null;
                        if (previousModificationMatches != null) {
                            newModificationMatches = new ArrayList<ModificationMatch>(previousModificationMatches.size());
                        }

                        Peptide newPeptide = new Peptide(expandedSequence.toString(), newModificationMatches, true);

                        if (previousModificationMatches != null) {
                            for (ModificationMatch modificationMatch : previousModificationMatches) {
                                newPeptide.addModificationMatch(new ModificationMatch(modificationMatch.getTheoreticPtm(), modificationMatch.isVariable(), modificationMatch.getModificationSite()));
                            }
                        }

                        PeptideAssumption newAssumption = new PeptideAssumption(newPeptide, peptideAssumption.getRank(), peptideAssumption.getAdvocate(), peptideAssumption.getIdentificationCharge(), peptideAssumption.getScore(), peptideAssumption.getIdentificationFile());
                        newAssumption.setRawScore(msAmandaRawScore);
                        currentMatch.addHit(Advocate.msAmanda.getIndex(), newAssumption, false);
                    }
                } else {
                    currentMatch.addHit(Advocate.msAmanda.getIndex(), peptideAssumption, false);
                }

                if (waitingHandler != null && progressUnit != 0) {
                    waitingHandler.setSecondaryProgressCounter((int) (reader.getFilePointer() / progressUnit));
                    if (waitingHandler.isRunCanceled()) {
                        // stop reading, the file is closed in the finally block
                        break;
                    }
                }
            }

            // add the last match, if any
            if (currentMatch != null) {
                result.add(currentMatch);
            }
        } finally {
            // always release the file handle, also when parsing fails or the run is canceled
            reader.close();
        }

        return result;
    }

    /**
     * Parses the content of the modifications column and returns the variable
     * modifications as modification matches named "mass@residue". Fixed
     * modifications are parsed but not returned.
     *
     * The expected format is a semicolon separated list of entries such as
     * N-Term(acetylation of protein n-term|42.010565|variable) or
     * C4(carbamidomethyl c|57.021464|fixed).
     *
     * @param modifications the trimmed content of the modifications column
     * @param peptideSequence the upper case peptide sequence
     * @return the variable modifications, empty if there are none
     * @throws IllegalArgumentException if an entry cannot be parsed
     */
    private static ArrayList<ModificationMatch> parseVariableModifications(String modifications, String peptideSequence) {

        ArrayList<ModificationMatch> variableModifications = new ArrayList<ModificationMatch>();

        if (modifications.isEmpty()) {
            return variableModifications;
        }

        for (String ptm : modifications.split(";")) {

            try {
                int bracketIndex = ptm.indexOf("(");
                String location = ptm.substring(0, bracketIndex);

                int modSite;
                if (location.equalsIgnoreCase("N-Term")) {
                    modSite = 1;
                } else if (location.equalsIgnoreCase("C-Term")) {
                    // anchored to the last residue, mirroring N-Term being anchored to the first one;
                    // length + 1 made the residue lookup below fail for every variable C-Term modification
                    // NOTE(review): assumes terminal modification sites are 1-based residue indexes - confirm
                    modSite = peptideSequence.length();
                } else {
                    // amino acid type and index expected, e.g., C4 or M3
                    modSite = Integer.parseInt(location.substring(1));
                }

                // name|mass|fixed status, the name is not currently used
                String rest = ptm.substring(bracketIndex + 1, ptm.length() - 1).toLowerCase();
                String[] details = rest.split("\\|");
                double ptmMass = Util.readDoubleAsString(details[1]);
                String ptmFixedStatus = details[2];

                if (ptmFixedStatus.equalsIgnoreCase("variable")) {
                    variableModifications.add(new ModificationMatch(ptmMass + "@" + peptideSequence.charAt(modSite - 1), true, modSite));
                }
            } catch (Exception e) {
                // keep the original failure as the cause to ease debugging
                throw new IllegalArgumentException("Error parsing ptm: " + ptm + "!", e);
            }
        }

        return variableModifications;
    }

    /**
     * Releases the reference to the csv file. No file handle is kept open
     * between calls, so there is nothing else to close.
     */
    @Override
    public void close() throws IOException {
        msAmandaCsvFile = null;
    }

    /**
     * Returns a map with the software name as key and a single element list
     * holding the software version as value. The version is null if the file
     * did not contain a version line.
     */
    @Override
    public HashMap<String, ArrayList<String>> getSoftwareVersions() {
        HashMap<String, ArrayList<String>> result = new HashMap<String, ArrayList<String>>();
        ArrayList<String> versions = new ArrayList<String>();
        versions.add(softwareVersion);
        result.put(softwareName, versions);
        return result;
    }

    @Override
    public HashMap<String, LinkedList<SpectrumMatch>> getTagsMap() {
        return new HashMap<String, LinkedList<SpectrumMatch>>(0);
    }

    @Override
    public void clearTagsMap() {
        // No tags here
    }

    @Override
    public boolean hasDeNovoTags() {
        return false;
    }
}