package com.compomics.util.experiment.io.identifications.idfilereaders;
import com.compomics.util.Util;
import com.compomics.util.experiment.biology.AminoAcidSequence;
import com.compomics.util.experiment.biology.Peptide;
import com.compomics.util.experiment.identification.Advocate;
import com.compomics.util.experiment.identification.spectrum_assumptions.PeptideAssumption;
import com.compomics.util.experiment.identification.identification_parameters.SearchParameters;
import com.compomics.util.experiment.identification.matches.ModificationMatch;
import com.compomics.util.experiment.identification.matches.SpectrumMatch;
import com.compomics.util.experiment.io.identifications.IdfileReader;
import com.compomics.util.experiment.massspectrometry.Charge;
import com.compomics.util.experiment.massspectrometry.Spectrum;
import com.compomics.util.experiment.massspectrometry.SpectrumFactory;
import com.compomics.util.experiment.personalization.ExperimentObject;
import com.compomics.util.preferences.SequenceMatchingPreferences;
import com.compomics.util.waiting.WaitingHandler;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import javax.xml.bind.JAXBException;
import uk.ac.ebi.pride.tools.braf.BufferedRandomAccessFile;
/**
* This IdfileReader reads identifications from an Tide tsv results file.
*
* @author Harald Barsnes
*/
public class TideIdfileReader extends ExperimentObject implements IdfileReader {
/**
* The software name.
*/
private String softwareName = "Tide";
/**
* The softwareVersion.
*/
private String softwareVersion = null;
/**
* The Tide tsv file.
*/
private File tideTsvFile;
/**
* The spectrum factory used to retrieve spectrum titles.
*/
private SpectrumFactory spectrumFactory = SpectrumFactory.getInstance();
/**
* Default constructor for the purpose of instantiation.
*/
public TideIdfileReader() {
}
/**
* Constructor for a Tide tsv results file reader.
*
* @param tideTsvFile the Tide tsv file
* @throws FileNotFoundException if a FileNotFoundException occurs
* @throws IOException if an IOException occurs
*/
public TideIdfileReader(File tideTsvFile) throws FileNotFoundException, IOException {
this(tideTsvFile, null);
}
/**
* Constructor for an Tide tsv result file reader.
*
* @param tideTsvFile the Tide tsv file
* @param waitingHandler the waiting handler
* @throws FileNotFoundException if a FileNotFoundException occurs
* @throws IOException if an IOException occurs
*/
public TideIdfileReader(File tideTsvFile, WaitingHandler waitingHandler) throws FileNotFoundException, IOException {
this.tideTsvFile = tideTsvFile;
// get the tide version number
//extractVersionNumber(); // @TODO: how to get the Tide version number..?
}
// /**
// * Extracts the Tide version number.
// */
// private void extractVersionNumber() throws IOException {
//
// BufferedRandomAccessFile bufferedRandomAccessFile = new BufferedRandomAccessFile(tideTsvFile, "r", 1024 * 100);
//
// // read the version number, if available, requires ms amanda version 1.0.0.3196 or newer
// String versionNumberString = bufferedRandomAccessFile.readLine();
//
// if (versionNumberString.toLowerCase().startsWith("#version: ")) {
// softwareVersion = versionNumberString.substring("#version: ".length()).trim();
// }
//
// bufferedRandomAccessFile.close();
// }
@Override
public String getExtension() {
return ".tide-search.target.txt";
}
@Override
public LinkedList<SpectrumMatch> getAllSpectrumMatches(WaitingHandler waitingHandler, SearchParameters searchParameters)
throws IOException, IllegalArgumentException, SQLException, ClassNotFoundException, InterruptedException, JAXBException {
return getAllSpectrumMatches(waitingHandler, searchParameters, null, true);
}
@Override
public LinkedList<SpectrumMatch> getAllSpectrumMatches(WaitingHandler waitingHandler, SearchParameters searchParameters,
SequenceMatchingPreferences sequenceMatchingPreferences, boolean expandAaCombinations)
throws IOException, IllegalArgumentException, SQLException, ClassNotFoundException, InterruptedException, JAXBException {
LinkedList<SpectrumMatch> result = new LinkedList<SpectrumMatch>();
BufferedRandomAccessFile bufferedRandomAccessFile = new BufferedRandomAccessFile(tideTsvFile, "r", 1024 * 100);
if (waitingHandler != null) {
waitingHandler.resetSecondaryProgressCounter();
waitingHandler.setMaxSecondaryProgressCounter(100);
}
long progressUnit = bufferedRandomAccessFile.length() / 100;
// check if the version number is included, ms amanda version 1.0.0.3196 or newer
//String versionNumberString = bufferedRandomAccessFile.readLine(); // @TODO: how to get the tide version number?
String headerString = bufferedRandomAccessFile.readLine();
// skip the version number
// if (versionNumberString.toLowerCase().startsWith("#version: ")) {
// headerString = bufferedRandomAccessFile.readLine();
// } else {
// headerString = versionNumberString;
// }
// parse the header line
String[] headers = headerString.split("\t");
int scanNumberIndex = -1, chargeIndex = -1, precursorMzIndex = -1, spectrumNeutralLossIndex = -1, peptideMassIndex = -1, deltaCnIndex = -1,
spScoreIndex = -1, spRankIndex = -1, exactPValueIndex = -1, xcorrScoreIndex = -1, xcorrRank = -1,
bAndyIonsMatchedIndex = -1, bAndyIonsTotal = -1, distinctMatchesPerSpectrum = -1, sequenceIndex = -1,
cleavageType = -1, proteinId = -1, flankingAa = -1;
// get the column index of the headers
for (int i = 0; i < headers.length; i++) {
String header = headers[i];
if (header.equalsIgnoreCase("scan")) {
scanNumberIndex = i;
} else if (header.equalsIgnoreCase("charge")) {
chargeIndex = i;
} else if (header.equalsIgnoreCase("spectrum precursor m/z")) {
precursorMzIndex = i;
} else if (header.equalsIgnoreCase("spectrum neutral mass")) {
spectrumNeutralLossIndex = i;
} else if (header.equalsIgnoreCase("peptide mass")) {
peptideMassIndex = i;
} else if (header.equalsIgnoreCase("delta_cn")) {
deltaCnIndex = i;
} else if (header.equalsIgnoreCase("sp score")) {
spScoreIndex = i;
} else if (header.equalsIgnoreCase("sp rank")) {
spRankIndex = i;
} else if (header.equalsIgnoreCase("exact p-value")) {
exactPValueIndex = i;
} else if (header.equalsIgnoreCase("xcorr score")) {
xcorrScoreIndex = i;
} else if (header.equalsIgnoreCase("xcorr rank")) {
xcorrRank = i;
} else if (header.equalsIgnoreCase("b/y ions matched")) {
bAndyIonsMatchedIndex = i;
} else if (header.equalsIgnoreCase("b/y ions total")) {
bAndyIonsTotal = i;
} else if (header.equalsIgnoreCase("distinct matches/spectrum")) {
distinctMatchesPerSpectrum = i;
} else if (header.equalsIgnoreCase("sequence")) {
sequenceIndex = i;
} else if (header.equalsIgnoreCase("cleavage type")) {
cleavageType = i;
} else if (header.equalsIgnoreCase("protein id")) {
proteinId = i;
} else if (header.equalsIgnoreCase("flanking aa")) {
flankingAa = i;
}
}
// check if all the required header are found
if (scanNumberIndex == -1 || chargeIndex == -1 /**
* || exactPValueIndex == -1*
*/
|| xcorrRank == -1 || sequenceIndex == -1) {
throw new IllegalArgumentException("Mandatory columns are missing in the Tide tsv file. Please check the file!");
}
String line;
String currentSpectrumTitle = null;
SpectrumMatch currentMatch = null;
// get the name of the mgf file
String spectrumFileName = Util.getFileName(tideTsvFile);
spectrumFileName = spectrumFileName.substring(0, spectrumFileName.length() - ".tide-search.target.txt".length()) + ".mgf"; // @TODO: will only work for files from searchgui...
// get the psms
while ((line = bufferedRandomAccessFile.readLine()) != null) {
String[] elements = line.split("\t");
if (!line.trim().isEmpty()) { // @TODO: make this more robust?
int scanNumber = Integer.valueOf(elements[scanNumberIndex]);
String modifiedPeptideSequence = elements[sequenceIndex].toUpperCase();
int charge = Integer.valueOf(elements[chargeIndex]);
int rank;
if (exactPValueIndex != -1) {
rank = Integer.valueOf(elements[xcorrRank]);
} else {
rank = Integer.valueOf(elements[xcorrRank]);
}
double tideEValue, rawScore;
if (exactPValueIndex != -1) {
String scoreAsText = elements[exactPValueIndex];
tideEValue = Util.readDoubleAsString(scoreAsText);
rawScore = tideEValue;
} else {
String scoreAsText = elements[xcorrScoreIndex];
rawScore = Util.readDoubleAsString(scoreAsText);
if (rawScore < 0) {
tideEValue = 100;
} else {
tideEValue = Math.pow(10, -rawScore); // convert xcorr score to a kind of e-value
}
}
String spectrumTitle = scanNumber + "";
if (spectrumFactory.fileLoaded(spectrumFileName)) {
spectrumTitle = spectrumFactory.getSpectrumTitle(spectrumFileName, scanNumber);
}
// set up the yet empty spectrum match, or add to the current match
if (currentMatch == null || (currentSpectrumTitle != null && !currentSpectrumTitle.equalsIgnoreCase(spectrumTitle))) {
// add the previous match, if any
if (currentMatch != null) {
result.add(currentMatch);
}
currentMatch = new SpectrumMatch(Spectrum.getSpectrumKey(spectrumFileName, spectrumTitle));
currentMatch.setSpectrumNumber(scanNumber);
currentSpectrumTitle = spectrumTitle;
}
// get the modifications
ArrayList<ModificationMatch> utilitiesModifications = new ArrayList<ModificationMatch>();
String unmodifiedPeptideSequence = "";
// check if the peptide is modified
if (modifiedPeptideSequence.contains("[")) {
// we expect something like this: TAM[15.9949]AGK
for (int i = 0; i < modifiedPeptideSequence.length(); i++) {
if (modifiedPeptideSequence.charAt(i) != '[') {
unmodifiedPeptideSequence += modifiedPeptideSequence.charAt(i);
} else {
// we've arrived at a modification, for example: [15.9949]
char modifiedResidue = modifiedPeptideSequence.charAt(i - 1); // @TODO: test for terminal ptms!
double ptmMass = Double.parseDouble(modifiedPeptideSequence.substring(i + 1, modifiedPeptideSequence.indexOf("]", i + 1)));
utilitiesModifications.add(new ModificationMatch(ptmMass + "@" + modifiedResidue, true, i));
i = modifiedPeptideSequence.indexOf("]", i + 1);
}
}
} else {
unmodifiedPeptideSequence = modifiedPeptideSequence;
}
// create the peptide
Peptide peptide = new Peptide(unmodifiedPeptideSequence, utilitiesModifications, true);
// set up the charge
Charge peptideCharge = new Charge(Charge.PLUS, charge);
// create the peptide assumption
PeptideAssumption peptideAssumption = new PeptideAssumption(peptide, rank, Advocate.tide.getIndex(), peptideCharge, tideEValue, Util.getFileName(tideTsvFile));
peptideAssumption.setRawScore(rawScore);
if (expandAaCombinations && AminoAcidSequence.hasCombination(unmodifiedPeptideSequence)) {
ArrayList<ModificationMatch> previousModificationMatches = peptide.getModificationMatches(),
newModificationMatches = null;
if (previousModificationMatches != null) {
newModificationMatches = new ArrayList<ModificationMatch>(previousModificationMatches.size());
}
for (StringBuilder expandedSequence : AminoAcidSequence.getCombinations(peptide.getSequence())) {
Peptide newPeptide = new Peptide(expandedSequence.toString(), newModificationMatches, true);
if (previousModificationMatches != null) {
for (ModificationMatch modificationMatch : previousModificationMatches) {
newPeptide.addModificationMatch(new ModificationMatch(modificationMatch.getTheoreticPtm(), modificationMatch.isVariable(), modificationMatch.getModificationSite()));
}
}
PeptideAssumption newAssumption = new PeptideAssumption(newPeptide, peptideAssumption.getRank(), peptideAssumption.getAdvocate(), peptideAssumption.getIdentificationCharge(), peptideAssumption.getScore(), peptideAssumption.getIdentificationFile());
newAssumption.setRawScore(rawScore);
currentMatch.addHit(Advocate.tide.getIndex(), newAssumption, false);
}
} else {
//peptideAssumption.addUrParam(scoreParam);
currentMatch.addHit(Advocate.tide.getIndex(), peptideAssumption, false);
}
if (waitingHandler != null && progressUnit != 0) {
waitingHandler.setSecondaryProgressCounter((int) (bufferedRandomAccessFile.getFilePointer() / progressUnit));
if (waitingHandler.isRunCanceled()) {
bufferedRandomAccessFile.close();
break;
}
}
}
}
// add the last match, if any
if (currentMatch != null) {
result.add(currentMatch);
}
bufferedRandomAccessFile.close();
return result;
}
@Override
public void close() throws IOException {
tideTsvFile = null;
}
@Override
public HashMap<String, ArrayList<String>> getSoftwareVersions() {
HashMap<String, ArrayList<String>> result = new HashMap<String, ArrayList<String>>();
ArrayList<String> versions = new ArrayList<String>();
versions.add(softwareVersion);
result.put(softwareName, versions); // @TODO: check!!
return result;
}
@Override
public HashMap<String, LinkedList<SpectrumMatch>> getTagsMap() {
return new HashMap<String, LinkedList<SpectrumMatch>>(0);
}
@Override
public void clearTagsMap() {
// No tags here
}
@Override
public boolean hasDeNovoTags() {
return false;
}
}