package com.compomics.util.experiment.io.identifications.idfilereaders;
import com.compomics.util.Util;
import com.compomics.util.experiment.biology.AminoAcidSequence;
import com.compomics.util.experiment.identification.Advocate;
import com.compomics.util.experiment.identification.identification_parameters.SearchParameters;
import com.compomics.util.experiment.identification.protein_sequences.SequenceFactory;
import com.compomics.util.experiment.identification.spectrum_assumptions.TagAssumption;
import com.compomics.util.experiment.identification.identification_parameters.tool_specific.PNovoParameters;
import com.compomics.util.experiment.identification.matches.ModificationMatch;
import com.compomics.util.experiment.identification.matches.SpectrumMatch;
import com.compomics.util.experiment.identification.amino_acid_tags.Tag;
import com.compomics.util.experiment.identification.protein_inference.PeptideMapperType;
import com.compomics.util.experiment.identification.protein_inference.proteintree.ProteinTree;
import com.compomics.util.experiment.io.identifications.IdfileReader;
import com.compomics.util.experiment.massspectrometry.Charge;
import com.compomics.util.experiment.massspectrometry.Spectrum;
import com.compomics.util.experiment.personalization.ExperimentObject;
import com.compomics.util.preferences.SequenceMatchingPreferences;
import com.compomics.util.waiting.WaitingHandler;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URLDecoder;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.Map;
import javax.xml.bind.JAXBException;
import uk.ac.ebi.pride.tools.braf.BufferedRandomAccessFile;
/**
 * This class can be used to parse pNovo identification files.
 *
 * The parser reads the file as tab separated lines: a line starting with "S"
 * introduces a spectrum and carries the spectrum title in its second column,
 * and the lines starting with "P" that directly follow it are the solutions
 * for that spectrum, with the sequence in the second column and the score in
 * the third column.
 *
 * @author Harald Barsnes
 */
public class PNovoIdfileReader extends ExperimentObject implements IdfileReader {

    /**
     * The extension of the pNovo result files.
     */
    private static final String PNOVO_EXTENSION = ".pnovo.txt";
    /**
     * A map of all spectrum titles and the associated index in the random
     * access file. The index is the position right after the spectrum line,
     * i.e., where the solutions of the spectrum start.
     */
    private HashMap<String, Long> index;
    /**
     * The result file in random access.
     */
    private BufferedRandomAccessFile bufferedRandomAccessFile = null;
    /**
     * The name of the result file.
     */
    private String fileName;
    /**
     * Map of the tags found indexed by amino acid sequence.
     */
    private HashMap<String, LinkedList<SpectrumMatch>> tagsMap;

    /**
     * Default constructor for the purpose of instantiation.
     */
    public PNovoIdfileReader() {
    }

    /**
     * Constructor, initiate the parser without progress feedback. The close()
     * method shall be used when the file reader is no longer used.
     *
     * @param identificationFile the identification file to parse
     *
     * @throws FileNotFoundException exception thrown whenever the provided file
     * was not found
     * @throws IOException exception thrown whenever an error occurred while
     * reading the file
     */
    public PNovoIdfileReader(File identificationFile) throws FileNotFoundException, IOException {
        this(identificationFile, null);
    }

    /**
     * Constructor, initiate the parser. Displays the progress using the waiting
     * handler. The close() method shall be used when the file reader is no
     * longer used. If the indexing of the file fails, the file is closed
     * before the exception is passed on to the caller.
     *
     * @param identificationFile the identification file to parse
     * @param waitingHandler a waiting handler providing progress feedback to
     * the user, can be null
     *
     * @throws FileNotFoundException exception thrown whenever the provided file
     * was not found
     * @throws IOException exception thrown whenever an error occurred while
     * reading the file, or when a spectrum line does not contain a title
     */
    public PNovoIdfileReader(File identificationFile, WaitingHandler waitingHandler) throws FileNotFoundException, IOException {

        bufferedRandomAccessFile = new BufferedRandomAccessFile(identificationFile, "r", 1024 * 100);
        fileName = Util.getFileName(identificationFile);

        boolean indexingCompleted = false;

        try {
            if (waitingHandler != null) {
                waitingHandler.resetSecondaryProgressCounter();
                waitingHandler.setMaxSecondaryProgressCounter(100);
            }

            // never divide by zero for files smaller than 100 bytes
            long progressUnit = Math.max(1L, bufferedRandomAccessFile.length() / 100);

            // insertion ordered, so that the spectra are later visited in the order they appear in the file
            index = new LinkedHashMap<String, Long>();

            String line;
            while ((line = bufferedRandomAccessFile.readLine()) != null) {

                if (!line.startsWith("S")) {
                    continue;
                }

                // position right after the spectrum line, i.e., the first solution line
                long currentIndex = bufferedRandomAccessFile.getFilePointer();

                String[] splitLine = line.split("\\t");
                if (splitLine.length < 2) {
                    throw new IOException("Unexpected spectrum line in " + fileName + ", no spectrum title found: '" + line + "'.");
                }
                index.put(splitLine[1].trim(), currentIndex);

                if (waitingHandler != null) {
                    if (waitingHandler.isRunCanceled()) {
                        break;
                    }
                    waitingHandler.setSecondaryProgressCounter((int) (currentIndex / progressUnit));
                }
            }

            indexingCompleted = true;

        } finally {
            if (!indexingCompleted) {
                // no instance is returned to the caller on failure, so the file has to be released here
                try {
                    bufferedRandomAccessFile.close();
                } catch (IOException ignored) {
                    // the failure that interrupted the indexing is the one to report
                }
            }
        }
    }

    @Override
    public LinkedList<SpectrumMatch> getAllSpectrumMatches(WaitingHandler waitingHandler, SearchParameters searchParameters)
            throws IOException, IllegalArgumentException, SQLException, ClassNotFoundException, InterruptedException, JAXBException {
        return getAllSpectrumMatches(waitingHandler, searchParameters, null, false);
    }

    @Override
    public LinkedList<SpectrumMatch> getAllSpectrumMatches(WaitingHandler waitingHandler, SearchParameters searchParameters,
            SequenceMatchingPreferences sequenceMatchingPreferences, boolean expandAaCombinations)
            throws IOException, IllegalArgumentException, SQLException, ClassNotFoundException, InterruptedException, JAXBException {

        if (bufferedRandomAccessFile == null) {
            throw new IllegalStateException("The identification file was not set. Please use the appropriate constructor.");
        }

        // the tags map is only populated when sequence matching preferences are provided
        int tagMapKeyLength = 3;
        if (sequenceMatchingPreferences != null) {
            if (sequenceMatchingPreferences.getPeptideMapperType() == PeptideMapperType.tree) {
                SequenceFactory sequenceFactory = SequenceFactory.getInstance();
                tagMapKeyLength = ((ProteinTree) sequenceFactory.getDefaultPeptideMapper()).getInitialTagSize();
            }
            tagsMap = new HashMap<String, LinkedList<SpectrumMatch>>(1024);
        }

        LinkedList<SpectrumMatch> spectrumMatches = new LinkedList<SpectrumMatch>();

        if (waitingHandler != null) {
            waitingHandler.setSecondaryProgressCounterIndeterminate(false);
            waitingHandler.resetSecondaryProgressCounter();
            waitingHandler.setMaxSecondaryProgressCounter(index.size());
        }

        String mgfFileName = getMgfFileName();

        for (Map.Entry<String, Long> spectrumEntry : index.entrySet()) {

            // the titles are URL decoded before being used in the spectrum key
            String decodedTitle = URLDecoder.decode(spectrumEntry.getKey(), "utf-8");
            SpectrumMatch currentMatch = new SpectrumMatch(Spectrum.getSpectrumKey(mgfFileName, decodedTitle));

            // go to the first line after the spectrum line and read the solutions, ranked in order of appearance
            bufferedRandomAccessFile.seek(spectrumEntry.getValue());
            int rank = 1;
            String line = readTrimmedLine();
            while (line != null && line.startsWith("P")) {
                currentMatch.addHit(Advocate.pNovo.getIndex(), getAssumptionFromLine(line, rank, searchParameters), true);
                rank++;
                line = readTrimmedLine();
            }

            // spectra without any solution are not reported
            boolean solutionsFound = rank > 1;
            if (solutionsFound) {
                if (sequenceMatchingPreferences != null) {
                    addToTagsMap(currentMatch, tagMapKeyLength, sequenceMatchingPreferences);
                }
                spectrumMatches.add(currentMatch);
            }

            if (waitingHandler != null) {
                if (waitingHandler.isRunCanceled()) {
                    break;
                }
                waitingHandler.increaseSecondaryProgressCounter();
            }
        }

        return spectrumMatches;
    }

    /**
     * Reads the next line of the result file and trims it.
     *
     * @return the trimmed line, null if the end of the file was reached
     *
     * @throws IOException exception thrown whenever an error occurred while
     * reading the file
     */
    private String readTrimmedLine() throws IOException {
        String line = bufferedRandomAccessFile.getNextLine();
        return line == null ? null : line.trim();
    }

    /**
     * Registers the given spectrum match in the tags map under every tag key
     * of its tag assumptions map.
     *
     * @param spectrumMatch the spectrum match to register
     * @param tagMapKeyLength the length of the keys of the tags map
     * @param sequenceMatchingPreferences the sequence matching preferences
     *
     * @throws IOException, IllegalArgumentException, SQLException,
     * ClassNotFoundException, InterruptedException, JAXBException exceptions
     * possibly thrown while building the tag assumptions map
     */
    private void addToTagsMap(SpectrumMatch spectrumMatch, int tagMapKeyLength, SequenceMatchingPreferences sequenceMatchingPreferences)
            throws IOException, IllegalArgumentException, SQLException, ClassNotFoundException, InterruptedException, JAXBException {

        HashMap<Integer, HashMap<String, ArrayList<TagAssumption>>> matchTagMap = spectrumMatch.getTagAssumptionsMap(tagMapKeyLength, sequenceMatchingPreferences);

        for (HashMap<String, ArrayList<TagAssumption>> advocateMap : matchTagMap.values()) {
            for (String key : advocateMap.keySet()) {
                LinkedList<SpectrumMatch> tagMatches = tagsMap.get(key);
                if (tagMatches == null) {
                    tagMatches = new LinkedList<SpectrumMatch>();
                    tagsMap.put(key, tagMatches);
                }
                tagMatches.add(spectrumMatch);
            }
        }
    }

    /**
     * Returns the spectrum file name. This method assumes that the pNovo output
     * file is the mgf file name + ".pnovo.txt". If the result file name does
     * not end with ".pnovo.txt" (case insensitive), its last extension, if
     * any, is replaced by ".mgf" instead.
     *
     * @return the spectrum file name
     */
    public String getMgfFileName() {

        if (fileName == null) {
            throw new IllegalStateException("The identification file was not set. Please use the appropriate constructor.");
        }

        String baseName;
        int extensionStart = fileName.length() - PNOVO_EXTENSION.length();

        if (extensionStart >= 0 && fileName.regionMatches(true, extensionStart, PNOVO_EXTENSION, 0, PNOVO_EXTENSION.length())) {
            baseName = fileName.substring(0, extensionStart);
        } else {
            // not a standard pNovo file name, only remove what looks like the file extension
            int lastDot = fileName.lastIndexOf('.');
            baseName = lastDot > 0 ? fileName.substring(0, lastDot) : fileName;
        }

        return baseName + ".mgf";
    }

    @Override
    public String getExtension() {
        return PNOVO_EXTENSION;
    }

    @Override
    public void close() throws IOException {
        // nothing to close when the default constructor was used
        if (bufferedRandomAccessFile != null) {
            bufferedRandomAccessFile.close();
        }
    }

    /**
     * Returns a tag assumption from a pNovo result line. Note: fixed PTMs are
     * not annotated, variable PTMs are marked with the pNovo PTM tag. The
     * charge is not parsed from the file and always set to 1+.
     *
     * @param line the line to parse
     * @param rank the rank of the assumption
     * @param searchParameters the search parameters
     *
     * @return the corresponding assumption
     *
     * @throws IllegalArgumentException exception thrown whenever the line does
     * not contain a sequence and a score, or if the score is not a number
     */
    private TagAssumption getAssumptionFromLine(String line, int rank, SearchParameters searchParameters) {

        String[] lineComponents = line.trim().split("\t");
        if (lineComponents.length < 3) {
            throw new IllegalArgumentException("Unexpected pNovo result line in " + fileName + ", sequence and score expected: '" + line + "'.");
        }

        String pNovoSequence = lineComponents[1];
        Double pNovoScore = Double.valueOf(lineComponents[2]);

        StringBuilder peptideSequence = new StringBuilder(pNovoSequence.length());
        ArrayList<ModificationMatch> modificationMatches = new ArrayList<ModificationMatch>();

        PNovoParameters pNovoParameters = (PNovoParameters) searchParameters.getIdentificationAlgorithmParameter(Advocate.pNovo.getIndex());

        if (pNovoParameters == null) {
            // @TODO: throw exception? Without the pNovo parameters the PTM characters cannot be interpreted and the sequence is left empty
        } else {
            for (int i = 0; i < pNovoSequence.length(); i++) {
                char currentChar = pNovoSequence.charAt(i);
                Character ptmResidue = pNovoParameters.getPtmResidue(currentChar);
                if (ptmResidue != null) {
                    // PTM character: annotate the modification (sites are 1-based) and restore the residue
                    modificationMatches.add(new ModificationMatch(pNovoParameters.getUtilitiesPtmName(currentChar), true, i + 1));
                    peptideSequence.append(ptmResidue);
                } else {
                    peptideSequence.append(currentChar);
                }
            }
        }

        AminoAcidSequence aminoAcidSequence = new AminoAcidSequence(peptideSequence.toString());
        for (ModificationMatch modificationMatch : modificationMatches) {
            aminoAcidSequence.addModificationMatch(modificationMatch.getModificationSite(), modificationMatch);
        }

        Tag tag = new Tag(0, aminoAcidSequence, 0);
        return new TagAssumption(Advocate.pNovo.getIndex(), rank, tag, new Charge(Charge.PLUS, 1), pNovoScore); // @TODO: how to get the charge?
    }

    @Override
    public HashMap<String, ArrayList<String>> getSoftwareVersions() {
        HashMap<String, ArrayList<String>> result = new HashMap<String, ArrayList<String>>();
        ArrayList<String> versions = new ArrayList<String>();
        versions.add("unknown"); // @TODO: add version number
        result.put("pNovo+", versions);
        return result;
    }

    @Override
    public HashMap<String, LinkedList<SpectrumMatch>> getTagsMap() {
        return tagsMap;
    }

    @Override
    public void clearTagsMap() {
        if (tagsMap != null) {
            tagsMap.clear();
        }
    }

    @Override
    public boolean hasDeNovoTags() {
        return true;
    }
}