package com.compomics.util.experiment.io.identifications.idfilereaders;
import com.compomics.util.Util;
import com.compomics.util.experiment.biology.AminoAcid;
import com.compomics.util.experiment.biology.AminoAcidSequence;
import com.compomics.util.experiment.identification.Advocate;
import com.compomics.util.experiment.identification.identification_parameters.SearchParameters;
import com.compomics.util.experiment.identification.protein_sequences.SequenceFactory;
import com.compomics.util.experiment.identification.spectrum_assumptions.TagAssumption;
import com.compomics.util.experiment.identification.matches.ModificationMatch;
import com.compomics.util.experiment.identification.matches.SpectrumMatch;
import com.compomics.util.experiment.identification.amino_acid_tags.Tag;
import com.compomics.util.experiment.identification.identification_parameters.tool_specific.DirecTagParameters;
import com.compomics.util.experiment.identification.protein_inference.PeptideMapperType;
import com.compomics.util.experiment.identification.protein_inference.proteintree.ProteinTree;
import com.compomics.util.experiment.io.identifications.IdfileReader;
import com.compomics.util.experiment.massspectrometry.Charge;
import com.compomics.util.experiment.massspectrometry.Spectrum;
import com.compomics.util.experiment.massspectrometry.SpectrumFactory;
import com.compomics.util.experiment.personalization.ExperimentObject;
import com.compomics.util.preferences.SequenceMatchingPreferences;
import com.compomics.util.waiting.WaitingHandler;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.Set;
import javax.xml.bind.JAXBException;
import uk.ac.ebi.pride.tools.braf.BufferedRandomAccessFile;
/**
 * An identification file reader for DirecTag results (.tags files).
 *
 * @author Marc Vaudel
 */
public class DirecTagIdfileReader extends ExperimentObject implements IdfileReader {
/**
 * The name of the tags generator used to create the file, as read from the
 * "TagsGenerator" line of the parameters section.
 */
private String tagsGenerator;
/**
 * The version of the tags generator.
 */
private String tagsGeneratorVersion;
/**
 * The copyright line of the parameters section (the line containing "(c)").
 */
private String copyRight;
/**
 * The license line of the parameters section (the line containing
 * "License").
 */
private String license;
/**
 * The time of sequencing start.
 */
private String timeStart;
/**
 * The time of sequencing end.
 */
private String timeEnd;
/**
 * The tagging time in seconds. Null if it could not be parsed.
 */
private Double taggingTimeSeconds;
/**
 * The number of processing nodes. Null if it could not be parsed.
 */
private Integer nProcessingNode;
/**
 * The path of the file used as input by the tags generator, as read from
 * the "InputFile" line of the parameters section.
 */
private String inputFile;
/**
 * The tags parameters in a map: parameter name &gt; value.
 */
private HashMap<String, String> tagsParameters = new HashMap<String, String>();
/**
 * The columns of a spectrum line as listed in the "H(S)" header: column
 * name &gt; index of the column.
 */
private HashMap<String, Integer> spectrumLineContent = new HashMap<String, Integer>();
/**
 * The columns of a tag line as listed in the "H(T)" header: column name
 * &gt; index of the column.
 */
private HashMap<String, Integer> tagLineContent = new HashMap<String, Integer>();
/**
 * The file pointers recorded for the spectrum lines when indexing the
 * results: spectrum ID &gt; file pointer.
 */
private HashMap<Integer, Long> spectrumIndexes = new HashMap<Integer, Long>();
/**
 * The file pointers recorded for the tag lines when indexing the results:
 * ID &gt; file pointers.
 */
private HashMap<Integer, ArrayList<Long>> tagIndexes = new HashMap<Integer, ArrayList<Long>>();
/**
 * The random access file used to parse the header and, when requested, to
 * index the results.
 */
private BufferedRandomAccessFile bufferedRandomAccessFile;
/**
 * The file inspected.
 */
private File tagFile;
/**
 * The spectrum factory used to retrieve spectrum titles.
 */
private SpectrumFactory spectrumFactory = SpectrumFactory.getInstance();
/**
 * The mass to add to the C-terminal gap so that it corresponds to a peptide
 * fragment. Added to strictly positive C-terminal gaps when a tag line is
 * converted into an assumption.
 */
public final double cTermCorrection = 0;
/**
 * The mass to add to the N-terminal gap so that it corresponds to a peptide
 * fragment.
 */
public final double nTermCorrection = 0;
/**
 * Map of the spectrum matches found, indexed by tag key. Only created when
 * sequence matching preferences are provided upon import.
 */
private HashMap<String, LinkedList<SpectrumMatch>> tagsMap;
/**
 * The DirecTag parameters, taken from the search parameters upon import.
 */
private DirecTagParameters direcTagParameters;
/**
 * The residues modified by the different PTMs in the DynamicMods tag:
 * symbol used to represent the PTM &gt; modified residue.
 */
private HashMap<Character, Character> dynamicModsResidues;
/**
* Default constructor for the purpose of instantiation.
*/
public DirecTagIdfileReader() {
}
/**
* Constructors, parses a file but does not index the results.
*
* @param tagFile the file to parse
*
* @throws FileNotFoundException if a FileNotFoundException occurs
* @throws IOException if an IOException occurs
*/
public DirecTagIdfileReader(File tagFile) throws FileNotFoundException, IOException {
this(tagFile, false);
}
/**
* Constructors, parses a file.
*
* @param tagFile the file to parse
* @param indexResults if true the results section will be indexed
*
* @throws FileNotFoundException if a FileNotFoundException occurs
* @throws IOException if an IOException occurs
*/
public DirecTagIdfileReader(File tagFile, boolean indexResults) throws FileNotFoundException, IOException {
this.tagFile = tagFile;
bufferedRandomAccessFile = new BufferedRandomAccessFile(tagFile, "r", 1024 * 100);
parseFile(indexResults);
}
/**
 * Returns the names of the tagging parameters found in the file.
 *
 * @return the key set of the tagging parameters map
 */
public Set<String> getTagsParametersNames() {
    final Set<String> parameterNames = tagsParameters.keySet();
    return parameterNames;
}
/**
 * Returns the value of a tagging parameter.
 *
 * @param tagParameterName the name of the parameter of interest
 *
 * @return the value stored for this name, null if there is none
 */
public String getTagParameter(String tagParameterName) {
    final String parameterValue = tagsParameters.get(tagParameterName);
    return parameterValue;
}
/**
 * Parses the file section by section: parameters, tag parameters, table
 * headers and, if requested, the results. Parsing stops as soon as a
 * section reports the end of the file. The random access file is closed
 * once done, whether parsing succeeded or not.
 *
 * @param indexResults if true the results section will be indexed
 *
 * @throws FileNotFoundException if a FileNotFoundException occurs
 * @throws IOException if an IOException occurs
 */
private void parseFile(boolean indexResults) throws FileNotFoundException, IOException {
    try {
        if (parseParameters()) {
            return;
        }
        if (parseTagParameters()) {
            return;
        }
        if (parseHeaders()) {
            return;
        }
        if (indexResults) {
            parseResults();
        }
    } finally {
        bufferedRandomAccessFile.close();
    }
}
/**
 * Parses the parameters section, i.e. all lines up to the line starting
 * with "H TagsParameters". Lines keyed by a fixed prefix (generator,
 * version, input file, times) are tested before the lines recognized by a
 * keyword they contain (copyright, license, node count), so that a value
 * such as an input file path containing "node" is not misinterpreted.
 * Empty lines are skipped. Numbers which cannot be parsed are left unset.
 *
 * @return true if the end of the file was reached
 *
 * @throws IOException if an IOException occurs or if a header or result
 * line is encountered before the end of the parameters section
 */
private boolean parseParameters() throws IOException {
    String line;
    while ((line = bufferedRandomAccessFile.readLine()) != null) {
        if (line.startsWith("H TagsParameters")) {
            break;
        }
        if (line.startsWith("H(S)") || line.startsWith("H(T)") || line.startsWith("S") || line.startsWith("T")) {
            throw new IOException("Unexpected end of parameters section.");
        }
        if (line.isEmpty()) {
            // nothing to parse, and substring(1) below would fail
            continue;
        }
        // remove the leading line type character
        String content = line.substring(1).trim();
        if (content.startsWith("TagsGeneratorVersion")) {
            tagsGeneratorVersion = getValueAfterTab(content);
        } else if (content.startsWith("TagsGenerator")) {
            tagsGenerator = getValueAfterTab(content);
        } else if (content.startsWith("InputFile")) {
            inputFile = getValueAfterTab(content);
        } else if (content.startsWith("Tagging started at")) {
            timeStart = content;
        } else if (content.startsWith("Tagging finished at")) {
            timeEnd = content;
        } else if (content.startsWith("Total tagging time:")) {
            String time = getFirstToken(content.substring(content.indexOf(":") + 1).trim());
            try {
                taggingTimeSeconds = Double.valueOf(time);
            } catch (NumberFormatException e) {
                // best effort: the tagging time is informative only and stays unset
            }
        } else if (content.contains("(c)")) {
            copyRight = content;
        } else if (content.contains("License")) {
            license = content;
        } else if (content.contains("node")) {
            // expected form: "<word> <number> ... node", the number is the second token
            int firstSpace = content.indexOf(" ");
            if (firstSpace != -1) {
                String nodes = getFirstToken(content.substring(firstSpace).trim());
                try {
                    nProcessingNode = Integer.valueOf(nodes);
                } catch (NumberFormatException e) {
                    // best effort: the number of nodes is informative only and stays unset
                }
            }
        }
    }
    return line == null;
}
/**
 * Returns the trimmed text following the first tabulation of a line.
 *
 * @param content the line content
 *
 * @return the trimmed text after the first tabulation, null if the line
 * contains no tabulation
 */
private static String getValueAfterTab(String content) {
    int tabIndex = content.indexOf("\t");
    if (tabIndex == -1) {
        return null;
    }
    return content.substring(tabIndex).trim();
}
/**
 * Returns the text preceding the first space of the given text.
 *
 * @param text the text
 *
 * @return the text before the first space, the entire text if it contains
 * no space
 */
private static String getFirstToken(String text) {
    int spaceIndex = text.indexOf(" ");
    if (spaceIndex == -1) {
        return text;
    }
    return text.substring(0, spaceIndex).trim();
}
/**
 * Parses the tag parameters section, which ends at the first blank line.
 * Every line is a list of "name: value" pairs separated by ", "; the pairs
 * are stored in the tags parameters map.
 *
 * @return true if the end of the file was reached
 *
 * @throws IOException if an IOException occurs or if a header or result
 * line is encountered before the end of the section
 */
private boolean parseTagParameters() throws IOException {
    String line = bufferedRandomAccessFile.readLine();
    while (line != null && !line.trim().isEmpty()) {
        if (line.startsWith("H(S)") || line.startsWith("H(T)") || line.startsWith("S") || line.startsWith("T")) {
            throw new IOException("Unexpected end of tag parameters section.");
        }
        // drop the leading line type character before splitting the pairs
        String content = line.substring(1).trim();
        for (String pair : content.split(", ")) {
            int separator = pair.indexOf(": ");
            if (separator >= 0) {
                String name = pair.substring(0, separator).trim();
                String value = pair.substring(separator + 1).trim();
                tagsParameters.put(name, value);
            }
        }
        line = bufferedRandomAccessFile.readLine();
    }
    return line == null;
}
/**
 * Reads the next two lines of the file and parses each of them as a table
 * header.
 *
 * @return true if the end of the file was reached when reading the second
 * line
 *
 * @throws IOException if an IOException occurs
 */
private boolean parseHeaders() throws IOException {
    String firstHeader = bufferedRandomAccessFile.readLine();
    if (firstHeader != null) {
        parseHeaderLine(firstHeader);
    }
    String secondHeader = bufferedRandomAccessFile.readLine();
    if (secondHeader == null) {
        return true;
    }
    parseHeaderLine(secondHeader);
    return false;
}
/**
 * Parses a header line and stores the position of every column name: in
 * the spectrum columns map for an "H(S)" line, in the tag columns map for
 * an "H(T)" line. Other lines are ignored unless they look like a result
 * line.
 *
 * @param line a line corresponding to a header
 *
 * @throws IOException if the line starts with "S" or "T", i.e. a result
 * line is found where a header is expected
 */
private void parseHeaderLine(String line) throws IOException {
    if (line.startsWith("S") || line.startsWith("T")) {
        throw new IOException("No Header found.");
    }
    HashMap<String, Integer> columns;
    if (line.startsWith("H(S)")) {
        columns = spectrumLineContent;
    } else if (line.startsWith("H(T)")) {
        columns = tagLineContent;
    } else {
        return;
    }
    // skip the four characters of the header type before splitting the columns
    String[] names = line.substring(4).trim().split("\t");
    int position = 0;
    for (String name : names) {
        columns.put(name, position);
        position++;
    }
}
/**
 * Indexes the results section. For every spectrum line ("S") and tag line
 * ("T") the file pointer of the beginning of the line is stored, so that
 * seeking to it and reading a line returns that very line. Tag lines are
 * indexed under the ID of the spectrum line preceding them.
 *
 * The spectrum ID is the value of the "Index" column when the spectrum
 * header declares one, the one-based rank of the spectrum line otherwise.
 *
 * @throws IOException if an IOException occurs or if a tag line is found
 * before any spectrum line
 */
private void parseResults() throws IOException {
    Integer sIdIndex = spectrumLineContent.get("Index");
    int spectrumCounter = 0;
    Integer currentId = null;
    // the pointer must be taken before reading: after readLine() it is at the next line
    long lineStart = bufferedRandomAccessFile.getFilePointer();
    String line;
    while ((line = bufferedRandomAccessFile.readLine()) != null) {
        if (line.startsWith("S")) {
            // only spectrum lines advance the counter
            currentId = ++spectrumCounter;
            if (sIdIndex != null) {
                String[] components = line.substring(1).trim().split("\t");
                currentId = Integer.valueOf(components[sIdIndex]);
            }
            spectrumIndexes.put(currentId, lineStart);
        } else if (line.startsWith("T")) {
            if (currentId == null) {
                throw new IOException("Tag line found before any spectrum line.");
            }
            ArrayList<Long> indexes = tagIndexes.get(currentId);
            if (indexes == null) {
                indexes = new ArrayList<Long>();
                tagIndexes.put(currentId, indexes);
            }
            indexes.add(lineStart);
        }
        lineStart = bufferedRandomAccessFile.getFilePointer();
    }
}
/**
 * Returns a component in a spectrum line.
 *
 * NOTE(review): parseFile() closes the random access file once parsing is
 * done; confirm that seeking is still possible when this method is called.
 *
 * @param spectrumId the id of the spectrum of interest
 * @param componentName the name of the component of interest according to
 * the header
 *
 * @return the component, null if the spectrum was not indexed, if the
 * component is not listed in the header, or if the line has no such column
 *
 * @throws IOException if an IOException occurs or if no line can be read
 * at the indexed position
 */
public String getSpectrumComponent(int spectrumId, String componentName) throws IOException {
    Long index = spectrumIndexes.get(spectrumId);
    Integer columnIndex = spectrumLineContent.get(componentName);
    if (index == null || columnIndex == null) {
        // unknown spectrum or unknown column: nothing to read
        return null;
    }
    bufferedRandomAccessFile.seek(index);
    String line = bufferedRandomAccessFile.readLine();
    if (line == null || line.isEmpty()) {
        throw new IOException("No spectrum line found at index " + index + " for spectrum " + spectrumId + ".");
    }
    String[] components = line.substring(1).trim().split("\t");
    if (columnIndex < components.length) {
        return components[columnIndex];
    }
    return null;
}
/**
 * Returns the IDs of the indexed spectra.
 *
 * @return the key set of the spectrum index map
 */
public Set<Integer> getSpectrumIds() {
    final Set<Integer> spectrumIds = spectrumIndexes.keySet();
    return spectrumIds;
}
/**
 * Returns the names of the columns listed in the spectrum header.
 *
 * @return the key set of the spectrum columns map
 */
public Set<String> getSpectrumComponentNames() {
    final Set<String> componentNames = spectrumLineContent.keySet();
    return componentNames;
}
/**
 * Returns the tags indexed for a spectrum, one map per tag line: component
 * name &gt; value. Components whose column is missing from a line are left
 * out of the map of that line, consistently with getSpectrumComponent
 * which returns null in that case.
 *
 * @param spectrumId the id of the spectrum
 *
 * @return the tag components associated to a spectrum, an empty list if no
 * tag was indexed for this spectrum
 *
 * @throws IOException if an IOException occurs or if no line can be read
 * at an indexed position
 */
private ArrayList<HashMap<String, String>> getTags(int spectrumId) throws IOException {
    ArrayList<HashMap<String, String>> result = new ArrayList<HashMap<String, String>>();
    ArrayList<Long> indexes = tagIndexes.get(spectrumId);
    if (indexes == null) {
        return result;
    }
    for (Long index : indexes) {
        bufferedRandomAccessFile.seek(index);
        String line = bufferedRandomAccessFile.readLine();
        if (line == null || line.isEmpty()) {
            throw new IOException("No tag line found at index " + index + " for spectrum " + spectrumId + ".");
        }
        String[] components = line.substring(1).trim().split("\t");
        HashMap<String, String> lineMap = new HashMap<String, String>();
        for (String componentName : tagLineContent.keySet()) {
            int columnIndex = tagLineContent.get(componentName);
            // shorter lines simply lack the trailing components
            if (columnIndex < components.length) {
                lineMap.put(componentName, components[columnIndex]);
            }
        }
        result.add(lineMap);
    }
    return result;
}
@Override
public LinkedList<SpectrumMatch> getAllSpectrumMatches(WaitingHandler waitingHandler, SearchParameters searchParameters) throws IOException, IllegalArgumentException, SQLException, ClassNotFoundException, InterruptedException, JAXBException {
    // no sequence matching preferences: the tags map is not populated
    final SequenceMatchingPreferences noSequenceMatchingPreferences = null;
    final boolean expandAaCombinations = false;
    return getAllSpectrumMatches(waitingHandler, searchParameters, noSequenceMatchingPreferences, expandAaCombinations);
}
/**
 * Reads the tag file from its beginning and returns one spectrum match per
 * spectrum for which at least one tag was found. When sequence matching
 * preferences are given, the matches are also registered in the tags map.
 *
 * The expandAaCombinations parameter is not used by this implementation.
 *
 * @param waitingHandler the waiting handler used to display progress, can
 * be null
 * @param searchParameters the search parameters, the DirecTag specific
 * parameters are taken from them
 * @param sequenceMatchingPreferences the sequence matching preferences
 * used to build the tags map, if null no tags map is created
 * @param expandAaCombinations not used here
 *
 * @return the spectrum matches found in the file
 *
 * @throws IOException if an error occurs while reading the file or if a
 * tag line is found before any spectrum line
 */
@Override
public LinkedList<SpectrumMatch> getAllSpectrumMatches(WaitingHandler waitingHandler, SearchParameters searchParameters,
        SequenceMatchingPreferences sequenceMatchingPreferences, boolean expandAaCombinations)
        throws IOException, IllegalArgumentException, SQLException, ClassNotFoundException, InterruptedException, JAXBException {
    direcTagParameters = (DirecTagParameters) searchParameters.getAlgorithmSpecificParameters().get(Advocate.direcTag.getIndex());
    // get the ptm residues from the DynamicMods field
    dynamicModsResidues = new HashMap<Character, Character>();
    String dynamicMods = tagsParameters.get("DynamicMods"); // assume something like: "M 0 15.994915 N 1 0.984016 Q 2 0.984016"
    if (dynamicMods != null) {
        dynamicMods = dynamicMods.trim();
        if (!dynamicMods.isEmpty()) {
            // triplets of residue, symbol and mass; tolerate repeated whitespace
            String[] modElements = dynamicMods.split("\\s+");
            for (int index = 0; index + 2 < modElements.length; index += 3) {
                dynamicModsResidues.put(modElements[index + 1].charAt(0), modElements[index].charAt(0));
            }
        }
    }
    int tagMapKeyLength = 3;
    if (sequenceMatchingPreferences != null) {
        if (sequenceMatchingPreferences.getPeptideMapperType() == PeptideMapperType.tree) {
            SequenceFactory sequenceFactory = SequenceFactory.getInstance();
            tagMapKeyLength = ((ProteinTree) sequenceFactory.getDefaultPeptideMapper()).getInitialTagSize();
        }
        tagsMap = new HashMap<String, LinkedList<SpectrumMatch>>(1024);
    }
    String spectrumFileName = Util.getFileName(getInputFile());
    if (waitingHandler != null && spectrumFactory.fileLoaded(spectrumFileName)) {
        waitingHandler.setMaxSecondaryProgressCounter(spectrumFactory.getNSpectra(spectrumFileName));
        waitingHandler.setSecondaryProgressCounter(0);
    }
    LinkedList<SpectrumMatch> result = new LinkedList<SpectrumMatch>();
    int sCpt = 0;
    Integer sIdColumnIndex = spectrumLineContent.get("ID");
    Integer chargeColumnIndex = spectrumLineContent.get("Charge");
    BufferedReader reader = new BufferedReader(new FileReader(tagFile));
    try {
        Integer lastId = null, lastCharge = null;
        int rank = 0;
        SpectrumMatch currentMatch = null;
        String line;
        while ((line = reader.readLine()) != null) {
            if (line.startsWith("S")) {
                Integer sId = ++sCpt;
                rank = 0;
                if (sIdColumnIndex != null || chargeColumnIndex != null) {
                    String[] components = line.substring(1).trim().split("\t");
                    if (sIdColumnIndex != null) {
                        String id = components[sIdColumnIndex];
                        sId = Integer.valueOf(id.substring(id.indexOf("=") + 1));
                    }
                    // the charge is read independently of the presence of an ID column
                    if (chargeColumnIndex != null) {
                        lastCharge = Integer.valueOf(components[chargeColumnIndex]);
                    }
                }
                if (!sId.equals(lastId)) {
                    addMatchToResults(currentMatch, result, tagMapKeyLength, sequenceMatchingPreferences);
                    int utilitiesId = sId + 1; // first spectrum is 1 in utilities
                    String spectrumTitle = utilitiesId + "";
                    if (spectrumFactory.fileLoaded(spectrumFileName)) {
                        spectrumTitle = spectrumFactory.getSpectrumTitle(spectrumFileName, utilitiesId);
                    }
                    currentMatch = new SpectrumMatch(Spectrum.getSpectrumKey(spectrumFileName, spectrumTitle));
                    currentMatch.setSpectrumNumber(utilitiesId);
                    lastId = sId;
                }
                if (waitingHandler != null && spectrumFactory.fileLoaded(spectrumFileName)) {
                    waitingHandler.increaseSecondaryProgressCounter();
                }
            } else if (line.startsWith("T")) {
                if (currentMatch == null) {
                    throw new IOException("Tag line found before any spectrum line in " + tagFile.getName() + ".");
                }
                ++rank;
                TagAssumption tagAssumption = getAssumptionFromLine(line, rank);
                //@TODO: check with the developers if this is correct
                if (lastCharge != null) {
                    // without a spectrum charge the assumption keeps the charge set from the tag line
                    tagAssumption.setIdentificationCharge(new Charge(Charge.PLUS, lastCharge));
                }
                currentMatch.addHit(Advocate.direcTag.getIndex(), tagAssumption, true);
            }
        }
        // the last match is not followed by a spectrum line and must be stored here
        addMatchToResults(currentMatch, result, tagMapKeyLength, sequenceMatchingPreferences);
    } finally {
        reader.close();
    }
    return result;
}
/**
 * Adds a completed spectrum match to the results if it has at least one
 * assumption. When sequence matching preferences are given, the match is
 * also registered in the tags map under each of its tag keys.
 *
 * @param spectrumMatch the match to store, ignored if null or without
 * assumption
 * @param result the list of results to add the match to
 * @param tagMapKeyLength the length of the keys of the tags map
 * @param sequenceMatchingPreferences the sequence matching preferences, if
 * null the tags map is left untouched
 */
private void addMatchToResults(SpectrumMatch spectrumMatch, LinkedList<SpectrumMatch> result, int tagMapKeyLength,
        SequenceMatchingPreferences sequenceMatchingPreferences)
        throws IOException, SQLException, ClassNotFoundException, InterruptedException, JAXBException {
    if (spectrumMatch == null || !spectrumMatch.hasAssumption()) {
        return;
    }
    if (sequenceMatchingPreferences != null) {
        HashMap<Integer, HashMap<String, ArrayList<TagAssumption>>> matchTagMap = spectrumMatch.getTagAssumptionsMap(tagMapKeyLength, sequenceMatchingPreferences);
        for (HashMap<String, ArrayList<TagAssumption>> advocateMap : matchTagMap.values()) {
            for (String key : advocateMap.keySet()) {
                LinkedList<SpectrumMatch> tagMatches = tagsMap.get(key);
                if (tagMatches == null) {
                    tagMatches = new LinkedList<SpectrumMatch>();
                    tagsMap.put(key, tagMatches);
                }
                tagMatches.add(spectrumMatch);
            }
        }
    }
    result.add(spectrumMatch);
}
/**
 * Returns the assumption associated to a tag line. The N- and C-terminal
 * gaps, the tag sequence, the charge and the score are read from the
 * "nTerminusMass", "cTerminusMass", "Tag", "TagChargeState" and "Total"
 * columns, respectively.
 *
 * Characters of the tag which are not amino acids are interpreted as
 * modification indexes: the residue listed for this symbol in the
 * DynamicMods parameter is put in the tag sequence and a modification
 * match named according to the DirecTag parameters is added at this
 * position.
 *
 * NOTE(review): the C-terminal correction is added to positive C-terminal
 * gaps, the N-terminal correction is not applied to the N-terminal gap.
 * Both corrections are 0, confirm whether this asymmetry is intended.
 *
 * @param line the line
 * @param rank the rank of the assumption
 *
 * @return the assumption associated to a tag line
 *
 * @throws IllegalArgumentException if a required column is missing from
 * the header or if a tag character can be mapped neither to an amino acid
 * nor to a modification
 */
private TagAssumption getAssumptionFromLine(String line, int rank) {
    String[] components = line.substring(1).trim().split("\t");
    Integer cGapIndex = tagLineContent.get("cTerminusMass");
    if (cGapIndex == null) {
        throw new IllegalArgumentException("Column cTerminusMass not found.");
    }
    Double cGap = Double.valueOf(components[cGapIndex]);
    if (cGap > 0 && cGap < cTermCorrection) {
        throw new IllegalArgumentException("Incompatible c-term gap " + cGap);
    } else if (cGap > 0) {
        cGap += cTermCorrection;
    }
    Integer nGapIndex = tagLineContent.get("nTerminusMass");
    if (nGapIndex == null) {
        throw new IllegalArgumentException("Column nTerminusMass not found.");
    }
    Double nGap = Double.valueOf(components[nGapIndex]);
    Integer tagIndex = tagLineContent.get("Tag");
    if (tagIndex == null) {
        throw new IllegalArgumentException("Column Tag not found.");
    }
    String tagSequence = components[tagIndex];
    StringBuilder residues = new StringBuilder(tagSequence.length());
    HashMap<Integer, ModificationMatch> modificationMatches = new HashMap<Integer, ModificationMatch>();
    for (int i = 0; i < tagSequence.length(); i++) {
        char charAtI = tagSequence.charAt(i);
        try {
            AminoAcid aa = AminoAcid.getAminoAcid(charAtI);
            residues.append(aa.singleLetterCode);
        } catch (IllegalArgumentException e) {
            try {
                // modified residue
                String modIndexString = charAtI + "";
                int modIndex = Integer.parseInt(modIndexString);
                String utilitiesPtm = direcTagParameters.getUtilitiesPtmName(modIndex);
                Character residue = dynamicModsResidues.get(charAtI);
                if (residue == null) {
                    // appending a null Character would write "null" into the sequence
                    throw new IllegalArgumentException("No residue found for modification index " + modIndexString + ".");
                }
                modificationMatches.put(i + 1, new ModificationMatch(utilitiesPtm, true, i + 1));
                residues.append(residue);
            } catch (Exception e1) {
                // keep the cause so that the origin of the failure is not lost
                throw new IllegalArgumentException("No amino acid or modification could be mapped to tag component \"" + charAtI + "\" in tag \"" + tagSequence + "\".", e1);
            }
        }
    }
    AminoAcidSequence tagAaSequence = new AminoAcidSequence(residues.toString());
    for (int i : modificationMatches.keySet()) {
        tagAaSequence.addModificationMatch(i, modificationMatches.get(i));
    }
    Tag tag = new Tag(nGap, tagAaSequence, cGap);
    Integer chargeIndex = tagLineContent.get("TagChargeState");
    if (chargeIndex == null) {
        throw new IllegalArgumentException("Column TagChargeState not found.");
    }
    int charge = Integer.parseInt(components[chargeIndex]);
    Integer eValueIndex = tagLineContent.get("Total");
    if (eValueIndex == null) {
        throw new IllegalArgumentException("Column Total not found.");
    }
    double eValue = Double.parseDouble(components[eValueIndex]);
    return new TagAssumption(Advocate.direcTag.getIndex(), rank, tag, new Charge(Charge.PLUS, charge), eValue);
}
/**
 * Returns the name of the tags generator which created the file.
 *
 * @return the name of the tags generator
 */
public String getTagsGenerator() {
    return this.tagsGenerator;
}
/**
 * Returns the version of the tags generator which created the file.
 *
 * @return the version of the tags generator
 */
public String getTagsGeneratorVersion() {
    return this.tagsGeneratorVersion;
}
/**
 * Returns the copyright line found in the file.
 *
 * @return the copyright
 */
public String getCopyRight() {
    return this.copyRight;
}
/**
 * Returns the license line found in the file.
 *
 * @return the license information of this file
 */
public String getLicense() {
    return this.license;
}
/**
 * Returns the time at which the tagging started, as given in the file.
 *
 * @return the starting time of the tagging
 */
public String getTimeStart() {
    return this.timeStart;
}
/**
 * Returns the time at which the tagging finished, as given in the file.
 *
 * @return the ending time of the tagging
 */
public String getTimeEnd() {
    return this.timeEnd;
}
/**
 * Returns the duration of the tagging in seconds, as listed in the file.
 *
 * @return the tagging time in seconds
 */
public Double getTaggingTimeSeconds() {
    return this.taggingTimeSeconds;
}
/**
 * Returns the number of processing nodes used for the tagging.
 *
 * @return the number of processing nodes
 */
public Integer getnProcessingNode() {
    return this.nProcessingNode;
}
/**
 * Returns the spectrum file named in the parameters section. A new File
 * object is built from the stored path at every call.
 *
 * @return the spectrum file
 */
public File getInputFile() {
    final File spectrumFile = new File(inputFile);
    return spectrumFile;
}
@Override
public String getExtension() {
    // extension of the files handled by this reader
    final String extension = ".tags";
    return extension;
}
@Override
public void close() throws IOException {
    // no file is attached when the reader was created with the default constructor
    if (bufferedRandomAccessFile != null) {
        bufferedRandomAccessFile.close();
    }
}
@Override
public HashMap<String, ArrayList<String>> getSoftwareVersions() {
    // a single entry: tags generator name > list containing its version
    ArrayList<String> generatorVersions = new ArrayList<String>();
    generatorVersions.add(tagsGeneratorVersion);
    HashMap<String, ArrayList<String>> softwareVersions = new HashMap<String, ArrayList<String>>();
    softwareVersions.put(tagsGenerator, generatorVersions);
    return softwareVersions;
}
@Override
public HashMap<String, LinkedList<SpectrumMatch>> getTagsMap() {
    // can be null, the map is only created when importing with sequence matching preferences
    return this.tagsMap;
}
@Override
public void clearTagsMap() {
    // nothing to clear when no map was created
    if (tagsMap == null) {
        return;
    }
    tagsMap.clear();
}
@Override
public boolean hasDeNovoTags() {
    // this reader always reports de novo tags
    final boolean deNovoTags = true;
    return deNovoTags;
}
}