package com.compomics.util.experiment.io.identifications;
import com.compomics.util.experiment.biology.EnzymeFactory;
import com.compomics.util.experiment.identification.identification_parameters.SearchParameters;
import com.compomics.util.experiment.personalization.ExperimentObject;
import com.compomics.util.preferences.DigestionPreferences;
import com.compomics.util.waiting.WaitingHandler;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import uk.ac.ebi.jmzidml.model.mzidml.CvParam;
import uk.ac.ebi.jmzidml.model.mzidml.Enzyme;
import uk.ac.ebi.jmzidml.model.mzidml.ParamList;
import uk.ac.ebi.jmzidml.model.mzidml.SpectrumIdentificationProtocol;
import uk.ac.ebi.jmzidml.model.mzidml.Tolerance;
import uk.ac.ebi.jmzidml.xml.io.MzIdentMLUnmarshaller;
/**
* Reads search parameters from a mzIdentML result files. (Work in progress...)
*
* @author Harald Barsnes
*/
public class MzIdentMLIdfileSearchParametersConverter extends ExperimentObject {
/**
* Constructor.
*
* @param mzIdentMLFile the mzIdentML file
* @param searchParameters the search parameters object to save to
* @param species the current species
* @param waitingHandler the waiting handler
* @return the extracted search parameters
*
* @throws FileNotFoundException if a FileNotFoundException occurs
* @throws IOException if a IOException occurs
* @throws ClassNotFoundException if a ClassNotFoundException occurs
*/
public static String getSearchParameters(File mzIdentMLFile, SearchParameters searchParameters, String species, WaitingHandler waitingHandler)
throws FileNotFoundException, IOException, ClassNotFoundException {
String parametersReport = "<br><b><u>Extracted Search Parameters</u></b><br>";
// unmarshal the mzid file
MzIdentMLUnmarshaller unmarshaller = new MzIdentMLUnmarshaller(mzIdentMLFile);
//MzIdentMLUnmarshaller unmarshaller = new MzIdentMLUnmarshaller(mzIdentMLFile, true); // @TODO: figure out when to use in memory processing
if (waitingHandler != null && waitingHandler.isRunCanceled()) {
mzIdentMLFile = null;
unmarshaller = null;
//unmarshaller.close(); // @TODO: close method is missing?
return null;
}
// get the spectrum identification protocol
SpectrumIdentificationProtocol spectrumIdentificationProtocol = unmarshaller.unmarshal(SpectrumIdentificationProtocol.class);
// get the fragment ion tolerance and type
Double fragmentMinTolerance = null;
Double fragmentMaxTolerance = null;
Boolean fragmentToleranceTypeIsPpm = false;
Tolerance tempFragmentTolerance = spectrumIdentificationProtocol.getFragmentTolerance();
if (tempFragmentTolerance != null) {
for (CvParam cvParam : tempFragmentTolerance.getCvParam()) {
if (cvParam.getAccession().equalsIgnoreCase("MS:1001412")) {
fragmentMaxTolerance = Double.valueOf(cvParam.getValue());
fragmentToleranceTypeIsPpm = cvParam.getUnitAccession().equalsIgnoreCase("UO:0000169");
} else if (cvParam.getAccession().equalsIgnoreCase("MS:1001413")) {
fragmentMinTolerance = Double.valueOf(cvParam.getValue());
fragmentToleranceTypeIsPpm = cvParam.getUnitAccession().equalsIgnoreCase("UO:0000169");
}
}
}
parametersReport += "<br><b>Fragment Ion Mass Tolerance:</b> ";
if (fragmentMinTolerance != null && fragmentMaxTolerance != null) {
Double fragmentTolerance;
if (Math.abs(fragmentMinTolerance) - Math.abs(fragmentMaxTolerance) < 0.0000001) {
fragmentTolerance = Math.abs(fragmentMinTolerance);
} else {
fragmentTolerance = Math.max(Math.abs(fragmentMinTolerance), Math.abs(fragmentMaxTolerance));
}
searchParameters.setPrecursorAccuracy(fragmentTolerance);
if (fragmentToleranceTypeIsPpm) {
searchParameters.setFragmentAccuracyType(SearchParameters.MassAccuracyType.PPM);
parametersReport += fragmentTolerance + " ppm";
} else {
searchParameters.setFragmentAccuracyType(SearchParameters.MassAccuracyType.DA);
parametersReport += fragmentTolerance + " Da";
}
} else {
parametersReport += searchParameters.getFragmentIonAccuracy() + " Da (default)"; // @TODO: what about accuracy in ppm
}
// get the precursor tolerance and type
Double precursorMinTolerance = null;
Double precursorMaxTolerance = null;
Boolean precursorToleranceTypeIsPpm = true;
Tolerance tempPrecursorTolerance = spectrumIdentificationProtocol.getParentTolerance();
for (CvParam cvParam : tempPrecursorTolerance.getCvParam()) {
if (cvParam.getAccession().equalsIgnoreCase("MS:1001412")) {
precursorMaxTolerance = Double.valueOf(cvParam.getValue());
precursorToleranceTypeIsPpm = cvParam.getUnitAccession().equalsIgnoreCase("UO:0000169");
} else if (cvParam.getAccession().equalsIgnoreCase("MS:1001413")) {
precursorMinTolerance = Double.valueOf(cvParam.getValue());
precursorToleranceTypeIsPpm = cvParam.getUnitAccession().equalsIgnoreCase("UO:0000169");
}
}
parametersReport += "<br><b>Precursor Ion Mass Tolerance:</b> ";
if (precursorMinTolerance != null && precursorMaxTolerance != null) {
Double precursorTolerance;
if (Math.abs(precursorMinTolerance) - Math.abs(precursorMaxTolerance) < 0.0000001) {
precursorTolerance = Math.abs(precursorMinTolerance);
} else {
precursorTolerance = Math.max(Math.abs(precursorMinTolerance), Math.abs(precursorMaxTolerance));
}
searchParameters.setPrecursorAccuracy(precursorTolerance);
if (precursorToleranceTypeIsPpm) {
searchParameters.setPrecursorAccuracyType(SearchParameters.MassAccuracyType.PPM);
parametersReport += precursorTolerance + " ppm";
} else {
searchParameters.setPrecursorAccuracyType(SearchParameters.MassAccuracyType.DA);
parametersReport += precursorTolerance + " Da";
}
} else {
parametersReport += searchParameters.getPrecursorAccuracy() + " ppm (default)"; // @TODO: what about accuracy in Dalton
}
// get the enzyme(s)
parametersReport += "<br><br><b>Digestion:</b> ";
List<Enzyme> mzIdEnzymes = spectrumIdentificationProtocol.getEnzymes().getEnzyme();
DigestionPreferences digestionPreferences = new DigestionPreferences();
if (!mzIdEnzymes.isEmpty()) {
digestionPreferences.clear();
for (Enzyme mzIdEnzyme : mzIdEnzymes) {
ParamList paramList = mzIdEnzyme.getEnzymeName();
Integer nMissedCleavages = mzIdEnzyme.getMissedCleavages();
Boolean semiSpecific = mzIdEnzyme.isSemiSpecific();
if (!paramList.getParamGroup().isEmpty()) {
String enzymeId = paramList.getParamGroup().get(0).getName();
com.compomics.util.experiment.biology.Enzyme utilitiesEnzyme = EnzymeFactory.getInstance().getEnzyme(enzymeId);
String enzymeName;
if (utilitiesEnzyme != null) {
enzymeName = utilitiesEnzyme.getName();
parametersReport += utilitiesEnzyme.getName();
} else {
enzymeName = "Trypsin";
utilitiesEnzyme = EnzymeFactory.getInstance().getEnzyme(enzymeName);
parametersReport += utilitiesEnzyme.getName() + " (assumed)";
}
parametersReport += ", ";
if (nMissedCleavages != null) {
parametersReport += nMissedCleavages;
} else {
nMissedCleavages = 2;
parametersReport += nMissedCleavages + " (assumed)";
}
parametersReport += ", ";
DigestionPreferences.Specificity specificity = DigestionPreferences.Specificity.specific;
if (semiSpecific != null) {
if (semiSpecific) {
specificity = DigestionPreferences.Specificity.semiSpecific;
}
parametersReport += specificity;
} else {
parametersReport += specificity + " (assumed)";
}
digestionPreferences.addEnzyme(utilitiesEnzyme);
digestionPreferences.setSpecificity(enzymeName, specificity);
digestionPreferences.setnMissedCleavages(enzymeName, nMissedCleavages);
}
}
} else {
parametersReport += "Trypsin (assumed), 2 allowed missed cleavages (assumed), specific (assumed)";
}
searchParameters.setDigestionPreferences(digestionPreferences);
// set the min/max precursor charge
parametersReport += "<br><br><b>Min Precusor Charge:</b> ";
parametersReport += searchParameters.getMinChargeSearched().value + " (default)";
parametersReport += "<br><b>Max Precusor Charge:</b> ";
parametersReport += searchParameters.getMaxChargeSearched().value + " (default)";
// taxonomy and species
parametersReport += "<br><br><b>Species:</b> ";
if (species == null || species.length() == 0) {
parametersReport += "unknown";
} else {
parametersReport += species;
}
// get the modifications
// ModificationProfile modificationProfile = new ModificationProfile();
//
// ModificationParams modifications = spectrumIdentificationProtocol.getModificationParams();
//
// for (SearchModification tempMod : modifications.getSearchModification()) {
// if (!tempMod.getCvParam().isEmpty()) {
//
// CvParam cvParam = tempMod.getCvParam().get(0); // example: <cvParam cvRef="UNIMOD" accession="UNIMOD:4" name="Carbamidomethyl" value="57.021464"/>
//
// // @TODO: convert to utilities ptms!
//// if (tempMod.isFixedMod()) {
//// modificationProfile.addFixedModification(null);
//// } else {
//// modificationProfile.addVariableModification(null);
//// }
// }
// }
// searchParameters.setModificationProfile(modificationProfile);
// get the database
// DataCollection dataCollection = unmarshaller.unmarshal(DataCollection.class);
// List<SearchDatabase> databases = dataCollection.getInputs().getSearchDatabase();
// String databaseLocation = null;
//
// if (!databases.isEmpty()) {
// databaseLocation = databases.get(0).getLocation();
// searchParameters.setFastaFile(new File(databaseLocation));
// }
// close file
mzIdentMLFile = null;
unmarshaller = null;
//unmarshaller.close(); // @TODO: close method is missing?
if (waitingHandler != null && waitingHandler.isRunCanceled()) {
return null;
}
return parametersReport;
}
}