package com.compomics.util.experiment.identification.protein_inference.executable;

import com.compomics.util.experiment.biology.AminoAcidSequence;
import com.compomics.util.experiment.biology.MassGap;
import com.compomics.util.experiment.identification.amino_acid_tags.Tag;
import com.compomics.util.experiment.identification.amino_acid_tags.TagComponent;
import com.compomics.util.experiment.identification.identification_parameters.PtmSettings;
import com.compomics.util.experiment.identification.protein_inference.PeptideProteinMapping;
import com.compomics.util.experiment.identification.protein_inference.fm_index.FMIndex;
import com.compomics.util.experiment.identification.protein_sequences.SequenceFactory;
import com.compomics.util.gui.waiting.waitinghandlers.WaitingHandlerCLIImpl;
import com.compomics.util.preferences.IdentificationParameters;
import com.compomics.util.preferences.PeptideVariantsPreferences;
import com.compomics.util.preferences.SequenceMatchingPreferences;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Locale;
import java.util.regex.Pattern;

/**
 * Command line peptide mapping.
 *
 * Maps either plain peptide sequences (-p) or sequence tags (-t) read from a
 * text file against the proteome in a FASTA file and writes the resulting
 * mappings to a CSV file. Progress and diagnostics go to standard error; every
 * failure terminates the JVM with exit status -1.
 *
 * @author Dominik Kopczynski
 */
public class PeptideMapping {

    /**
     * Matches a non-empty run of ASCII letters. Compiled once instead of once
     * per input line.
     */
    private static final Pattern LETTERS_ONLY = Pattern.compile("[a-zA-Z]+");

    /**
     * Main method.
     *
     * Expected arguments: {@code -[p|t] input-fasta input-peptide/tag-csv
     * output-csv [utilities-parameter-file]}. If the arguments are missing or
     * invalid, or help is requested, the usage text is printed and the JVM
     * exits with status -1.
     *
     * @param args command line arguments
     */
    public static void main(String[] args) {

        boolean helpRequested = args.length > 0 && (args[0].equals("-h") || args[0].equals("--help"));
        if (helpRequested || args.length < 4 || (!args[0].equals("-p") && !args[0].equals("-t"))) {
            printUsage();
            System.exit(-1);
        }

        System.err.println("Start reading FASTA file");
        WaitingHandlerCLIImpl waitingHandlerCLIImpl = new WaitingHandlerCLIImpl();
        File sequences = new File(args[1]);
        SequenceFactory sequenceFactory = SequenceFactory.getInstance();
        try {
            sequenceFactory.loadFastaFile(sequences, waitingHandlerCLIImpl);
        } catch (Exception e) {
            fail("Error: could not open FASTA file", e);
        }

        // defaults, used when no parameter file is given
        double tolerance = 0.02;
        PtmSettings ptmSettings;
        SequenceMatchingPreferences sequenceMatchingPreferences;

        if (args.length >= 5) {
            File parameterFile = new File(args[4]);
            IdentificationParameters identificationParameters = null;
            try {
                identificationParameters = IdentificationParameters.getIdentificationParameters(parameterFile);
            } catch (Exception e) {
                fail("Error: could not open / parse parameter file", e);
            }
            tolerance = identificationParameters.getSearchParameters().getFragmentIonAccuracy();
            System.err.println("New fragment m/z tolerance: " + tolerance + " Da");
            ptmSettings = identificationParameters.getSearchParameters().getPtmSettings();
            sequenceMatchingPreferences = identificationParameters.getSequenceMatchingPreferences();
        } else {
            ptmSettings = new PtmSettings();
            sequenceMatchingPreferences = new SequenceMatchingPreferences();
            sequenceMatchingPreferences.setSequenceMatchingType(SequenceMatchingPreferences.MatchingType.indistiguishableAminoAcids);
            sequenceMatchingPreferences.setLimitX(0.25);
        }
        // variants are never considered, with or without a parameter file
        PeptideVariantsPreferences peptideVariantsPreferences = PeptideVariantsPreferences.getNoVariantPreferences();

        System.err.println("Start indexing proteome");
        long startTime = System.nanoTime();
        FMIndex fmIndex = new FMIndex(waitingHandlerCLIImpl, true, ptmSettings, peptideVariantsPreferences);
        double diffTime = System.nanoTime() - startTime;
        System.err.println();
        System.err.println("Indexing took " + (diffTime / 1e9) + " seconds and consumes " + (((float) fmIndex.getAllocatedBytes()) / 1e6) + " MB");

        if (args[0].equals("-p")) {
            mapPeptides(args[2], args[3], fmIndex, sequenceMatchingPreferences, waitingHandlerCLIImpl);
        } else {
            mapTags(args[2], args[3], fmIndex, sequenceMatchingPreferences, tolerance, waitingHandlerCLIImpl);
        }
    }

    /**
     * Prints the usage text to standard error.
     */
    private static void printUsage() {
        System.err.println("PeptideMapping: a tool to map peptides or sequence tags against a given proteome.");
        System.err.println("usage: PeptideMapping -[p|t] input-fasta input-peptide/tag-csv output-csv [utilities-parameter-file]");
        System.err.println();
        System.err.println("Options are:");
        System.err.println("\t-p\tpeptide mapping");
        System.err.println("\t-t\tsequence tag mapping");
        System.err.println("\t-h\tprint this info");
        System.err.println();
        System.err.println("Default parameters:");
        System.err.println("\tindexing method:\t\tfm-index");
        System.err.println("\tfragmentation tolerance [Da]:\t0.02");
    }

    /**
     * Reports a fatal error on standard error, including its cause, and
     * terminates the JVM with exit status -1.
     *
     * @param message the error message to print
     * @param cause the exception that triggered the failure
     */
    private static void fail(String message, Exception cause) {
        System.err.println(message);
        System.err.println("Caused by: " + cause);
        System.exit(-1);
    }

    /**
     * Maps the peptides listed one per line in the input file and writes one
     * {@code peptide,accession,index} row per mapping to the output file.
     *
     * @param inputPath path of the peptide list
     * @param outputPath path of the CSV file to write
     * @param fmIndex the index of the proteome
     * @param sequenceMatchingPreferences the sequence matching preferences
     * @param waitingHandler the handler displaying progress
     */
    private static void mapPeptides(String inputPath, String outputPath, FMIndex fmIndex,
            SequenceMatchingPreferences sequenceMatchingPreferences, WaitingHandlerCLIImpl waitingHandler) {

        ArrayList<String> peptides = new ArrayList<String>();
        try {
            for (String rawLine : Files.readAllLines(Paths.get(inputPath))) {
                String line = rawLine.trim();
                if (line.isEmpty()) {
                    // blank lines carry no peptide, e.g. trailing newlines at the end of the file
                    continue;
                }
                if (!LETTERS_ONLY.matcher(line).matches()) {
                    System.err.println("Error: invalid character in line '" + rawLine + "'");
                    System.exit(-1);
                }
                // Locale.ROOT keeps the casing of 'i' independent of the default locale
                peptides.add(line.toUpperCase(Locale.ROOT));
            }
        } catch (Exception e) {
            fail("Error: could not open input list", e);
        }

        waitingHandler.setSecondaryProgressCounterIndeterminate(false);
        waitingHandler.setMaxSecondaryProgressCounter(peptides.size());
        waitingHandler.setSecondaryProgressCounter(0);
        ArrayList<PeptideProteinMapping> allPeptideProteinMappings = new ArrayList<PeptideProteinMapping>();

        // starting the mapping
        long startTime = System.nanoTime();
        for (String peptide : peptides) {
            waitingHandler.increaseSecondaryProgressCounter();
            ArrayList<PeptideProteinMapping> peptideProteinMappings = fmIndex.getProteinMapping(peptide, sequenceMatchingPreferences);
            allPeptideProteinMappings.addAll(peptideProteinMappings);
        }
        double diffTime = System.nanoTime() - startTime;
        System.err.println();
        System.err.println("Mapping " + peptides.size() + " peptides took " + (diffTime / 1e9) + " seconds");

        // try-with-resources closes the writer even when writing fails
        try (PrintWriter writer = new PrintWriter(outputPath, "UTF-8")) {
            for (PeptideProteinMapping peptideProteinMapping : allPeptideProteinMappings) {
                String peptide = peptideProteinMapping.getPeptideSequence();
                String accession = peptideProteinMapping.getProteinAccession();
                int startIndex = peptideProteinMapping.getIndex();
                writer.println(peptide + "," + accession + "," + startIndex);
            }
            // PrintWriter never throws on write errors, the error flag has to be polled
            if (writer.checkError()) {
                throw new IOException("writing to '" + outputPath + "' failed");
            }
        } catch (Exception e) {
            fail("Error: could not write into file '" + outputPath + "'", e);
        }
    }

    /**
     * Maps the sequence tags listed one per line in the input file and writes
     * one row per mapping to the output file. A tag line is a comma separated
     * list whose parts are either amino acid sequences (letters only) or mass
     * gaps (numbers). Each output row consists of the tag components followed
     * by {@code peptide,accession,index}.
     *
     * @param inputPath path of the tag list
     * @param outputPath path of the CSV file to write
     * @param fmIndex the index of the proteome
     * @param sequenceMatchingPreferences the sequence matching preferences
     * @param tolerance the fragment ion tolerance passed to the tag mapping
     * @param waitingHandler the handler displaying progress
     */
    private static void mapTags(String inputPath, String outputPath, FMIndex fmIndex,
            SequenceMatchingPreferences sequenceMatchingPreferences, double tolerance, WaitingHandlerCLIImpl waitingHandler) {

        ArrayList<Tag> tags = new ArrayList<Tag>();
        try {
            for (String rawLine : Files.readAllLines(Paths.get(inputPath))) {
                String line = rawLine.trim();
                if (line.isEmpty()) {
                    // blank lines carry no tag, e.g. trailing newlines at the end of the file
                    continue;
                }
                Tag tag = new Tag();
                for (String rawPart : line.split(",")) {
                    String part = rawPart.trim();
                    if (LETTERS_ONLY.matcher(part).matches()) {
                        tag.addAminoAcidSequence(new AminoAcidSequence(part));
                    } else {
                        try {
                            double mass = Double.parseDouble(part);
                            tag.addMassGap(mass);
                        } catch (NumberFormatException e) {
                            System.err.println("Error: line contains no valid tag: '" + rawLine + "'");
                            System.exit(-1);
                        }
                    }
                }
                tags.add(tag);
            }
        } catch (Exception e) {
            fail("Error: could not open input list", e);
        }

        waitingHandler.setSecondaryProgressCounterIndeterminate(false);
        waitingHandler.setMaxSecondaryProgressCounter(tags.size());
        waitingHandler.setSecondaryProgressCounter(0);
        ArrayList<PeptideProteinMapping> allPeptideProteinMappings = new ArrayList<PeptideProteinMapping>();
        // tagIndexes.get(k) is the index in tags of the tag that produced mapping k
        ArrayList<Integer> tagIndexes = new ArrayList<Integer>();

        // starting the mapping
        long startTime = System.nanoTime();
        try {
            for (int i = 0; i < tags.size(); ++i) {
                waitingHandler.increaseSecondaryProgressCounter();
                ArrayList<PeptideProteinMapping> peptideProteinMappings = fmIndex.getProteinMapping(tags.get(i), null, sequenceMatchingPreferences, tolerance);
                allPeptideProteinMappings.addAll(peptideProteinMappings);
                for (int j = 0; j < peptideProteinMappings.size(); ++j) {
                    tagIndexes.add(i);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
            System.err.println("Error: an unexpected error happened.");
            System.exit(-1);
        }
        double diffTime = System.nanoTime() - startTime;
        System.err.println();
        System.err.println("Mapping " + tags.size() + " tags took " + (diffTime / 1e9) + " seconds");

        // try-with-resources closes the writer even when writing fails
        try (PrintWriter writer = new PrintWriter(outputPath, "UTF-8")) {
            for (int i = 0; i < allPeptideProteinMappings.size(); ++i) {
                PeptideProteinMapping peptideProteinMapping = allPeptideProteinMappings.get(i);
                String peptide = peptideProteinMapping.getPeptideSequence();
                String accession = peptideProteinMapping.getProteinAccession();
                int startIndex = peptideProteinMapping.getIndex();

                // echo the originating tag in front of the mapping
                for (TagComponent tagComponent : tags.get(tagIndexes.get(i)).getContent()) {
                    if (tagComponent instanceof MassGap) {
                        writer.print(tagComponent.getMass());
                    }
                    if (tagComponent instanceof AminoAcidSequence) {
                        writer.print(tagComponent.asSequence());
                    }
                    writer.print(",");
                }
                writer.println(peptide + "," + accession + "," + startIndex);
            }
            // PrintWriter never throws on write errors, the error flag has to be polled
            if (writer.checkError()) {
                throw new IOException("writing to '" + outputPath + "' failed");
            }
        } catch (Exception e) {
            fail("Error: could not write into file '" + outputPath + "'", e);
        }
    }
}