package org.nextprot.api.web.service.impl; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.net.URL; import java.net.URLConnection; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.nextprot.api.commons.constants.AnnotationCategory; import org.nextprot.api.commons.exception.NextProtException; import org.nextprot.api.core.domain.Entry; import org.nextprot.api.core.domain.Isoform; import org.nextprot.api.core.domain.annotation.Annotation; import org.nextprot.api.core.domain.annotation.AnnotationIsoformSpecificity; import org.nextprot.api.core.domain.annotation.AnnotationVariant; import org.nextprot.api.core.service.EntryBuilderService; import org.nextprot.api.core.service.fluent.EntryConfig; import org.nextprot.api.core.utils.annot.AnnotationUtils; import org.nextprot.api.core.utils.IsoformUtils; import org.nextprot.api.core.utils.PeptideUtils; import org.nextprot.api.web.domain.PepXResponse; import org.nextprot.api.web.domain.PepXResponse.PepXEntryMatch; import org.nextprot.api.web.domain.PepXResponse.PepXIsoformMatch; import org.nextprot.api.web.domain.PepxUtils; import org.nextprot.api.web.service.PepXService; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.beans.factory.annotation.Value; import org.springframework.stereotype.Service; @Service public class PepXServiceImpl implements PepXService { private static final Log LOGGER = LogFactory.getLog(PepXServiceImpl.class); @Autowired private EntryBuilderService entryBuilderService; private String pepXUrl; @Value("${pepx.url}") public void setPepXUrl(String pepXUrl) { this.pepXUrl = pepXUrl; } @Override public List<Entry> findEntriesWithPeptides(String peptides, boolean modeIsoleucine) { List<Entry> entries = new ArrayList<>(); PepXResponse pepXResponse = getPepXResponse(peptides, modeIsoleucine); Set<String> entriesNames = pepXResponse.getEntriesNames(); for (String entryName : entriesNames) { EntryConfig targetIsoconf = EntryConfig.newConfig(entryName).withTargetIsoforms().with("variant").withOverview().withoutAdditionalReferences().withoutProperties(); // .with("variant") Entry entry = entryBuilderService.build(targetIsoconf); List<Annotation> virtualAnnotations = new ArrayList<>(); Set<String> peptidesForEntry = pepXResponse.getPeptidesForEntry(entryName); for(String peptide : peptidesForEntry){ PepXEntryMatch pepxEntryMatch = pepXResponse.getPeptideMatch(peptide).getPepxMatchesForEntry(entryName); if(pepxEntryMatch != null && pepxEntryMatch.getIsoforms() != null && pepxEntryMatch.getIsoforms().size() > 0){ virtualAnnotations.addAll(buildEntryWithVirtualAnnotations(peptide, modeIsoleucine, pepxEntryMatch.getIsoforms(), entry.getAnnotations(), entry.getIsoforms())); } } if((virtualAnnotations != null) && (!virtualAnnotations.isEmpty())){ Entry resultEntry = new Entry(entry.getUniqueName()); //Adds the overview as well resultEntry.setOverview(entry.getOverview()); resultEntry.setAnnotations(virtualAnnotations); entries.add(resultEntry); } } return entries; } private PepXResponse getPepXResponse(String peptides, boolean modeIsoleucine) { String httpRequest = pepXUrl + "?format=json" + (modeIsoleucine ? ("&mode=IL&pep=" + peptides) : ("&pep=" + peptides)); try { URL pepXUrl = new URL(httpRequest); URLConnection px = pepXUrl.openConnection(); BufferedReader in = new BufferedReader(new InputStreamReader(px.getInputStream())); String line; StringBuilder sb = new StringBuilder(); while ((line = in.readLine()) != null) { sb.append(line); } in.close(); return PepxUtils.parsePepxResponse(sb.toString()); } catch (IOException e) { throw new NextProtException(e); } } //This method is static friendly so that it can be tested //////////////////////////////// //CrossedCheckedWithEntryVariantsAndIsoforms static List<Annotation> buildEntryWithVirtualAnnotations(String peptide, boolean modeIsoleucine, List<PepXIsoformMatch> pepXisoforms, List<Annotation> varAnnotations, List<Isoform> isoforms) { List<Annotation> finalAnnotations = new ArrayList<>(); for (PepXIsoformMatch isoNameAndOptionalPosition : pepXisoforms) { String isoformAc = isoNameAndOptionalPosition.getIsoformAccession(); Annotation annotation = new Annotation(); annotation.setAnnotationCategory(AnnotationCategory.PEPX_VIRTUAL_ANNOTATION); annotation.setCvTermName(peptide); annotation.setDescription("This virtual annotation describes the peptide " + peptide + " found in " + isoformAc); AnnotationIsoformSpecificity is = new AnnotationIsoformSpecificity(); is.setIsoformAccession(isoformAc); if (isoNameAndOptionalPosition.getPosition() != null) {// It means there is a variant!!! int startPeptidePosition = isoNameAndOptionalPosition.getPosition(); int endPeptidePosition = startPeptidePosition + peptide.length(); List<Annotation> variantAnnotations = AnnotationUtils.filterAnnotationsBetweenPositions(startPeptidePosition, endPeptidePosition, varAnnotations, isoformAc); Isoform iso = IsoformUtils.getIsoformByIsoName(isoforms, isoformAc); if(iso == null){ throw new NextProtException("The variant at " + startPeptidePosition + " is not specific for this isoform " + isoformAc); } List<Annotation> validAnnotations = filterValidVariantAnnotations(peptide, modeIsoleucine, variantAnnotations, isoformAc, iso.getSequence()); if ((validAnnotations == null) || validAnnotations.isEmpty()) { LOGGER.warn("No valid variants found for isoform " + isoformAc + " at position " + startPeptidePosition + " for peptide " + peptide + " in mode IL:" + modeIsoleucine); continue; //We used to throw an exception, but now we just skip //throw new NextProtException("No valid variants found for isoform " + isoformName + " at position" + startPeptidePosition + " for peptide " + peptide + " in mode IL:" + modeIsoleucine); } if (validAnnotations.size() > 1) { LOGGER.warn("There is more than 1 valid variant (" + validAnnotations.size() + ") for isoform (returning the 1st) " + isoformAc + " between position " + startPeptidePosition + " and " + endPeptidePosition + " for peptide " + peptide + " in mode IL:" + modeIsoleucine); //Takes only the first valid int startPos = validAnnotations.get(0).getStartPositionForIsoform(isoformAc); int endPos = validAnnotations.get(0).getEndPositionForIsoform(isoformAc); is.setFirstPosition(startPos); is.setLastPosition(endPos); AnnotationVariant var = validAnnotations.get(0).getVariant(); annotation.setVariant(var); }else { //one variant on that position int startPos = validAnnotations.get(0).getStartPositionForIsoform(isoformAc); int endPos = validAnnotations.get(0).getEndPositionForIsoform(isoformAc); is.setFirstPosition(startPos); is.setLastPosition(endPos); AnnotationVariant var = validAnnotations.get(0).getVariant(); annotation.setVariant(var); } }else { //No variant Isoform iso = IsoformUtils.getIsoformByIsoName(isoforms, isoformAc); String sequence = (iso != null) ? iso.getSequence() : null; boolean isPeptideContained = PeptideUtils.isPeptideContainedInTheSequence(peptide, sequence, modeIsoleucine); if(!isPeptideContained){ LOGGER.warn("PepX returned a peptide (" + peptide + ") for an isoform (" + isoformAc + ") that is not in the current isoform in neXtProt"); continue; } //We used to throw an exception, but this would break the program (the algorithm could be improved to detect the specific case where pepx return a peptide of length 6 and generate a real error on other cases) //NPreconditions.checkTrue(isPeptideContained, "PepX returned a peptide (" + peptide + ") for an isoform (" + isoformName + ") that is not in the current isoform in neXtProt"); } annotation.addTargetingIsoforms(Arrays.asList(is)); finalAnnotations.add(annotation); } return finalAnnotations; } //This method is static friendly so that it can be tested //////////////////////////////// static List<Annotation> filterValidVariantAnnotations(String peptide, boolean modeIsoLeucine, List<Annotation> variantAnnotations, String isoformName, String originalSequence) { List<Annotation> resultAnnotations = new ArrayList<>(); for(Annotation varAnnot : variantAnnotations){ if(varAnnot.isAnnotationPositionalForIsoform(isoformName)){ //Check that the isoform is valid //In this case the peptide is the sequence and the variant is the peptide if(PeptideUtils.isPeptideContainedInTheSequence(varAnnot.getVariant().getVariant(), peptide, modeIsoLeucine)){//Check if the variant is present in the peptide StringBuilder sequenceWithVariant = new StringBuilder(originalSequence); int variantPosition = varAnnot.getStartPositionForIsoform(isoformName) - 1; char originalAA = originalSequence.charAt(variantPosition); // pepx doesn't handle variant of type insert if(varAnnot.getVariant().getOriginal().length()==0){ continue; } if(originalAA != varAnnot.getVariant().getOriginal().charAt(0)){ throw new NextProtException("The amino acid " + originalAA + " is not present on the sequence of the isoform (position) " + "(" + isoformName + ")" + variantPosition ); } String variantAA = varAnnot.getVariant().getVariant(); if (varAnnot.getVariant().getOriginal().length()==1) { // pepx only handles single substitution or deletion if (variantAA.length()==1) { // substitution 1 aa sequenceWithVariant.setCharAt(variantPosition, varAnnot.getVariant().getVariant().charAt(0)); if(PeptideUtils.isPeptideContainedInTheSequence(peptide, sequenceWithVariant.toString(), modeIsoLeucine)){//Check if the peptide is present with the sequence with the variant resultAnnotations.add(varAnnot); } } else if (variantAA.length()==0) { // deletion of 1 aa sequenceWithVariant.deleteCharAt(variantPosition); if(PeptideUtils.isPeptideContainedInTheSequence(peptide, sequenceWithVariant.toString(), modeIsoLeucine)){//Check if the peptide is present with the sequence with the variant resultAnnotations.add(varAnnot); } } } } } } return resultAnnotations; } /* We started to filter out the results because 3 peptides showing error with pepX : - IHTGEKP - PYKCEECGK - RIHTGEKPYK ex erreur : http://dev-api.nextprot.org/entries/search/peptide?peptide=IHTGEKP&modeIL=true&clientInfo=nextprotTeam&applicationName=PeptideViewer Mis a part l'erreur qu'on a vu hier comme quoi ce premier peptide n'existait pas dans l'entrée Q96MM3, je tiens à noter que ces 3 peptides donnés en exemple sont trouvés à plusieurs positions dans l'isoform d'origine : NX_Q05481 Mais je sais pas si ca joue vraiment. http://localhost:9000/app/?nxentry=NX_P46976&env=dev 3 peptides working with pepX : - TLTTNDAYAK - LVVLATPQVSDSMR - GALVLGSSL */ }