package org.nextprot.api.core.utils; import org.nextprot.api.commons.exception.NextProtException; import org.nextprot.api.core.dao.EntityName; import org.nextprot.api.core.domain.Entry; import org.nextprot.api.core.domain.Isoform; import java.util.Comparator; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; /** * Utils about isoforms * @author Daniel Teixeira http://github.com/ddtxra * */ public class IsoformUtils { /** * Gets the isoform by its name * @param isoforms * @param isoformName * @return * TODO: check if redundant with getIsoformByName */ public static Isoform getIsoformByIsoName(List<Isoform> isoforms, String isoformName) { //TODO the isoforms should be stored in a map at the level of the Entry for(Isoform iso : isoforms){ if(iso.getUniqueName().replaceAll("NX_", "").equals(isoformName.replaceAll("NX_", ""))) return iso; } return null; } /** * Get the canonical isoform of the given entry * @param entry the entry to fetch canonical isoform * @return the canonical isoform * @throws NextProtException if canonical isoform is missing */ public static Isoform getCanonicalIsoform(Entry entry) { for (Isoform isoform : entry.getIsoforms()) { if (isoform.isCanonicalIsoform()) return isoform; } throw new NextProtException(entry.getUniqueName()+" lacks canonical isoform"); } /** * Get all isoforms except the given one */ public static List<Isoform> getOtherIsoforms(Entry entry, String isoformUniqueName) { return entry.getIsoforms().stream() .filter(iso -> !iso.getUniqueName().equals(isoformUniqueName)) .collect(Collectors.toList()); } public static Isoform getIsoformByName(Entry entry, String name) { return getIsoformByName(entry.getIsoforms(), name); } public static Isoform getIsoformByName(List<Isoform> isoforms, String name) { if (name==null) return null; for (Isoform iso: isoforms) { if (name.equals(iso.getUniqueName())) return iso; EntityName mainEname = iso.getMainEntityName(); if (mainEname!=null && name.equalsIgnoreCase(mainEname.getName())) return iso; for (EntityName syn: iso.getSynonyms()) { if (name.equalsIgnoreCase(syn.getName())) return iso; } } return null; } /** * Compare isoform names by lexicographic order of master accession then numerically by isoform number. */ public static class ByIsoformUniqueNameComparator implements Comparator<String> { @Override public int compare(String uniqueName1, String uniqueName2) { if (uniqueName1.contains("-") && uniqueName2.contains("-")) { String[] accessionAndNumber1 = uniqueName1.split("-"); String[] accessionAndNumber2 = uniqueName2.split("-"); // compare lexicographically by master accessions int comp = accessionAndNumber1[0].compareTo(accessionAndNumber2[0]); // if equals -> compare numerically by isoform number if (comp == 0) { int isoNumber1 = Integer.parseInt(accessionAndNumber1[1]); int isoNumber2 = Integer.parseInt(accessionAndNumber2[1]); comp = isoNumber1 - isoNumber2; } return comp; } return uniqueName1.compareTo(uniqueName2); } } /** * Comparison done as follow : * The first isoform is always the canonical one, the remaining are sorted according to main entity names with the following criteria: * 1. if number prefix found -> numerically compared * 2. then lexicographically * 3. if number suffix found -> numerically compared **/ public static class IsoformComparator implements Comparator<Isoform> { private static final Pattern numPat = Pattern.compile("\\d+"); private static final Pattern prefixNumPat = Pattern.compile("^(\\d+)[a-zA-Z\\s]+$"); private static final Pattern suffixNumPat = Pattern.compile("^([a-zA-Z\\s]+)(\\d+)?$"); @Override public int compare(Isoform iso1, Isoform iso2) { // 1st criterium: canonical isoform comes first if (iso1.isCanonicalIsoform()) { return -1; } if (iso2.isCanonicalIsoform()) { return 1; } String name1 = iso1.getMainEntityName().getValue(); String name2 = iso2.getMainEntityName().getValue(); if (numPat.matcher(name1).find() || numPat.matcher(name2).find()) { // compare prefixes first int comp = comparePrefixNumbers(name1, name2); // if same prefixes or no prefixes if (comp == 0) { comp = compareStemThenSuffixNumbers(name1, name2); } return comp; } else { return name1.compareTo(name2); } } private int comparePrefixNumbers(String name1, String name2) { Matcher preMatcher1 = prefixNumPat.matcher(name1); Matcher preMatcher2 = prefixNumPat.matcher(name2); boolean isName1HasPrefixNumber = preMatcher1.find(); boolean isName2HasPrefixNumber = preMatcher2.find(); if (isName1HasPrefixNumber && isName2HasPrefixNumber) { int num1 = Integer.parseInt(preMatcher1.group(1)); int num2 = Integer.parseInt(preMatcher2.group(1)); return num1 - num2; } // name 1 comes first else if (isName1HasPrefixNumber) { return -1; } // name 2 comes first else if (isName2HasPrefixNumber) { return 1; } // no prefix number found -> compare stems return 0; } private int compareStemThenSuffixNumbers(String name1, String name2) { Matcher suffMatcher1 = suffixNumPat.matcher(name1); Matcher suffMatcher2 = suffixNumPat.matcher(name2); if (suffMatcher1.find() && suffMatcher2.find()) { String stem1 = suffMatcher1.group(1); String stem2 = suffMatcher2.group(1); int comp = stem1.compareTo(stem2); // same stem compare suffix numbers if (comp == 0) { if (suffMatcher1.group(2) != null && suffMatcher2.group(2) != null) { int num1 = Integer.parseInt(suffMatcher1.group(2)); int num2 = Integer.parseInt(suffMatcher2.group(2)); comp = num1 - num2; } else if (suffMatcher1.group(2) != null) { comp = 1; } else { comp = -1; } } return comp; } return name1.compareTo(name2); } } }