package org.nextprot.api.core.utils; import org.nextprot.api.core.domain.ChromosomalLocation; import java.util.Comparator; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Comparison function that sort ChromosomalLocation according to chromosome, region, band and sub-band * * <h3>Multiple criteria</h3> * * <h3>First by chromosome</h3> * "1" < ... < "23" < "X" < "Y" < "MT" < "unknown" * * <h3>Second by region</h3> * "unknown" < "p" < "cen" < "q" * * <h3>Third by band</h3> * numerical order ASC * * <h3>Then by sub-band</h3> * numerical order ASC * * Created by fnikitin on 12/11/15. */ public class ChromosomalLocationComparator implements Comparator<ChromosomalLocation> { private final static Pattern LOCATION_PATTERN = Pattern.compile("^[^pq]*([pq]|cen)(\\d+)?(\\.\\d+)?.*"); private final static Pattern INTEGER_PATTERN = Pattern.compile("^(\\d+)$"); private final static int UNKNOWN_CHROMOSOME_INDEX = 26; private final static int UNKNOWN_REGION_INDEX = 0; @Override public int compare(ChromosomalLocation l1, ChromosomalLocation l2) { int[] indices1 = calcLocationIndexList(l1); int[] indices2 = calcLocationIndexList(l2); for (int i=0; i<4 ; i++) { if (indices1[i] != indices2[i]) { return indices1[i] - indices2[i]; } } return 0; } private int[] calcLocationIndexList(ChromosomalLocation chromosomalLocation) { return calcLocationIndexList(chromosomalLocation.getChromosome(), chromosomalLocation.getBand()); } /** * Calculate a list of indices of respectively: * * 1. Chromosome (1, ..., 23, X, Y, MT, unknown) * 2. Band decomposed in region/band/subband or "unknown"/"" * 2.1 region (cen, p or q) * 2.2 band (int) * 2.3 sub band (int) * * For example: * * - "19p13.11" should return [18, 0, 12, 10] * - "19" should return [18, 3, 0, 0] * * @param chromosome * @return */ int[] calcLocationIndexList(String chromosome, String band) { int[] indices = new int[4]; // first chromosome index indices[0] = calcChromosomeIndex(chromosome); if (band != null) { // a band is more precisely described as: a region, optionals band and sub-band Matcher matcher = LOCATION_PATTERN.matcher(band); if (matcher.find()) { // second region index indices[1] = calcRegionIndex(matcher.group(1)); String b = matcher.group(2); if (b != null) { // third band index indices[2] = calcBandIndex(b); String sb = matcher.group(3); if (sb != null) { // fourth sub-band index indices[3] = calcBandIndex(sb.substring(1)); } } } // "" or unknown else { // second region index indices[1] = calcRegionIndex("unknown"); } } return indices; } /** * Calculate the ordered-index of the given ChromosomalLocation chromosome from 0 (chromosome 1) to 26 (unknown chromosome) * * @param chromosome the chromosome * @return the ordered-based index */ int calcChromosomeIndex(String chromosome) { if (chromosome == null) return UNKNOWN_CHROMOSOME_INDEX; Matcher matcher = INTEGER_PATTERN.matcher(chromosome); if (matcher.find()) { return Integer.parseInt(matcher.group(1))-1; } else { switch (chromosome) { case "X": return 23; case "Y": return 24; case "MT": return 25; case "unknown": return UNKNOWN_CHROMOSOME_INDEX; default: return -1; } } } int calcRegionIndex(String region) { switch (region) { case "p": return 1; case "cen": return 2; case "q": return 3; case "": case "unknown": return UNKNOWN_REGION_INDEX; default: return -1; } } int calcBandIndex(String band) { Matcher matcher = INTEGER_PATTERN.matcher(band); if (matcher.find()) { return Integer.parseInt(matcher.group(1))-1; } return -1; } }