/*******************************************************************************
 * GenPlay, Einstein Genome Analyzer
 * Copyright (C) 2009, 2014 Albert Einstein College of Medicine
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 *
 * Authors: Julien Lajugie <julien.lajugie@einstein.yu.edu>
 *          Nicolas Fourel <nicolas.fourel@einstein.yu.edu>
 *          Eric Bouhassira <eric.bouhassira@einstein.yu.edu>
 *
 * Website: <http://genplay.einstein.yu.edu>
 ******************************************************************************/
package edu.yu.einstein.genplay.core.comparator;

import java.util.Comparator;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

import edu.yu.einstein.genplay.dataStructure.chromosome.Chromosome;


/**
 * This class compares chromosomes regarding their names.
 * <p>
 * Names are compared case-insensitively. A name is considered "prefixed" when it starts
 * with "chr" and has at least one more character. The rules, in order, are:
 * <ol>
 * <li>a prefixed name sorts before a non-prefixed name;</li>
 * <li>for two prefixed names, a name whose remainder is an integer ("chr12") sorts before
 * one whose remainder is not, and two such names are ordered by that integer;</li>
 * <li>otherwise a name whose 4th character is a special character (x, y, m, _ in that
 * order) sorts before one that has none, and two such names are ordered by the rank of
 * that character;</li>
 * <li>otherwise a name containing an underscore sorts before one that does not; when both
 * contain one, the integer located between "chr" and the first underscore is compared and,
 * if equal, the parts following the underscore are compared recursively with these same rules;</li>
 * <li>every case left undecided is delegated to {@link StringComparator}.</li>
 * </ol>
 * No null check is performed on the compared chromosomes or names.
 * @author Nicolas Fourel
 * @version 0.1
 */
public class ChromosomeComparator implements Comparator<Chromosome> {

	/** Prefix of the conventional chromosome names, in lower case */
	private static final String CHR_PREFIX = "chr";

	/** Index of the first character located after the "chr" prefix */
	private static final int PREFIX_LENGTH = CHR_PREFIX.length();

	/** Rank of every special character (lower rank sorts first). Built once and never modified afterwards. */
	private static final Map<Character, Integer> CHAR_SCORE = createCharScore();


	/**
	 * @return the map associating each special character to its rank
	 */
	private static Map<Character, Integer> createCharScore () {
		Map<Character, Integer> scores = new HashMap<Character, Integer>();
		scores.put('x', 0);
		scores.put('y', 1);
		scores.put('m', 2);
		scores.put('_', 3);
		return scores;
	}


	@Override
	public int compare(Chromosome chromosome1, Chromosome chromosome2) {
		return compareChromosomeName(chromosome1.getName(), chromosome2.getName());
	}


	/**
	 * Compares two chromosome names using the rules described in the class documentation.
	 * @param chromosomeName1 the first chromosome name
	 * @param chromosomeName2 the second chromosome name
	 * @return a negative integer, zero, or a positive integer as the first name sorts before, at the same rank as, or after the second name
	 */
	public int compareChromosomeName(String chromosomeName1, String chromosomeName2) {
		// Locale.ROOT keeps the lower-casing independent from the default locale of the JVM
		// (e.g. with a Turkish locale "I" would otherwise become a dotless "i")
		String name1 = chromosomeName1.toLowerCase(Locale.ROOT);
		String name2 = chromosomeName2.toLowerCase(Locale.ROOT);

		boolean hasPrefix1 = startsWithCHR(name1);
		boolean hasPrefix2 = startsWithCHR(name2);
		if (hasPrefix1 != hasPrefix2) {						// only one of the names starts with "chr"
			return hasPrefix1 ? -1 : 1;						// the prefixed name is before
		}
		if (hasPrefix1) {									// both names start with "chr"
			Integer decision = comparePrefixedNames(name1, name2);
			if (decision != null) {
				return decision;
			}
		}
		// no rule specific to the chromosome names applied: regular string comparison
		StringComparator stringComparator = new StringComparator();
		return stringComparator.compare(name1, name2);
	}


	/**
	 * Applies the rules specific to the names starting with "chr".
	 * @param name1 the first name, in lower case and starting with "chr"
	 * @param name2 the second name, in lower case and starting with "chr"
	 * @return the result of the comparison, null if these rules cannot order the two names
	 */
	private Integer comparePrefixedNames (String name1, String name2) {
		// integers following the "chr" (eg: "chr12")
		Integer number1 = getInteger(name1, PREFIX_LENGTH);
		Integer number2 = getInteger(name2, PREFIX_LENGTH);
		if ((number1 != null) || (number2 != null)) {
			return compareNullsLast(number1, number2);
		}

		// special characters following the "chr" (eg: "chrX")
		Integer score1 = getSpecialCharCode(name1);
		Integer score2 = getSpecialCharCode(name2);
		if ((score1 != null) || (score2 != null)) {
			int result = compareNullsLast(score1, score2);
			if ((result != 0) || name1.equals(name2)) {
				return result;
			}
			// same special character but different names (eg: "chrx" and "chrx_random"):
			// they must not be reported as equal, the caller falls back on the string comparison
			return null;
		}

		// underscore based names (eg: "chr1_gl000191_random")
		int underscore1 = name1.indexOf('_');
		int underscore2 = name2.indexOf('_');
		boolean hasUnderscore1 = underscore1 > -1;
		boolean hasUnderscore2 = underscore2 > -1;
		if (!hasUnderscore1 && !hasUnderscore2) {
			return null;
		}
		if (hasUnderscore1 != hasUnderscore2) {				// only one of the names contains an underscore
			return hasUnderscore1 ? -1 : 1;					// the name with the underscore is before
		}

		// integers located between the "chr" and the first underscore
		Integer leading1 = getInteger(name1, PREFIX_LENGTH, underscore1);
		Integer leading2 = getInteger(name2, PREFIX_LENGTH, underscore2);
		if ((leading1 == null) && (leading2 == null)) {
			return null;
		}
		int result = compareNullsLast(leading1, leading2);
		if (result != 0) {
			return result;
		}
		// both integers exist and are equal: the parts following the underscore decide
		String suffix1 = name1.substring(underscore1 + 1);
		String suffix2 = name2.substring(underscore2 + 1);
		return compareChromosomeName(suffix1, suffix2);
	}


	/**
	 * Compares two integers that may be null, a non-null value being before a null one.
	 * @param value1 the first value, can be null
	 * @param value2 the second value, can be null
	 * @return -1 if only the first value exists, 1 if only the second value exists,
	 * 0 if none exists, the regular integer comparison otherwise
	 */
	private int compareNullsLast (Integer value1, Integer value2) {
		if (value1 == null) {
			return (value2 == null) ? 0 : 1;
		}
		if (value2 == null) {
			return -1;
		}
		return value1.compareTo(value2);
	}


	/**
	 * @param s				the string
	 * @param startIndex	the start index
	 * @return				the integer found from the start index to the end of the string, null if that part is not an integer
	 */
	private Integer getInteger (String s, int startIndex) {
		return getInteger(s, startIndex, s.length());
	}


	/**
	 * @param s				the string
	 * @param startIndex	the start index (inclusive)
	 * @param stopIndex		the stop index (exclusive)
	 * @return				the integer found between the indexes, null if that part of the string is not an integer
	 */
	private Integer getInteger (String s, int startIndex, int stopIndex) {
		try {
			return Integer.parseInt(s.substring(startIndex, stopIndex));
		} catch (NumberFormatException e) {
			// the part of the string is not an integer, which is an expected case
			return null;
		}
	}


	/**
	 * Looks for the score of the special character (if it exists) located right after the "chr" prefix.
	 * This method is used for chromosome names like "chr..." (chrX, chrY, chrM...).
	 * @param s the string, in lower case and longer than the "chr" prefix
	 * @return the score of the special character, null if the character is not a special one
	 */
	private Integer getSpecialCharCode (String s) {
		return CHAR_SCORE.get(s.charAt(PREFIX_LENGTH));
	}


	/**
	 * @param s the chromosome name, in lower case
	 * @return true if it starts with "chr" and is followed by at least one character, false otherwise.
	 */
	private boolean startsWithCHR (String s) {
		return (s.length() > PREFIX_LENGTH) && s.startsWith(CHR_PREFIX);
	}
}