/*
 *                  Eoulsan development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public License version 2.1 or
 * later and CeCILL-C. This should be distributed with the code.
 * If you do not have a copy, see:
 *
 *      http://www.gnu.org/licenses/lgpl-2.1.txt
 *      http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt
 *
 * Copyright for this code is held jointly by the Genomic platform
 * of the Institut de Biologie de l'École normale supérieure and
 * the individual authors. These should be listed in @author doc
 * comments.
 *
 * For more information on the Eoulsan project and its aims,
 * or to join the Eoulsan Google group, visit the home page
 * at:
 *
 *      http://outils.genomique.biologie.ens.fr/eoulsan
 *
 */

package fr.ens.biologie.genomique.eoulsan.bio;

import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

/**
 * This class defines common nucleotide alphabets (DNA, RNA and sequencing
 * reads) as shared constants.
 * @since 1.1
 * @author Laurent Jourdren
 */
public final class Alphabets {

  //
  // Letter sets
  //
  // These sets are declared before the alphabet constants so that they are
  // already initialized when the Alphabet instances below are created.
  //

  /** Letters of the DNA alphabet including the ambiguity letters. */
  private static final Set<Character> AMBIGUOUS_DNA_ALPHABET_LETTERS =
      toUnmodifiableSet("GATCRYWSMKHBVDN".toCharArray());

  /** Letters of the DNA alphabet restricted to the four bases. */
  private static final Set<Character> UNAMBIGUOUS_DNA_ALPHABET_LETTERS =
      toUnmodifiableSet("GATC".toCharArray());

  /** Letters of the RNA alphabet including the ambiguity letters. */
  private static final Set<Character> AMBIGUOUS_RNA_ALPHABET_LETTERS =
      toUnmodifiableSet("GAUCRYWSMKHBVDN".toCharArray());

  /** Letters of the RNA alphabet restricted to the four bases. */
  private static final Set<Character> UNAMBIGUOUS_RNA_ALPHABET_LETTERS =
      toUnmodifiableSet("GAUC".toCharArray());

  /** Letters of the read alphabet: the four DNA bases and 'N'. */
  private static final Set<Character> READ_DNA_ALPHABET_LETTERS =
      toUnmodifiableSet("GATCN".toCharArray());

  //
  // Utility method
  //

  /**
   * Convert an array of char into an unmodifiable set of Characters.
   * @param array array to convert
   * @return an unmodifiable Set with the elements of the input array, or
   *         null if the input array is null
   */
  private static Set<Character> toUnmodifiableSet(final char[] array) {

    if (array == null) {
      return null;
    }

    final Set<Character> letters = new HashSet<>();

    for (int i = 0; i < array.length; i++) {
      letters.add(array[i]);
    }

    return Collections.unmodifiableSet(letters);
  }

  //
  // Classes
  //

  /**
   * An ambiguous DNA alphabet: the four bases A, C, G, T plus the ambiguity
   * letters R, Y, W, S, M, K, H, B, V, D and N.
   */
  public static final Alphabet AMBIGUOUS_DNA_ALPHABET = new Alphabet() {

    @Override
    public final String getName() {

      return "AmbiguousDNA";
    }

    @Override
    public final Set<Character> getLetters() {

      return AMBIGUOUS_DNA_ALPHABET_LETTERS;
    }

    @Override
    protected final boolean isLowerCaseValid() {

      return true;
    }

    @Override
    protected final char getComplement(final char letter) {

      switch (letter) {

      // Base pairs, upper case
      case 'A':
        return 'T';
      case 'T':
        return 'A';
      case 'C':
        return 'G';
      case 'G':
        return 'C';

      // Ambiguity letters, upper case
      case 'M':
        return 'K';
      case 'K':
        return 'M';
      case 'R':
        return 'Y';
      case 'Y':
        return 'R';
      case 'V':
        return 'B';
      case 'B':
        return 'V';
      case 'H':
        return 'D';
      case 'D':
        return 'H';

      // Base pairs, lower case
      case 'a':
        return 't';
      case 't':
        return 'a';
      case 'c':
        return 'g';
      case 'g':
        return 'c';

      // Ambiguity letters, lower case
      case 'm':
        return 'k';
      case 'k':
        return 'm';
      case 'r':
        return 'y';
      case 'y':
        return 'r';
      case 'v':
        return 'b';
      case 'b':
        return 'v';
      case 'h':
        return 'd';
      case 'd':
        return 'h';

      // Letters that are their own complement. Note that 'X' is handled
      // here although it is not part of the letter set of this alphabet.
      // Any other character is returned unchanged too.
      case 'W':
      case 'S':
      case 'X':
      case 'N':
      case 'w':
      case 's':
      case 'x':
      case 'n':
      default:
        return letter;
      }
    }
  };

  /** An unambiguous DNA alphabet: only the four bases A, C, G and T. */
  public static final Alphabet UNAMBIGUOUS_DNA_ALPHABET = new Alphabet() {

    @Override
    public final String getName() {

      return "UnAmbiguousDNA";
    }

    @Override
    public final Set<Character> getLetters() {

      return UNAMBIGUOUS_DNA_ALPHABET_LETTERS;
    }

    @Override
    protected final boolean isLowerCaseValid() {

      return true;
    }

    @Override
    protected final char getComplement(final char letter) {

      switch (letter) {

      // Upper case
      case 'A':
        return 'T';
      case 'T':
        return 'A';
      case 'C':
        return 'G';
      case 'G':
        return 'C';

      // Lower case
      case 'a':
        return 't';
      case 't':
        return 'a';
      case 'c':
        return 'g';
      case 'g':
        return 'c';

      // Any other character is returned unchanged
      default:
        return letter;
      }
    }
  };

  /**
   * An ambiguous RNA alphabet: the four bases A, C, G, U plus the ambiguity
   * letters R, Y, W, S, M, K, H, B, V, D and N.
   */
  public static final Alphabet AMBIGUOUS_RNA_ALPHABET = new Alphabet() {

    @Override
    public final String getName() {

      return "AmbiguousRNA";
    }

    @Override
    public final Set<Character> getLetters() {

      return AMBIGUOUS_RNA_ALPHABET_LETTERS;
    }

    @Override
    protected final boolean isLowerCaseValid() {

      return true;
    }

    @Override
    protected final char getComplement(final char letter) {

      switch (letter) {

      // Base pairs, upper case
      case 'A':
        return 'U';
      case 'U':
        return 'A';
      case 'C':
        return 'G';
      case 'G':
        return 'C';

      // Ambiguity letters, upper case
      case 'M':
        return 'K';
      case 'K':
        return 'M';
      case 'R':
        return 'Y';
      case 'Y':
        return 'R';
      case 'V':
        return 'B';
      case 'B':
        return 'V';
      case 'H':
        return 'D';
      case 'D':
        return 'H';

      // Base pairs, lower case
      case 'a':
        return 'u';
      case 'u':
        return 'a';
      case 'c':
        return 'g';
      case 'g':
        return 'c';

      // Ambiguity letters, lower case
      case 'm':
        return 'k';
      case 'k':
        return 'm';
      case 'r':
        return 'y';
      case 'y':
        return 'r';
      case 'v':
        return 'b';
      case 'b':
        return 'v';
      case 'h':
        return 'd';
      case 'd':
        return 'h';

      // Letters that are their own complement. Note that 'X' is handled
      // here although it is not part of the letter set of this alphabet.
      // Any other character is returned unchanged too.
      case 'W':
      case 'S':
      case 'X':
      case 'N':
      case 'w':
      case 's':
      case 'x':
      case 'n':
      default:
        return letter;
      }
    }
  };

  /** An unambiguous RNA alphabet: only the four bases A, C, G and U. */
  public static final Alphabet UNAMBIGUOUS_RNA_ALPHABET = new Alphabet() {

    @Override
    public final String getName() {

      return "UnAmbiguousRNA";
    }

    @Override
    public final Set<Character> getLetters() {

      return UNAMBIGUOUS_RNA_ALPHABET_LETTERS;
    }

    @Override
    protected final boolean isLowerCaseValid() {

      return true;
    }

    @Override
    protected final char getComplement(final char letter) {

      switch (letter) {

      // Upper case
      case 'A':
        return 'U';
      case 'U':
        return 'A';
      case 'C':
        return 'G';
      case 'G':
        return 'C';

      // Lower case
      case 'a':
        return 'u';
      case 'u':
        return 'a';
      case 'c':
        return 'g';
      case 'g':
        return 'c';

      // Any other character is returned unchanged
      default:
        return letter;
      }
    }
  };

  /**
   * An alphabet for reads: the upper case letters A, C, G, T and N. Unlike
   * the other alphabets of this class, isLowerCaseValid() returns false.
   */
  public static final Alphabet READ_DNA_ALPHABET = new Alphabet() {

    @Override
    public final String getName() {

      return "ReadDNA";
    }

    @Override
    public final Set<Character> getLetters() {

      return READ_DNA_ALPHABET_LETTERS;
    }

    @Override
    protected final boolean isLowerCaseValid() {

      return false;
    }

    @Override
    protected final char getComplement(final char letter) {

      switch (letter) {

      case 'A':
        return 'T';
      case 'T':
        return 'A';
      case 'C':
        return 'G';
      case 'G':
        return 'C';

      // 'N' is its own complement, any other character is returned
      // unchanged
      case 'N':
      default:
        return letter;
      }
    }

    @Override
    public final boolean isLetterValid(final char letter) {

      // Hard-coded test of the five letters of this alphabet, meant to be
      // faster than the default implementation
      return letter == 'A'
          || letter == 'C' || letter == 'G' || letter == 'T' || letter == 'N';
    }
  };

}