/* * Codons.java * * Copyright (c) 2002-2015 Alexei Drummond, Andrew Rambaut and Marc Suchard * * This file is part of BEAST. * See the NOTICE file distributed with this work for additional * information regarding copyright ownership and licensing. * * BEAST is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * BEAST is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with BEAST; if not, write to the * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301 USA */ package dr.evolution.datatype; /** * Implements DataType for codons * <p/> * Codons have tree different representations: * State numbers - 0-63 + 64, 65 as unknown and gap * State chars - the above converted into chars, starting at 'A' * and '?' + '-' for unknown and gap * Strings or chars of three nucleotide characters * * @author Andrew Rambaut * @author Alexei Drummond * @version $Id: Codons.java,v 1.15 2005/05/24 20:25:56 rambaut Exp $ */ public class Codons extends DataType { /** * Name of data type. For XML and human reading of data type. */ public static final String DESCRIPTION = "codon"; public static final int TYPE = CODONS; public static final Codons UNIVERSAL = new Codons(GeneticCode.UNIVERSAL); public static final Codons VERTEBRATE_MT = new Codons(GeneticCode.VERTEBRATE_MT); public static final Codons YEAST = new Codons(GeneticCode.YEAST); public static final Codons MOLD_PROTOZOAN_MT = new Codons(GeneticCode.MOLD_PROTOZOAN_MT); public static final Codons MYCOPLASMA = new Codons(GeneticCode.MYCOPLASMA); public static final Codons INVERTEBRATE_MT = new Codons(GeneticCode.INVERTEBRATE_MT); public static final Codons CILIATE = new Codons(GeneticCode.CILIATE); public static final Codons ECHINODERM_MT = new Codons(GeneticCode.ECHINODERM_MT); public static final Codons EUPLOTID_NUC = new Codons(GeneticCode.EUPLOTID_NUC); public static final Codons BACTERIAL = new Codons(GeneticCode.BACTERIAL); public static final Codons ALT_YEAST = new Codons(GeneticCode.ALT_YEAST); public static final Codons ASCIDIAN_MT = new Codons(GeneticCode.ASCIDIAN_MT); public static final Codons FLATWORM_MT = new Codons(GeneticCode.FLATWORM_MT); public static final Codons BLEPHARISMA_NUC = new Codons(GeneticCode.BLEPHARISMA_NUC); public static final Codons NO_STOPS = new Codons(GeneticCode.NO_STOPS); public static final int UNKNOWN_STATE = 64; public static final int GAP_STATE = 65; public static final String[] CODON_TRIPLETS = { "AAA", "AAC", "AAG", "AAT", "ACA", "ACC", "ACG", "ACT", "AGA", "AGC", "AGG", "AGT", "ATA", "ATC", "ATG", "ATT", "CAA", "CAC", "CAG", "CAT", "CCA", "CCC", "CCG", "CCT", "CGA", "CGC", "CGG", "CGT", "CTA", "CTC", "CTG", "CTT", "GAA", "GAC", "GAG", "GAT", "GCA", "GCC", "GCG", "GCT", "GGA", "GGC", "GGG", "GGT", "GTA", "GTC", "GTG", "GTT", "TAA", "TAC", "TAG", "TAT", "TCA", "TCC", "TCG", "TCT", "TGA", "TGC", "TGG", "TGT", "TTA", "TTC", "TTG", "TTT", "???", "---" }; /* * Default unused constructor */ public Codons() { } /** * Protected constructor - DEFAULT_INSTANCE and descendents provide the only instance */ protected Codons(GeneticCode geneticCode) { this.geneticCode = geneticCode; stateCount = 64 - geneticCode.getStopCodonCount(); ambiguousStateCount = 66; stateMap = new int[ambiguousStateCount]; reverseMap = new int[ambiguousStateCount]; int j = 0; int k = stateCount; for (int i = 0; i < 64; i++) { if (!geneticCode.isStopCodon(i)) { stateMap[j] = i; reverseMap[i] = j; j++; } else { stateMap[k] = i; reverseMap[i] = k; k++; } } for (int i = 64; i < ambiguousStateCount; i++) { stateMap[i] = i; reverseMap[i] = i; } } @Override public char[] getValidChars() { return null; } /** * Get state corresponding to a character * * @param c character * @return state */ public final int getState(char c) { throw new IllegalArgumentException("Codons datatype cannot be expressed as char"); } /** * Get state corresponding to an unknown * * @return state */ public int getUnknownState() { return UNKNOWN_STATE; } /** * Get state corresponding to a gap * * @return state */ public int getGapState() { return GAP_STATE; } /** * Get state corresponding to a nucleotide triplet * * @param nuc1 the codon triplet as chars * @param nuc2 the codon triplet as chars * @param nuc3 the codon triplet as chars * @return state */ public final int getState(char nuc1, char nuc2, char nuc3) { return getState(Nucleotides.INSTANCE.getState(nuc1), Nucleotides.INSTANCE.getState(nuc2), Nucleotides.INSTANCE.getState(nuc3)); } /** * @return the canonical state (in standard combinatorial order) * of a funny codon state. */ public final int getCanonicalState(int funnyState) { return stateMap[funnyState]; } /** * Get state corresponding to a nucleotide triplet * * @param nuc1 the codon triplet as states * @param nuc2 the codon triplet as states * @param nuc3 the codon triplet as states * @return state */ public final int getState(int nuc1, int nuc2, int nuc3) { if (Nucleotides.INSTANCE.isGapState(nuc1) || Nucleotides.INSTANCE.isGapState(nuc2) || Nucleotides.INSTANCE.isGapState(nuc3)) { return GAP_STATE; } if (Nucleotides.INSTANCE.isAmbiguousState(nuc1) || Nucleotides.INSTANCE.isAmbiguousState(nuc2) || Nucleotides.INSTANCE.isAmbiguousState(nuc3)) { return UNKNOWN_STATE; } int canonicalState = (nuc1 * 16) + (nuc2 * 4) + nuc3; return reverseMap[canonicalState]; } /** * Get character corresponding to a given state * * @param state state * <p/> * return corresponding character */ public final char getChar(int state) { throw new IllegalArgumentException("Codons datatype cannot be expressed as char"); } /** * Get triplet string corresponding to a given state * * @param state state * <p/> * return corresponding triplet string */ public final String getTriplet(int state) { return CODON_TRIPLETS[stateMap[state]]; } /** * Get an array of three nucleotide states making this codon state * * @param state state * <p/> * return corresponding triplet string */ public final int[] getTripletStates(int state) { int[] triplet = new int[3]; triplet[0] = Nucleotides.INSTANCE.getState(CODON_TRIPLETS[stateMap[state]].charAt(0)); triplet[1] = Nucleotides.INSTANCE.getState(CODON_TRIPLETS[stateMap[state]].charAt(1)); triplet[2] = Nucleotides.INSTANCE.getState(CODON_TRIPLETS[stateMap[state]].charAt(2)); return triplet; } /** * description of data type * * @return string describing the data type */ public String getDescription() { return DESCRIPTION; } /** * type of data type * * @return integer code for the data type */ public int getType() { return TYPE; } /** * @return the genetic code */ public GeneticCode getGeneticCode() { return geneticCode; } /** * Takes non-canonical state and returns true if it represents stop codon * * @param state * @return true if the given state is a stop codon */ public final boolean isStopCodon(int state) { return geneticCode.isStopCodon(stateMap[state]); } public static byte[] constructRateMap(Codons codonDataType) { final int stateCount = codonDataType.getStateCount(); final int rateCount = (stateCount * (stateCount - 1)) / 2; return constructRateMap(rateCount, stateCount, codonDataType, codonDataType.getGeneticCode()); } /** * Parse a text string to return a genetic code */ public static Codons findByName(String codeStr) { Codons codons = null; if (codeStr.equals(GeneticCode.UNIVERSAL.getName())) { codons = Codons.UNIVERSAL; } else if (codeStr.equals(GeneticCode.VERTEBRATE_MT.getName())) { codons = Codons.VERTEBRATE_MT; } else if (codeStr.equals(GeneticCode.YEAST.getName())) { codons = Codons.YEAST; } else if (codeStr.equals(GeneticCode.MOLD_PROTOZOAN_MT.getName())) { codons = Codons.MOLD_PROTOZOAN_MT; } else if (codeStr.equals(GeneticCode.INVERTEBRATE_MT.getName())) { codons = Codons.INVERTEBRATE_MT; } else if (codeStr.equals(GeneticCode.CILIATE.getName())) { codons = Codons.CILIATE; } else if (codeStr.equals(GeneticCode.ECHINODERM_MT.getName())) { codons = Codons.ECHINODERM_MT; } else if (codeStr.equals(GeneticCode.EUPLOTID_NUC.getName())) { codons = Codons.EUPLOTID_NUC; } else if (codeStr.equals(GeneticCode.BACTERIAL.getName())) { codons = Codons.BACTERIAL; } else if (codeStr.equals(GeneticCode.ALT_YEAST.getName())) { codons = Codons.ALT_YEAST; } else if (codeStr.equals(GeneticCode.ASCIDIAN_MT.getName())) { codons = Codons.ASCIDIAN_MT; } else if (codeStr.equals(GeneticCode.FLATWORM_MT.getName())) { codons = Codons.FLATWORM_MT; } else if (codeStr.equals(GeneticCode.BLEPHARISMA_NUC.getName())) { codons = Codons.BLEPHARISMA_NUC; } else if (codeStr.equals(GeneticCode.NO_STOPS.getName())) { codons = Codons.NO_STOPS; } else { throw new RuntimeException("Unknown genetics code"); } return codons; } /** * Construct a map of the rate classes in the rate matrix using the current * genetic code. Classes: * 0: codon changes in more than one codon position (or stop codons) * 1: synonymous transition * 2: synonymous transversion * 3: non-synonymous transition * 4: non-synonymous transversion */ public static byte[] constructRateMap(int rateCount, int stateCount, Codons codonDataType, GeneticCode geneticCode) { int u, v, i1, j1, k1, i2, j2, k2; byte rateClass; int[] codon; int cs1, cs2, aa1, aa2; int i = 0; byte[] rateMap = new byte[rateCount]; for (u = 0; u < stateCount; u++) { codon = codonDataType.getTripletStates(u); i1 = codon[0]; j1 = codon[1]; k1 = codon[2]; cs1 = codonDataType.getState(i1, j1, k1); aa1 = geneticCode.getAminoAcidState(codonDataType.getCanonicalState(cs1)); for (v = u + 1; v < stateCount; v++) { codon = codonDataType.getTripletStates(v); i2 = codon[0]; j2 = codon[1]; k2 = codon[2]; cs2 = codonDataType.getState(i2, j2, k2); aa2 = geneticCode.getAminoAcidState(codonDataType.getCanonicalState(cs2)); rateClass = -1; if (i1 != i2) { if ( (i1 == 0 && i2 == 2) || (i1 == 2 && i2 == 0) || // A <-> G (i1 == 1 && i2 == 3) || (i1 == 3 && i2 == 1) ) { // C <-> T rateClass = 1; // Transition at position 1 } else { rateClass = 2; // Transversion at position 1 } } if (j1 != j2) { if (rateClass == -1) { if ( (j1 == 0 && j2 == 2) || (j1 == 2 && j2 == 0) || // A <-> G (j1 == 1 && j2 == 3) || (j1 == 3 && j2 == 1) ) { // C <-> T rateClass = 1; // Transition } else { rateClass = 2; // Transversion } } else rateClass = 0; // Codon changes at more than one position } if (k1 != k2) { if (rateClass == -1) { if ( (k1 == 0 && k2 == 2) || (k1 == 2 && k2 == 0) || // A <-> G (k1 == 1 && k2 == 3) || (k1 == 3 && k2 == 1) ) { // C <-> T rateClass = 1; // Transition } else { rateClass = 2; // Transversion } } else rateClass = 0; // Codon changes at more than one position } if (rateClass != 0) { if (aa1 != aa2) { rateClass += 2; // Is a non-synonymous change } } rateMap[i] = rateClass; i++; } } return rateMap; } // Private members protected GeneticCode geneticCode; protected int[] stateMap; protected int[] reverseMap; }