/* * GeneticCode.java * * Copyright (c) 2002-2015 Alexei Drummond, Andrew Rambaut and Marc Suchard * * This file is part of BEAST. * See the NOTICE file distributed with this work for additional * information regarding copyright ownership and licensing. * * BEAST is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * BEAST is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with BEAST; if not, write to the * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301 USA */ package dr.evolution.datatype; /** * A set of standard genetic codes. * * @author Andrew Rambaut * @author Alexei Drummond * * @version $Id: GeneticCode.java,v 1.11 2005/05/24 20:25:56 rambaut Exp $ */ public final class GeneticCode implements CodonTable { public static final String GENETIC_CODE = "geneticCode"; /** * Constants used to refer to the built in code tables */ public static final int UNIVERSAL_ID = 0; public static final int VERTEBRATE_MT_ID = 1; public static final int YEAST_ID = 2; public static final int MOLD_PROTOZOAN_MT_ID = 3; public static final int MYCOPLASMA_ID = 4; public static final int INVERTEBRATE_MT_ID = 5; public static final int CILIATE_ID = 6; public static final int ECHINODERM_MT_ID = 7; public static final int EUPLOTID_NUC_ID = 8; public static final int BACTERIAL_ID = 9; public static final int ALT_YEAST_ID = 10; public static final int ASCIDIAN_MT_ID = 11; public static final int FLATWORM_MT_ID = 12; public static final int BLEPHARISMA_NUC_ID = 13; public static final int NO_STOPS_ID = 14; /** * Standard genetic code tables from GENBANK * Nucleotides go A, C, G, T - Note: this is not the order used by the Genbank web site * With the first codon position most significant (i.e. AAA, AAC, AAG, AAT, ACA, etc.). */ public static final String[] GENETIC_CODE_TABLES = { // Universal "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSS*CWCLFLF", // Vertebrate Mitochondrial "KNKNTTTT*S*SMIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF", // Yeast "KNKNTTTTRSRSMIMIQHQHPPPPRRRRTTTTEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF", // Mold Protozoan Mitochondrial "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF", // Mycoplasma "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF", // Invertebrate Mitochondrial "KNKNTTTTSSSSMIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF", // Ciliate "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVQYQYSSSS*CWCLFLF", // Echinoderm Mitochondrial "NNKNTTTTSSSSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF", // Euplotid Nuclear "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSCCWCLFLF", // Bacterial "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSS*CWCLFLF", // Alternative Yeast "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLSLEDEDAAAAGGGGVVVV*Y*YSSSS*CWCLFLF", // Ascidian Mitochondrial "KNKNTTTTGSGSMIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*Y*YSSSSWCWCLFLF", // Flatworm Mitochondrial "NNKNTTTTSSSSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVYY*YSSSSWCWCLFLF", // Blepharisma Nuclear "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVV*YQYSSSS*CWCLFLF", // No stops "KNKNTTTTRSRSIIMIQHQHPPPPRRRRLLLLEDEDAAAAGGGGVVVVYYQYSSSSWCWCLFLF" }; /** * Names of the standard genetic code tables from GENBANK */ public static final String[] GENETIC_CODE_NAMES = { "universal", "vertebrateMitochondrial", "yeast", "moldProtozoanMitochondrial", "mycoplasma", "invertebrateMitochondrial", "ciliate", "echinodermMitochondrial", "euplotidNuclear", "bacterial", "alternativeYeast", "ascidianMitochondrial", "flatwormMitochondrial", "blepharismaNuclear", "noStops" }; /** * Descriptions of the standard genetic code tables from GENBANK */ public static final String[] GENETIC_CODE_DESCRIPTIONS = { "Universal", "Vertebrate Mitochondrial", "Yeast", "Mold Protozoan Mitochondrial", "Mycoplasma", "Invertebrate Mitochondrial", "Ciliate", "Echinoderm Mitochondrial", "Euplotid Nuclear", "Bacterial", "Alternative Yeast", "Ascidian Mitochondrial", "Flatworm Mitochondrial", "Blepharisma Nuclear", "Test case with no stop codons" }; public static final GeneticCode UNIVERSAL = new GeneticCode(UNIVERSAL_ID); public static final GeneticCode VERTEBRATE_MT = new GeneticCode(VERTEBRATE_MT_ID); public static final GeneticCode YEAST = new GeneticCode(YEAST_ID); public static final GeneticCode MOLD_PROTOZOAN_MT = new GeneticCode(MOLD_PROTOZOAN_MT_ID); public static final GeneticCode MYCOPLASMA = new GeneticCode(MYCOPLASMA_ID); public static final GeneticCode INVERTEBRATE_MT = new GeneticCode(INVERTEBRATE_MT_ID); public static final GeneticCode CILIATE = new GeneticCode(CILIATE_ID); public static final GeneticCode ECHINODERM_MT = new GeneticCode(ECHINODERM_MT_ID); public static final GeneticCode EUPLOTID_NUC = new GeneticCode(EUPLOTID_NUC_ID); public static final GeneticCode BACTERIAL = new GeneticCode(BACTERIAL_ID); public static final GeneticCode ALT_YEAST = new GeneticCode(ALT_YEAST_ID); public static final GeneticCode ASCIDIAN_MT = new GeneticCode(ASCIDIAN_MT_ID); public static final GeneticCode FLATWORM_MT = new GeneticCode(FLATWORM_MT_ID); public static final GeneticCode BLEPHARISMA_NUC = new GeneticCode(BLEPHARISMA_NUC_ID); public static final GeneticCode NO_STOPS = new GeneticCode(NO_STOPS_ID); public static final GeneticCode[] GENETIC_CODES = { UNIVERSAL, VERTEBRATE_MT, YEAST, MOLD_PROTOZOAN_MT, MYCOPLASMA, INVERTEBRATE_MT, CILIATE, ECHINODERM_MT, EUPLOTID_NUC, BACTERIAL, ALT_YEAST, ASCIDIAN_MT, FLATWORM_MT, BLEPHARISMA_NUC, NO_STOPS }; public GeneticCode(int geneticCode) { this.geneticCode = geneticCode; codeTable = GENETIC_CODE_TABLES[geneticCode]; } /** * Returns the name of the genetic code */ public String getName() { return GENETIC_CODE_NAMES[geneticCode]; } /** * Returns the description of the genetic code */ public String getDescription() { return GENETIC_CODE_DESCRIPTIONS[geneticCode]; } /** * Returns the char associated with AminoAcid represented by codonState. * Note that the char is as defined by AminoAcids.java * @see AminoAcids * @see Codons * @return state for '?' if codon unknown */ public char getAminoAcidChar(int codonState) { if (codonState == Codons.UNKNOWN_STATE) return AminoAcids.UNKNOWN_CHARACTER; else if (codonState == Codons.GAP_STATE) return AminoAcids.GAP_CHARACTER; return codeTable.charAt(codonState); } /** * Returns the state associated with AminoAcid represented by codonState. * Note that the state is the canonical state (generated combinatorially) * @see AminoAcids * @see Codons * @return '?' if codon unknown */ public int getAminoAcidState(int codonState) { if (codonState == Codons.UNKNOWN_STATE) return AminoAcids.UNKNOWN_STATE; else if (codonState == Codons.GAP_STATE) return AminoAcids.GAP_STATE; return AminoAcids.AMINOACID_STATES[getAminoAcidChar(codonState)]; } /** * Note that the state is the canonical state (generated combinatorially) * @return whether the codonState is a stop codon */ public boolean isStopCodon(int codonState) { return (getAminoAcidState(codonState) == AminoAcids.STOP_STATE); } /** * @return all the possible codons for a given amino acid */ public char[][] getCodonsFromAminoAcidState(int aminoAcidState) { throw new RuntimeException("not yet implemented"); } /* * @return all the possible codons for a given amino acid */ public char[][] getCodonsFromAminoAcidChar(char aminoAcidChar) { throw new RuntimeException("not yet implemented"); } /* * @returns three IUPAC states representing the given amino acid * @note The returned array should not be altered, and implementations * should attempt to implement this as efficiently as possible */ public int[] getAmbiguousCodonFromAminoAcidState(int aminoAcid) { throw new RuntimeException("not yet implemented"); } /** * @return the codon states of stop amino acids. */ public int[] getStopCodonIndices() { int i, j, n = getStopCodonCount(); int[] indices = new int[n]; j = 0; for (i = 0; i < 64; i++) { if (codeTable.charAt(i) == AminoAcids.STOP_CHARACTER) { indices[j] = i; j++; } } return indices; } /** * Returns the number of terminator amino acids. */ public int getStopCodonCount() { int i, count = 0; for (i = 0; i < 64; i++) { if (codeTable.charAt(i) == AminoAcids.STOP_CHARACTER) count++; } return count; } private int geneticCode; private String codeTable; }