/* * Nucleotides.java * * Copyright (c) 2002-2015 Alexei Drummond, Andrew Rambaut and Marc Suchard * * This file is part of BEAST. * See the NOTICE file distributed with this work for additional * information regarding copyright ownership and licensing. * * BEAST is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * BEAST is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with BEAST; if not, write to the * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, * Boston, MA 02110-1301 USA */ package dr.evolution.datatype; /** * implements DataType for nucleotides with ambiguous characters * * @version $Id: Nucleotides.java,v 1.10 2006/08/31 14:57:24 rambaut Exp $ * * @author Andrew Rambaut * @author Alexei Drummond */ public class Nucleotides extends DataType { public static final String JC = "JC"; public static final String F84 = "F84"; public static final String HKY = "HKY"; public static final String GTR = "GTR"; /** * Name of data type. For XML and human reading of data type. */ public static final String DESCRIPTION = "nucleotide"; public static final int TYPE = NUCLEOTIDES; public static final Nucleotides INSTANCE = new Nucleotides(); public static final int A_STATE = 0; public static final int C_STATE = 1; public static final int G_STATE = 2; public static final int UT_STATE = 3; public static final int R_STATE = 5; // A or G public static final int Y_STATE = 6; // C or T public static final int UNKNOWN_STATE = 16; public static final int GAP_STATE = 17; /** * A table to translate state numbers (0-17) into character codes */ public static final char[] NUCLEOTIDE_CHARS = { 'A','C','G','T','U','K','M','R','S','W','Y','B','D','H','V','N', UNKNOWN_CHARACTER,GAP_CHARACTER}; /** * This table maps nucleotide characters into state codes (0-17) * Nucleotides go ACGTURYMWSKBDHVN?-", Other letters are mapped to ?. * ? and - are mapped to themselves. All other chars are mapped to -. */ public static final int NUCLEOTIDE_STATES[] = { 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17, // 0-15 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17, // 16-31 // - 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,17, // 32-47 // ? 17,17,17,17,17,17,17,17,17,17,17,17,17,17,17,16, // 48-63 // A B C D e f G H i j K l M N o 17, 0,11, 1,12,16,16, 2,13,16,16,10,16, 7,15,16, // 64-79 // p q R S T U V W x Y z 16,16, 5, 9, 3, 3,14, 8,16, 6,16,17,17,17,17,17, // 80-95 // A B C D e f G H i j K l M N o 17, 0,11, 1,12,16,16, 2,13,16,16,10,16, 7,15,16, // 96-111 // p q R S T U V W x Y z 16,16, 5, 9, 3, 3,14, 8,16, 6,16,17,17,17,17,17 // 112-127 }; /** * A table to map state numbers (0-17) to their ambiguities */ public static final String[] NUCLEOTIDE_AMBIGUITIES = { // A C G T U R Y M W S K "A", "C", "G", "T", "T", "AG", "CT", "AC", "AT", "CG", "GT", // B D H V N ? - "CGT", "AGT", "ACT", "ACG", "ACGT", "ACGT", "ACGT" }; /** * Private constructor - DEFAULT_INSTANCE provides the only instance */ protected Nucleotides() { stateCount = 4; ambiguousStateCount = 18; } @Override public char[] getValidChars() { return NUCLEOTIDE_CHARS; } /** * Get state corresponding to a character * * @param c character * * @return state */ public int getState(char c) { return NUCLEOTIDE_STATES[c]; } /** * Get state corresponding to an unknown * * @return state */ public int getUnknownState() { return UNKNOWN_STATE; } /** * Get state corresponding to a gap * * @return state */ public int getGapState() { return GAP_STATE; } /** * Get character corresponding to a given state * * @param state state * * return corresponding character */ public char getChar(int state) { return NUCLEOTIDE_CHARS[state]; } /** * returns an array containing the non-ambiguous states * that this state represents. */ public int[] getStates(int state) { String stateString = NUCLEOTIDE_AMBIGUITIES[state]; int[] states = new int[stateString.length()]; for (int i = 0; i < stateString.length(); i++) { states[i] = getState(stateString.charAt(i)); } return states; } /** * returns an array containing the non-ambiguous states that this state represents. */ public boolean[] getStateSet(int state) { boolean[] stateSet = new boolean[stateCount]; for (int i = 0; i < stateCount; i++) stateSet[i] = false; int len = NUCLEOTIDE_AMBIGUITIES[state].length(); for (int i = 0; i < len; i++) stateSet[getState(NUCLEOTIDE_AMBIGUITIES[state].charAt(i))] = true; return stateSet; } /** * description of data type * * @return string describing the data type */ public String getDescription() { return DESCRIPTION; } /** * type of data type * * @return integer code for the data type */ public int getType() { return TYPE; } }