/** * edu.utexas.GeDBIT.type.DNA 2006.05.24 * * Copyright Information: * * Change Log: * 2006.05.24: Modified from original jdb package, by Willard */ package GeDBIT.type; import java.io.IOException; import java.io.ObjectInput; import java.io.ObjectOutput; import GeDBIT.dist.SymmetricSubstitutionWeightMatrix; import GeDBIT.dist.WeightMatrix; /** * A <code>DNA</code> is a compact representation of a DNA sequence. * * @author Rui Mao * @version 2003.06.06 */ public class DNA extends Sequence { private static final long serialVersionUID = 1351256505737923337L; public static enum DNASymbol implements Symbol { A("Adenine", (byte) 0), C("Cytosine", (byte) 1), G("Guanine", (byte) 2), T( "Thymine", (byte) 3), R("Purine", (byte) 4), Y("Pyrimidine", (byte) 5), M("C or A", (byte) 6), K("T, U,or G", (byte) 7), W( "T, U or A", (byte) 8), S("C or G", (byte) 9), B("not A", (byte) 10), D("not C", (byte) 11), H("not G", (byte) 12), V( "not T, U", (byte) 13), N("Any base", (byte) 14); private String description; public byte byteValue; DNASymbol() { } DNASymbol(String description, byte byteValue) { this.description = description; this.byteValue = byteValue; } public String description() { return description; } public byte byteValue() { return byteValue; } public String stringValue() { return toString(); } public Symbol getSymbol(String s) { return valueOf(s); } public void writeExternal(ObjectOutput out) throws IOException { out.writeInt(description.length()); out.writeChars(description); out.writeByte(byteValue); } public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { char[] charDescription = new char[in.readInt()]; for (int i = 0; i < charDescription.length; i++) { charDescription[i] = in.readChar(); } description = String.copyValueOf(charDescription); byteValue = in.readByte(); } public static int distinctSize() { return 15; } } public static final Alphabet ALPHABET = new Alphabet(DNASymbol.values(), DNASymbol.distinctSize()); public static final Alphabet SIMPLE_ALPHABET = new Alphabet( new DNASymbol[] { DNASymbol.A, DNASymbol.C, DNASymbol.T, DNASymbol.G }, 4); public Alphabet getAlphabet() { return ALPHABET; } public DNA(String sequenceID, String sequence) { super(sequenceID, sequence); for (int i = 0; i < data.length; i++) { data[i] = (DNASymbol.valueOf(sequence.substring(i, i + 1))) .byteValue(); } } public Symbol get(int index) { return ALPHABET.get(data[index]); } public String toString() { StringBuffer buffer = new StringBuffer(data.length); for (int i = 0; i < data.length; i++) buffer.append(ALPHABET.get(data[i])); return buffer.toString(); } /** * The SimpleDNAEditDistanceMatrix looks like: {0,1,1,1}, {1,0,1,1}, * {1,1,0,1}, {1,1,1,0} */ public final static WeightMatrix SimpleDNAEditDistanceMatrix = new SymmetricSubstitutionWeightMatrix( DNA.SIMPLE_ALPHABET, new double[][] { { 0, 1, 1, 1 }, { 1, 0, 1, 1 }, { 1, 1, 0, 1 }, { 1, 1, 1, 0 } }); public final static WeightMatrix SimpleWeightedDNAMatrix = new SymmetricSubstitutionWeightMatrix( DNA.SIMPLE_ALPHABET, new double[][] { { 0, 2 / 3, 2 / 3, 2 / 3 }, { 2 / 3, 0, 2 / 3, 2 / 3 }, { 2 / 3, 2 / 3, 0, 1 }, { 2 / 3, 2 / 3, 1, 0 } }); public final static WeightMatrix EditDistanceWeightMatrix = new SymmetricSubstitutionWeightMatrix( DNA.ALPHABET, new double[][] { { 0, 1, 1, 1, 0.5, 1, 0.5, 1, 0.5, 1, 1, 0.5, 0.5, 0.5, 0.5 },// A // Adenine { 1, 0, 1, 1, 1, 0.5, 0.5, 1, 1, 0.5, 0.5, 1, 0.5, 0.5, 0.5 },// C // Cytosine { 1, 1, 0, 1, 0.5, 1, 1, 0.5, 1, 0.5, 0.5, 0.5, 1, 0.5, 0.5 },// G // Guanine { 1, 1, 1, 0, 1, 0.5, 1, 0.5, 0.5, 1, 0.5, 0.5, 0.5, 1, 0.5 },// T // Thymine { 0.5, 1, 0.5, 1, 0, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 }, // R Purine (A or G) { 1, 0.5, 1, 0.5, 1, 0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 },// Y Pyrimidine (C, T, or U) { 0.5, 0.5, 1, 1, 0.5, 0.5, 0, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 },// M C or A { 1, 1, 0.5, 0.5, 0.5, 0.5, 1, 0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 },// K T, U, or G { 0.5, 1, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0, 1, 0.5, 0.5, 0.5, 0.5, 0.5 },// W T, U, or A { 1, 0.5, 0.5, 1, 0.5, 0.5, 0.5, 0.5, 1, 0, 0.5, 0.5, 0.5, 0.5, 0.5 },// S C or G { 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0, 0.5, 0.5, 0.5, 0.5 },// B C, T, U, or G (not A) { 0.5, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0, 0.5, 0.5, 0.5 }, // D A, T, U, or G (not C) { 0.5, 0.5, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0, 0.5, 0.5 },// H A, T, U, or C (not G) { 0.5, 0.5, 0.5, 1, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0, 0.5 }, // V A, C, or G (not T, not U) { 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0 } // N Anybase (A,C,G,T,or U) }); }