package org.genedb.util;
/**
 * Static helpers for nucleotide sequences: reverse-complementing a DNA
 * sequence and translating a DNA sequence to a polypeptide sequence.
 */
public class SequenceUtils {

    /** Lower-case DNA alphabet accepted by {@link #reverseComplement(String)}: the four bases, the IUPAC ambiguity codes, and 'x'. */
    private static final char[] COMPLEMENT_FROM = "acgtmrwsykvhdbnx".toCharArray();

    /** Complement of each symbol of {@link #COMPLEMENT_FROM}, at the same index. */
    private static final char[] COMPLEMENT_TO = "tgcakywsrmbdhvnx".toCharArray();

    /**
     * Compute the reverse-complement of a DNA sequence.
     * <p>
     * The input may be in upper, lower or mixed case; it is lower-cased before
     * being complemented, so the result is always in lower case.
     *
     * @param sequence the DNA sequence; must not be null
     * @return the reverse-complement of the sequence, in lower case
     * @throws NullPointerException if <code>sequence</code> is null
     * @throws IllegalArgumentException if the sequence contains a character
     *          that is not in the supported DNA alphabet
     */
    public static String reverseComplement(String sequence) {
        if (sequence == null) {
            throw new NullPointerException("sequence must not be null");
        }
        // Use a fixed locale: case folding of the sequence alphabet must not
        // depend on the default locale of the JVM.
        String lowerCased = sequence.toLowerCase(java.util.Locale.ROOT);
        return transliterate(lowerCased, COMPLEMENT_FROM, COMPLEMENT_TO).reverse().toString();
    }

    /**
     * Replace every character of <code>string</code> by the character found in
     * <code>to</code> at the index that character occupies in <code>from</code>.
     *
     * @param string the string to transliterate
     * @param from the source alphabet
     * @param to the destination alphabet, the same length as <code>from</code>
     * @return a new builder holding the transliterated characters, in the original order
     * @throws IllegalArgumentException if the alphabets differ in length, or if
     *          the string contains a character that is not in <code>from</code>
     */
    private static StringBuilder transliterate(String string, char[] from, char[] to) {
        if (from.length != to.length) {
            throw new IllegalArgumentException("Source and destination alphabets have different lengths");
        }
        // The output has exactly one character per input character.
        StringBuilder result = new StringBuilder(string.length());
        for (int pos = 0; pos < string.length(); pos++) {
            char c = string.charAt(pos);
            int index = indexOf(from, c);
            if (index < 0) {
                throw new IllegalArgumentException(String.format("String contains character '%c' not in alphabet", c));
            }
            result.append(to[index]);
        }
        return result;
    }

    /**
     * Find the first position of a character in an alphabet.
     *
     * @param alphabet the alphabet to search
     * @param c the character to look for
     * @return the index of the first occurrence of <code>c</code>, or -1 if it is absent
     */
    private static int indexOf(char[] alphabet, char c) {
        for (int i = 0; i < alphabet.length; i++) {
            if (alphabet[i] == c) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Translate a DNA sequence to a polypeptide sequence.
     * <p>
     * Delegates to the <code>Translator</code> obtained for the given genetic code ID.
     *
     * @see <a href="http://www.ncbi.nlm.nih.gov/Taxonomy/Utils/wprintgc.cgi">NCBI genetic codes</a>
     *
     * @param translationTableId the genetic code ID, as defined by NCBI
     * @param dnaSequence the DNA sequence
     * @param phase the phase (0-2)
     * @param stopCodonTranslatedAsSelenocysteine whether an internal stop codon should be
     *          treated as read-through and translated to Selenocysteine
     * @return the translated sequence
     * @throws TranslationException propagated from the underlying <code>Translator</code>
     */
    public static String translate(int translationTableId, String dnaSequence, int phase, boolean stopCodonTranslatedAsSelenocysteine) throws TranslationException {
        return Translator.getTranslator(translationTableId).translate(dnaSequence, phase, stopCodonTranslatedAsSelenocysteine);
    }
}