package aliview;
import java.util.ArrayList;
import java.util.Arrays;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
/**
 * Static helpers for nucleotide characters and IUPAC ambiguity codes.
 *
 * <p>Every base is represented as a bit mask: {@code A=1}, {@code C=2},
 * {@code G=4} and {@code T/U=8}. An ambiguity code is the sum of the masks of
 * the bases it stands for (for example {@code R = A + G}). A gap is {@code 0}
 * and any character that is not recognised maps to {@link #UNKNOWN}.
 */
public class NucleotideUtilities {

    public static final int GAP = 0;
    public static final int A = 1;
    public static final int C = 2;
    public static final int G = 4;
    public static final int TU = 8;
    public static final int U = 8;
    public static final int T = 8;
    public static final int UNKNOWN = 32;
    public static final int R = A + G;
    public static final int Y = C + TU;
    public static final int M = C + A;
    public static final int K = TU + G;
    public static final int W = TU + A;
    public static final int S = C + G;
    public static final int B = C + TU + G;
    public static final int D = A + TU + G;
    public static final int H = A + TU + C;
    public static final int V = A + C + G;
    public static final int N = A + C + T + G;

    /**
     * Upper-case ambiguity codes and the bases each one is expanded to by
     * {@link #regenerateDegenerated(String)}. The row order is the order in
     * which the codes are processed, and the order of the letters in the
     * second column is the order in which the expanded sequences are emitted.
     */
    private static final String[][] IUPAC_EXPANSIONS = {
        {"R", "AG"},
        {"Y", "CT"},
        {"M", "CA"},
        {"K", "TG"},
        {"W", "TA"},
        {"S", "CG"},
        {"B", "CTG"},
        {"D", "ATG"},
        {"H", "ATC"},
        {"V", "AGC"},
        {"N", "ATGC"},
    };

    /** Characters that {@link #complement(String)} knows how to complement. */
    private static final String COMPLEMENT_FROM = "AaCcGgTtUuRrYyKkMmSsWwBbDdHhVvNn-";

    /** Complement of the character at the same index in {@link #COMPLEMENT_FROM}. */
    private static final String COMPLEMENT_TO = "TtGgCcAaAaYyRrMmKkSsWwVvHhDdBbNn-";

    /**
     * Returns the bit mask for a base stored as a byte.
     *
     * @param base the base character as a byte
     * @return the mask produced by {@link #baseValFromChar(char)} for the byte cast to {@code char}
     */
    public static final int baseValFromBase(byte base) {
        return baseValFromChar((char) base);
    }

    /**
     * Returns the complementary base or ambiguity code, preserving case.
     *
     * <p>{@code U}/{@code u} are complemented to {@code A}/{@code a}; lower-case
     * {@code n}, {@code ?} and {@code .} all become {@code n}; a gap stays a gap.
     * Any other character is returned unchanged (narrowed to a byte).
     *
     * @param base the character to complement
     * @return the complement as a byte
     */
    public static final byte complement(char base) {
        switch (base) {
            case 'A': return 'T';
            case 'C': return 'G';
            case 'G': return 'C';
            case 'T':
            case 'U': return 'A';
            case 'R': return 'Y';
            case 'Y': return 'R';
            case 'M': return 'K';
            case 'K': return 'M';
            case 'W': return 'W';
            case 'S': return 'S';
            case 'B': return 'V';
            case 'D': return 'H';
            case 'H': return 'D';
            case 'V': return 'B';
            case 'N': return 'N';
            case 'a': return 't';
            case 'c': return 'g';
            case 'g': return 'c';
            case 't':
            case 'u': return 'a';
            case 'r': return 'y';
            case 'y': return 'r';
            case 'm': return 'k';
            // The complement of k (G or T) is m (A or C), mirroring the upper-case case.
            case 'k': return 'm';
            case 'w': return 'w';
            case 's': return 's';
            case 'b': return 'v';
            case 'd': return 'h';
            case 'h': return 'd';
            case 'v': return 'b';
            case 'n':
            case '?':
            case '.': return 'n';
            case '-': return '-';
            default:  return (byte) base;
        }
    }

    /**
     * Tells whether the byte is one of the characters mapped to {@link #GAP}
     * by {@link #baseValFromChar(char)}.
     *
     * @param base the base character as a byte
     * @return {@code true} if the byte maps to {@link #GAP}
     */
    public static final boolean isGap(byte base) {
        return baseValFromBase(base) == GAP;
    }

    /**
     * Tells whether the byte is a nucleotide or IUPAC ambiguity code, i.e.
     * maps to neither {@link #GAP} nor {@link #UNKNOWN}.
     *
     * @param base the base character as a byte
     * @return {@code true} if the byte is a recognised base or ambiguity code
     */
    public static final boolean isNucleoticeOrIUPAC(byte base) {
        int baseVal = baseValFromBase(base);
        return baseVal != GAP && baseVal != UNKNOWN;
    }

    /**
     * Converts a character to its bit mask, ignoring case.
     *
     * <p>{@code ?} and {@code .} are treated like {@code N}. The characters
     * {@code -}, {@code _}, space, line feed and carriage return are gaps.
     * Everything else is {@link #UNKNOWN}.
     *
     * @param base the character to convert
     * @return the bit mask for the character
     */
    public static final int baseValFromChar(char base) {
        switch (base) {
            case 'A': case 'a': return A;
            case 'C': case 'c': return C;
            case 'G': case 'g': return G;
            case 'T': case 't':
            case 'U': case 'u': return TU;
            case 'R': case 'r': return R;
            case 'Y': case 'y': return Y;
            case 'M': case 'm': return M;
            case 'K': case 'k': return K;
            case 'W': case 'w': return W;
            case 'S': case 's': return S;
            case 'B': case 'b': return B;
            case 'D': case 'd': return D;
            case 'H': case 'h': return H;
            case 'V': case 'v': return V;
            case 'N': case 'n': case '?': case '.': return N;
            case '-': case '_': case ' ': case '\n': case '\r': return GAP;
            default: return UNKNOWN;
        }
    }

    /**
     * Converts a bit mask to its character, narrowed to a byte.
     *
     * @param consensusVal the bit mask to convert
     * @return the result of {@link #charFromBaseVal(int)} as a byte
     */
    public static byte byteFromBaseVal(int consensusVal) {
        return (byte) charFromBaseVal(consensusVal);
    }

    /**
     * Converts a bit mask to the upper-case base or ambiguity code.
     *
     * @param baseVal the bit mask to convert
     * @return the matching upper-case letter ({@code T} for the T/U mask),
     *         {@code -} for {@link #GAP}, or {@code ?} for any other value
     */
    public static final char charFromBaseVal(int baseVal) {
        switch (baseVal) {
            case A:   return 'A';
            case C:   return 'C';
            case G:   return 'G';
            case TU:  return 'T';
            case R:   return 'R';
            case Y:   return 'Y';
            case M:   return 'M';
            case K:   return 'K';
            case W:   return 'W';
            case S:   return 'S';
            case B:   return 'B';
            case D:   return 'D';
            case H:   return 'H';
            case V:   return 'V';
            case N:   return 'N';
            case GAP: return '-';
            default:  return '?';
        }
    }

    /**
     * Returns how many bases the character stands for.
     *
     * <p>Two-base codes give 2, three-base codes give 3, and {@code N},
     * {@code n} and {@code ?} give 4. Plain bases, the gap and every other
     * character give 1. Note that {@code .} falls into the last group here,
     * although {@link #baseValFromChar(char)} treats it like {@code N}.
     *
     * @param base the character to inspect
     * @return the degeneracy of the character (1 to 4)
     */
    public static final int degenFoldFromChar(char base) {
        switch (base) {
            case 'R': case 'r': case 'Y': case 'y': case 'M': case 'm':
            case 'K': case 'k': case 'W': case 'w': case 'S': case 's':
                return 2;
            case 'B': case 'b': case 'D': case 'd':
            case 'H': case 'h': case 'V': case 'v':
                return 3;
            case 'N': case 'n': case '?':
                return 4;
            default:
                // Plain bases, '-' and anything unrecognised all count as one.
                return 1;
        }
    }

    /**
     * Lists the plain bases contained in a bit mask.
     *
     * @param baseVal the bit mask to decompose
     * @return the contained bases in the fixed order A, C, G, T; empty if none
     */
    public static char[] nucleotideCharsFromBaseVal(int baseVal) {
        StringBuilder nuces = new StringBuilder(4);
        if ((A & baseVal) == A) {
            nuces.append('A');
        }
        if ((C & baseVal) == C) {
            nuces.append('C');
        }
        if ((G & baseVal) == G) {
            nuces.append('G');
        }
        if ((T & baseVal) == T) {
            nuces.append('T');
        }
        return nuces.toString().toCharArray();
    }

    /**
     * Expands the upper-case ambiguity codes of a sequence into sequences
     * that contain plain bases instead.
     *
     * <p>The codes are processed one at a time in the order R, Y, M, K, W, S,
     * B, D, H, V, N. Whenever a sequence in the working list contains the
     * current code it is removed and its expansions are appended to the end of
     * the list. Lower-case codes are not expanded.
     *
     * @param input the sequence to expand; must not be {@code null}
     * @return the expanded sequences, or a list holding only {@code input}
     *         when it contains no upper-case ambiguity code
     */
    public static ArrayList<String> regenerateDegenerated(String input) {
        ArrayList<String> sequences = new ArrayList<String>();
        sequences.add(input);
        for (String[] expansion : IUPAC_EXPANSIONS) {
            expandCode(sequences, expansion[0], expansion[1]);
        }
        return sequences;
    }

    /**
     * Replaces, in place, every sequence containing {@code upac} with its expansions.
     */
    private static void expandCode(ArrayList<String> sequences, String upac, String replace) {
        int n = 0;
        while (n < sequences.size()) {
            ArrayList<String> replaced = deUpac(sequences.get(n), upac, replace);
            if (replaced != null && !replaced.isEmpty()) {
                // The expansions go to the end of the list; the element that
                // slides into position n is examined next, so n is not advanced.
                sequences.remove(n);
                sequences.addAll(replaced);
            } else {
                n++;
            }
        }
    }

    /**
     * Builds one copy of {@code input} per letter of {@code replace}, with the
     * ambiguity code substituted by that letter.
     *
     * <p>NOTE(review): every occurrence of the code is substituted at once, so
     * a sequence holding the same code at several positions only yields the
     * uniform substitutions (for example {@code "RR"} gives {@code "AA"} and
     * {@code "GG"}, never {@code "AG"} or {@code "GA"}). Confirm that this is
     * the intended trade-off.
     *
     * @return the substituted sequences, or {@code null} if {@code input} does
     *         not contain the code
     */
    private static ArrayList<String> deUpac(String input, String upac, String replace) {
        if (input.indexOf(upac) == -1) {
            return null;
        }
        ArrayList<String> replaced = new ArrayList<String>(replace.length());
        char code = upac.charAt(0);
        for (int n = 0; n < replace.length(); n++) {
            replaced.add(input.replace(code, replace.charAt(n)));
        }
        return replaced;
    }

    /**
     * Returns the input with its characters in reverse order.
     *
     * @param input the string to reverse; must not be {@code null}
     * @return the reversed string
     */
    public static final String reverse(String input) {
        return new StringBuilder(input).reverse().toString();
    }

    /**
     * Complements every base and ambiguity code of a sequence, preserving case.
     *
     * <p>{@code U}/{@code u} become {@code A}/{@code a}. Characters without a
     * known complement are copied unchanged.
     *
     * @param input the sequence to complement; must not be {@code null}
     * @return the complemented sequence, same length as {@code input}
     */
    public static final String complement(String input) {
        StringBuilder output = new StringBuilder(input.length());
        for (int n = 0; n < input.length(); n++) {
            char normChar = input.charAt(n);
            int nuclIndex = COMPLEMENT_FROM.indexOf(normChar);
            // A character that is not in the table is retained as it is.
            output.append(nuclIndex > -1 ? COMPLEMENT_TO.charAt(nuclIndex) : normChar);
        }
        return output.toString();
    }

    /**
     * Returns the reverse complement of a sequence.
     *
     * @param sequence the sequence to process; must not be {@code null}
     * @return {@link #complement(String)} of the sequence, reversed
     */
    public static final String revComp(String sequence) {
        return reverse(complement(sequence));
    }

    /**
     * @param input a base or ambiguity code
     * @return {@code true} if the bit mask of the character includes A
     */
    public static boolean containsA(char input) {
        return (A & baseValFromChar(input)) == A;
    }

    /**
     * @param input a base or ambiguity code
     * @return {@code true} if the bit mask of the character includes C
     */
    public static boolean containsC(char input) {
        return (C & baseValFromChar(input)) == C;
    }

    /**
     * @param input a base or ambiguity code
     * @return {@code true} if the bit mask of the character includes T/U
     */
    public static boolean containsT(char input) {
        return (T & baseValFromChar(input)) == T;
    }

    /**
     * @param input a base or ambiguity code
     * @return {@code true} if the bit mask of the character includes G
     */
    public static boolean containsG(char input) {
        return (G & baseValFromChar(input)) == G;
    }

    /**
     * Scores how well two characters can pair with each other.
     *
     * <p>Each possible A/T pairing adds 2 and each possible C/G pairing adds 4.
     * When either character is degenerate the score is replaced by
     * {@code (number of possible pairings * 2) / (product of the two degeneracies)},
     * using integer division.
     *
     * @param n1Char the first base or ambiguity code
     * @param n2Char the second base or ambiguity code
     * @return the binding score
     */
    public static int getDimerBinding(char n1Char, char n2Char) {
        int bindVal = 0;
        int bindCount = 0;
        if (containsA(n1Char) && containsT(n2Char)) {
            bindVal += 2;
            bindCount++;
        }
        if (containsT(n1Char) && containsA(n2Char)) {
            bindVal += 2;
            bindCount++;
        }
        if (containsC(n1Char) && containsG(n2Char)) {
            bindVal += 4;
            bindCount++;
        }
        if (containsG(n1Char) && containsC(n2Char)) {
            bindVal += 4;
            bindCount++;
        }
        int degenerateFold = degenFoldFromChar(n1Char) * degenFoldFromChar(n2Char);
        // Degenerate input: the summed score is discarded and recalculated.
        if (degenerateFold > 1) {
            bindVal = bindCount * 2 / degenerateFold;
        }
        return bindVal;
    }

    /**
     * Spreads a sequence over four rows so that every column lists the plain
     * bases its character stands for.
     *
     * <p>Column {@code x} holds the bases of {@code sequence.charAt(x)} from
     * row 0 downwards in the order A, C, G, T; unused cells are spaces. Gaps
     * and unrecognised characters produce a column of spaces.
     *
     * @param sequence the sequence to decompose; must not be {@code null}
     * @return four strings, each as long as {@code sequence}
     */
    public static final String[] seqToDeUPACStringArray(String sequence) {
        char[][] matrix = new char[4][sequence.length()];
        for (char[] row : matrix) {
            Arrays.fill(row, ' ');
        }
        for (int x = 0; x < sequence.length(); x++) {
            char[] nucleotidesInThisXpos =
                    nucleotideCharsFromBaseVal(baseValFromChar(sequence.charAt(x)));
            for (int y = 0; y < nucleotidesInThisXpos.length; y++) {
                matrix[y][x] = nucleotidesInThisXpos[y];
            }
        }
        String[] stringArray = new String[matrix.length];
        for (int n = 0; n < matrix.length; n++) {
            stringArray[n] = new String(matrix[n]);
        }
        return stringArray;
    }

    /**
     * Complements a sequence in place using {@link #complement(char)}.
     *
     * @param byteSeq the sequence to overwrite with its complement; must not be {@code null}
     */
    public static void complement(byte[] byteSeq) {
        for (int n = 0; n < byteSeq.length; n++) {
            byteSeq[n] = complement((char) byteSeq[n]);
        }
    }

    /**
     * Returns the code that covers both bases, built by OR-ing their bit masks.
     *
     * <p>Because {@link #GAP} is 0, a gap combined with a base yields that base.
     *
     * @param base1 the first base as a byte
     * @param base2 the second base as a byte
     * @return the character for the combined mask, or {@code ?} if the mask
     *         has no character
     */
    public static byte getConsensusFromBases(byte base1, byte base2) {
        int consensusVal = baseValFromBase(base1) | baseValFromBase(base2);
        return byteFromBaseVal(consensusVal);
    }

    /**
     * Returns the character of whichever base has the numerically smaller bit
     * mask; since {@link #GAP} is 0, a gap in either position yields a gap.
     *
     * @param base1 the first base as a byte
     * @param base2 the second base as a byte
     * @return the character for the smaller of the two masks
     */
    public static byte getMinBase(byte base1, byte base2) {
        int minBaseVal = Math.min(baseValFromBase(base1), baseValFromBase(base2));
        return byteFromBaseVal(minBaseVal);
    }

    /**
     * @param base1 the first base as a byte
     * @param base2 the second base as a byte
     * @return {@code true} if at least one of the two bytes is a gap
     */
    public static boolean isAtLeastOneGap(byte base1, byte base2) {
        return isGap(base1) || isGap(base2);
    }

    /** Lookup table indexed by unsigned byte value; nothing in this class fills it. */
    static byte[] allResidues = new byte[256];

    /**
     * Looks a residue up in {@link #allResidues}.
     *
     * @param residue the residue byte; treated as unsigned so that values of
     *        0x80 and above address the upper half of the table instead of
     *        throwing {@link ArrayIndexOutOfBoundsException}
     * @return the table entry for the residue
     */
    public static int baseValFromBaseOtherVer(byte residue) {
        return allResidues[residue & 0xFF];
    }
}