package statalign.model.score.plugins; import java.io.FileNotFoundException; import java.io.IOException; import java.util.Arrays; import statalign.io.input.FileTokenReader; import statalign.model.score.SubstitutionScore; /** * * An implementation of the BLOSUM62 distances * This is the distance and not the similarity matrix!!! * * @author miklos, novak * */ public class Blosum62 extends SubstitutionScore{ final String aaCodes = "ARNDCQEGHILKMFPSTWYV"; // plus ambiguous: BZJX /** * This constructor reads the distances from data/aadist.dat * and sets the which[][] array. * @throws IOException * @throws FileNotFoundException */ public Blosum62() throws IOException, FileNotFoundException{ which = new int[256][20]; /* tells amino acid no. for each valid char */ dist = new int[20][20]; /* amino acid distances in Gotoh's alg. */ // System.out.println("I'm in Blosum62 constructor!"); // System.out.println("I'm in Blosum62 constructor! "+which); //System.out.println("I'm in Blosum62 constructor! "+which.length); FileTokenReader file = new FileTokenReader("data/aadist.dat"); for(int i = 0; i < 20; i++){ for(int j = 0; j <= i; j++){ dist[i][j] = dist[j][i] = file.readDbl().intValue(); } } file.close(); // for(int i = 0; i < 256; i++){ // which[i] = -1; /* no amino acid associated */ for(int i = 0; i < aaCodes.length(); i++){ which[aaCodes.charAt(i)][i] = which[Character.toLowerCase(aaCodes.charAt(i))][i] = 1; } int i; // ambiguous amino acids follow // B: N or D i = aaCodes.indexOf('N'); which['B'][i] = which['b'][i] = 1; i = aaCodes.indexOf('D'); which['B'][i] = which['b'][i] = 1; // Z: E or Q i = aaCodes.indexOf('E'); which['Z'][i] = which['z'][i] = 1; i = aaCodes.indexOf('Q'); which['Z'][i] = which['z'][i] = 1; // J: L or K i = aaCodes.indexOf('L'); which['J'][i] = which['j'][i] = 1; i = aaCodes.indexOf('K'); which['J'][i] = which['j'][i] = 1; // X: any Arrays.fill(which['X'], 1); Arrays.fill(which['x'], 1); } }