package statalign.model.subst.plugins; import java.awt.Color; import statalign.io.RawSequences; import statalign.model.subst.RecognitionError; import statalign.model.subst.SubstitutionModel; /** * Common superclass for amino acid substitution models. * * @author novak */ public abstract class AminoAcidModel extends SubstitutionModel { public static final String type = "protein"; static final String magentaCharacters = "RHK"; static final String redCharacters = "AVFPMILW"; static final String blueCharacters = "DE"; static final String greenCharacters = "STYCNGQ"; static final Color char2color[] = new Color[255]; static { int i; for(i = 0; i < 255; i++) char2color[i] = Color.LIGHT_GRAY; for(i = 0; i < magentaCharacters.length(); i++) char2color[magentaCharacters.charAt(i)] = Color.MAGENTA; for(i = 0; i < redCharacters.length(); i++) char2color[redCharacters.charAt(i)] = Color.RED; for(i = 0; i < blueCharacters.length(); i++) char2color[blueCharacters.charAt(i)] = Color.BLUE; for(i = 0; i < greenCharacters.length(); i++) char2color[greenCharacters.charAt(i)] = Color.GREEN; } /** * Returns the color of a character. * * Magenta characters are 'RHK', red characters are 'AVFPMILW', * blue characters are 'DE' and green characters are 'STYCNGQ'. */ @Override public Color getColor(char c) { return char2color[c]; // 'cause speed matters... } /** * Dummy function with the possibility of further developments. */ @Override public String print() { return ""; } /** * Empty method provided for models with no parameters (the evolutionary time * is represented in the edge lengths). */ @Override public void restoreParameter() { } /** * It does nothing and returns 0, i.e log-probability 1. */ @Override public double sampleParameter() { return 0.0; } /** * This function decides if the model can accept the input sequences. */ @Override public double acceptable(RawSequences r) { int[] count = new int[alphabet.length]; String accept = new String(alphabet).toUpperCase()+"BZJX"; // accept ambiguous amino acids for(int i = 0; i < r.size(); i++){ String sequence = r.getSequence(i); for(int j = 0; j < sequence.length(); j++){ char ch = sequence.charAt(j); if(ch != '-' && ch != ' ') { int k = accept.indexOf(Character.toUpperCase(ch)); if(k == -1) { throw new RecognitionError(getMenuName()+" cannot be used with the current sequences because they contain the character '"+ch+"'!\n"); } else if(k < alphabet.length) { count[k] = 1; } } } } int sum = 0; for(int i = 0; i < count.length; i++){ sum += count[i]; } return (double)sum/(double)count.length + 0.1; } /** * Returns the most likely character given the Felsentein likelihood array. */ @Override public char mostLikely(double[] seq) { // "ARNDCQEGHILKMFPSTWYV"; // plus ambiguous: BZJX double max = 0.0; char character = '*'; for(int i = 0; i < seq.length; i++){ if(seq[i] > max){ character = alphabet[i]; max = seq[i]; } } if(character == 'A' && seq[0] == seq[1]) // X: any character = 'X'; else if(character == 'N' && seq[2] == seq[3]) // B: N or D character = 'B'; else if(character == 'Q' && seq[5] == seq[6]) // Z: E or Q character = 'Z'; else if(character == 'L' && seq[10] == seq[11]) // J: L or K character = 'J'; return character; } }