package com.compomics.util.experiment.biology.variants;
import com.compomics.util.experiment.biology.AminoAcid;
import java.io.Serializable;
import java.util.HashMap;
import java.util.HashSet;
/**
* Matrix of amino acid substitutions. This class contains pre-implemented
* matrices.
*
* @author Marc Vaudel
*/
public class AaSubstitutionMatrix implements Serializable {

    /**
     * Serial number for backward compatibility.
     */
    static final long serialVersionUID = -4257237524665484732L;
    /**
     * The name of this substitution matrix.
     */
    private String name;
    /**
     * The description of the substitution matrix.
     */
    private String description;
    /**
     * Map of the possible amino acid substitutions: original aa &gt;
     * substituted aas.
     */
    private final HashMap<Character, HashSet<Character>> substitutions = new HashMap<Character, HashSet<Character>>(26);
    /**
     * Reverse map of the possible amino acid substitutions: substituted aa
     * &gt; original aas.
     */
    private final HashMap<Character, HashSet<Character>> reverseMap = new HashMap<Character, HashSet<Character>>(26);

    // NOTE: the static matrices below are built during class initialization,
    // in textual order. Anything the factory methods need must therefore be
    // local to those methods or declared above this point.
    /**
     * Empty substitution matrix.
     */
    public static final AaSubstitutionMatrix noSubstitution = new AaSubstitutionMatrix("No Substitution", "No substitution");
    /**
     * Substitution matrix allowing for a single base substitution.
     */
    public static final AaSubstitutionMatrix singleBaseSubstitution = singleBaseSubstitution();
    /**
     * Substitution matrix allowing for a single base transition variant.
     */
    public static final AaSubstitutionMatrix transitionsSingleBaseSubstitution = transitionsSingleBaseSubstitution();
    /**
     * Substitution matrix allowing for a single base transversion variant.
     */
    public static final AaSubstitutionMatrix transversalSingleBaseSubstitution = transversalSingleBaseSubstitution();
    /**
     * Substitution matrix allowing all substitutions.
     */
    public static final AaSubstitutionMatrix allSubstitutions = all();
    /**
     * Substitution matrix grouping synonymous amino acids. Amino acids are
     * grouped according to their side chain properties: - Non-polar aliphatic
     * groups: {'G', 'A', 'V', 'L', 'M', 'I'} - Aromatic groups: {'F', 'Y', 'W'}
     * - Polar neutral groups: {'S', 'T', 'C', 'P', 'N', 'Q'} - Basic groups:
     * {'K', 'R', 'H'} - Acidic groups: {'D', 'E'}.
     */
    public static final AaSubstitutionMatrix synonymousVariant = synonymousVariant();
    /**
     * The implemented default substitution matrices.
     */
    public static final AaSubstitutionMatrix[] defaultMutationMatrices = new AaSubstitutionMatrix[]{
        noSubstitution, singleBaseSubstitution, transitionsSingleBaseSubstitution, transversalSingleBaseSubstitution, synonymousVariant, allSubstitutions};

    /**
     * Constructor. The matrix is created without any substitution.
     *
     * @param name the name of this substitution matrix
     * @param description the description of the substitution matrix
     */
    public AaSubstitutionMatrix(String name, String description) {
        this.name = name;
        this.description = description;
    }

    /**
     * Adds a possible substitution. Both the forward map (original &gt;
     * substituted) and the reverse map (substituted &gt; original) are
     * updated. Adding the same substitution twice has no further effect.
     *
     * @param originalAa the original amino acid represented by its single
     * letter code
     * @param substitutionAa the substituted amino acid represented by its
     * single letter code
     */
    public void addSubstitution(Character originalAa, Character substitutionAa) {
        register(substitutions, originalAa, substitutionAa);
        // The reverse map is keyed by the substituted amino acid; looking it
        // up with any other key would overwrite previously stored origins.
        register(reverseMap, substitutionAa, originalAa);
    }

    /**
     * Adds the given value to the set stored under the given key, creating
     * the set when the key is not yet present in the map.
     *
     * @param map the map to update
     * @param key the key of the set to update
     * @param value the value to add to the set
     */
    private static void register(HashMap<Character, HashSet<Character>> map, Character key, Character value) {
        HashSet<Character> values = map.get(key);
        if (values == null) {
            values = new HashSet<Character>();
            map.put(key, values);
        }
        values.add(value);
    }

    /**
     * Returns the possible substituted amino acids for the given amino acid as
     * a set of their single letter code. Null if none found. The returned set
     * is the one stored in this matrix, not a copy.
     *
     * @param originalAminoAcid the amino acid of interest
     *
     * @return the possible substituted amino acids
     */
    public HashSet<Character> getSubstitutionAminoAcids(Character originalAminoAcid) {
        return substitutions.get(originalAminoAcid);
    }

    /**
     * Returns the possible original amino acids for the given substituted amino
     * acid as a set of their single letter code. Null if none found. The
     * returned set is the one stored in this matrix, not a copy.
     *
     * @param substitutedAminoAcid the substitution amino acid of interest
     *
     * @return the possible original amino acids for the given substituted amino
     * acid
     */
    public HashSet<Character> getOriginalAminoAcids(Character substitutedAminoAcid) {
        return reverseMap.get(substitutedAminoAcid);
    }

    /**
     * Returns the amino acids where a substitution has been registered. The
     * returned set is a copy and can be modified freely.
     *
     * @return the amino acids where a substitution has been registered
     */
    public HashSet<Character> getOriginalAminoAcids() {
        return new HashSet<Character>(substitutions.keySet());
    }

    /**
     * Returns the possible substituted amino acids, i.e. every amino acid
     * registered as the result of at least one substitution. The returned set
     * is a copy and can be modified freely.
     *
     * @return the possible substituted amino acids
     */
    public HashSet<Character> getSubstitutionAminoAcids() {
        // Derived from the forward map so that the result is always
        // consistent with the registered substitutions.
        HashSet<Character> substitutedAas = new HashSet<Character>();
        for (HashSet<Character> targets : substitutions.values()) {
            substitutedAas.addAll(targets);
        }
        return substitutedAas;
    }

    /**
     * Adds the content of a substitution matrix in this matrix.
     *
     * @param otherMatrix the other matrix to add
     */
    public void add(AaSubstitutionMatrix otherMatrix) {
        // getOriginalAminoAcids() returns a copy of the keys, so the outer
        // iteration is not affected by the insertions made below.
        for (Character originalAa : otherMatrix.getOriginalAminoAcids()) {
            for (Character substitutionAa : otherMatrix.getSubstitutionAminoAcids(originalAa)) {
                addSubstitution(originalAa, substitutionAa);
            }
        }
    }

    /**
     * Registers in the given matrix the substitutions obtained when the base
     * at the given position of the codon is replaced by each of the candidate
     * bases. Codons that do not translate to an amino acid and codons that
     * translate to the original amino acid are ignored. The codon is restored
     * to its initial content before returning.
     *
     * @param matrix the matrix where to register the substitutions
     * @param originalAa the single letter code of the original amino acid
     * @param codon the codon of the original amino acid
     * @param position the index of the base to replace in the codon
     * @param candidateBases the bases to try at the given position
     */
    private static void addCodonVariants(AaSubstitutionMatrix matrix, char originalAa, StringBuilder codon, int position, char[] candidateBases) {
        char originalBase = codon.charAt(position);
        for (char base : candidateBases) {
            codon.setCharAt(position, base);
            AminoAcid substitutionAminoAcid = AminoAcid.getAminoAcidFromGeneticCode(codon.toString());
            if (substitutionAminoAcid != null) {
                char substitutionAa = substitutionAminoAcid.getSingleLetterCodeAsChar();
                if (originalAa != substitutionAa) {
                    matrix.addSubstitution(originalAa, substitutionAa);
                }
            }
        }
        codon.setCharAt(position, originalBase);
    }

    /**
     * Indicates whether the given base is one of the two bases of the given
     * pair.
     *
     * @param base the base to look for
     * @param basePair an array of two bases
     *
     * @return true if the base is one of the two bases of the pair
     */
    private static boolean isInPair(char base, char[] basePair) {
        return base == basePair[0] || base == basePair[1];
    }

    /**
     * Returns the substitution matrix allowing for a single base substitution.
     *
     * @return the substitution matrix allowing for a single base substitution
     */
    private static AaSubstitutionMatrix singleBaseSubstitution() {
        AaSubstitutionMatrix result = new AaSubstitutionMatrix("Single Base Substitution", "Single base substitutions");
        char[] bases = {'A', 'T', 'G', 'C'};
        for (char originalAa : AminoAcid.getUniqueAminoAcids()) {
            AminoAcid aminoAcid = AminoAcid.getAminoAcid(originalAa);
            for (String geneCode : aminoAcid.getStandardGeneticCode()) {
                StringBuilder codon = new StringBuilder(geneCode);
                for (int i = 0; i < geneCode.length(); i++) {
                    addCodonVariants(result, originalAa, codon, i, bases);
                }
            }
        }
        return result;
    }

    /**
     * Returns the substitution matrix allowing for a single base transitions
     * variant: a purine is only replaced by a purine and a pyrimidine by a
     * pyrimidine. The amino acid 'X' is skipped.
     *
     * @return the substitution matrix allowing for a single base transitions
     * variant
     */
    private static AaSubstitutionMatrix transitionsSingleBaseSubstitution() {
        AaSubstitutionMatrix result = new AaSubstitutionMatrix("Single Base Transition", "Single base transitions substitutions.");
        char[] purines = {'A', 'G'}, pyrimidines = {'T', 'C'};
        for (char originalAa : AminoAcid.getUniqueAminoAcids()) {
            if (originalAa == 'X') {
                continue;
            }
            AminoAcid aminoAcid = AminoAcid.getAminoAcid(originalAa);
            for (String geneCode : aminoAcid.getStandardGeneticCode()) {
                StringBuilder codon = new StringBuilder(geneCode);
                for (int i = 0; i < geneCode.length(); i++) {
                    char originalBase = geneCode.charAt(i);
                    char[] bases;
                    if (isInPair(originalBase, purines)) {
                        bases = purines;
                    } else if (isInPair(originalBase, pyrimidines)) {
                        bases = pyrimidines;
                    } else {
                        throw new IllegalArgumentException(originalBase + " not recognized for transitions substitution.");
                    }
                    addCodonVariants(result, originalAa, codon, i, bases);
                }
            }
        }
        return result;
    }

    /**
     * Returns the substitution matrix allowing for a single base transversion
     * variant: a purine is only replaced by a pyrimidine and a pyrimidine by
     * a purine.
     *
     * @return the substitution matrix allowing for a single base transversion
     * variant
     */
    private static AaSubstitutionMatrix transversalSingleBaseSubstitution() {
        AaSubstitutionMatrix result = new AaSubstitutionMatrix("Single Base Transversion", "Single base transversion substitutions.");
        char[] purines = {'A', 'G'}, pyrimidines = {'T', 'C'};
        for (char originalAa : AminoAcid.getUniqueAminoAcids()) {
            AminoAcid aminoAcid = AminoAcid.getAminoAcid(originalAa);
            for (String geneCode : aminoAcid.getStandardGeneticCode()) {
                StringBuilder codon = new StringBuilder(geneCode);
                for (int i = 0; i < geneCode.length(); i++) {
                    char originalBase = geneCode.charAt(i);
                    char[] bases;
                    if (isInPair(originalBase, purines)) {
                        bases = pyrimidines;
                    } else if (isInPair(originalBase, pyrimidines)) {
                        bases = purines;
                    } else {
                        throw new IllegalArgumentException(originalBase + " not recognized for transversion substitutions.");
                    }
                    addCodonVariants(result, originalAa, codon, i, bases);
                }
            }
        }
        return result;
    }

    /**
     * Registers in the given matrix every substitution between two distinct
     * amino acids of the given group.
     *
     * @param matrix the matrix where to register the substitutions
     * @param group the single letter codes of the amino acids of the group
     */
    private static void addGroup(AaSubstitutionMatrix matrix, char[] group) {
        for (char originalAminoAcid : group) {
            for (char substitutionAminoAcid : group) {
                if (originalAminoAcid != substitutionAminoAcid) {
                    matrix.addSubstitution(originalAminoAcid, substitutionAminoAcid);
                }
            }
        }
    }

    /**
     * Returns a substitution matrix grouping synonymous amino acids. Amino
     * acids are grouped according to their side chain properties: - Non-polar
     * aliphatic groups: {'G', 'A', 'V', 'L', 'M', 'I'} - Aromatic groups: {'F',
     * 'Y', 'W'} - Polar neutral groups: {'S', 'T', 'C', 'P', 'N', 'Q'} - Basic
     * groups: {'K', 'R', 'H'} - Acidic groups: {'D', 'E'}.
     *
     * @return a substitution matrix grouping synonymous amino acids
     */
    private static AaSubstitutionMatrix synonymousVariant() {
        AaSubstitutionMatrix result = new AaSubstitutionMatrix("Synonymous Variant", "Variants keeping amino acid properties.");
        // Non-polar aliphatic
        addGroup(result, new char[]{'G', 'A', 'V', 'L', 'M', 'I'});
        // Aromatic
        addGroup(result, new char[]{'F', 'Y', 'W'});
        // Polar neutral
        addGroup(result, new char[]{'S', 'T', 'C', 'P', 'N', 'Q'});
        // Basic
        addGroup(result, new char[]{'K', 'R', 'H'});
        // Acidic
        addGroup(result, new char[]{'D', 'E'});
        return result;
    }

    /**
     * Returns the substitution matrix allowing all substitutions.
     *
     * @return the substitution matrix allowing all substitutions
     */
    private static AaSubstitutionMatrix all() {
        AaSubstitutionMatrix result = new AaSubstitutionMatrix("All", "All possible substitutions.");
        for (char originalAa : AminoAcid.getUniqueAminoAcids()) {
            for (char variantAa : AminoAcid.getUniqueAminoAcids()) {
                if (originalAa != variantAa) {
                    result.addSubstitution(originalAa, variantAa);
                }
            }
        }
        return result;
    }

    /**
     * Returns the name of this substitution matrix.
     *
     * @return the name of this substitution matrix
     */
    public String getName() {
        return name;
    }

    /**
     * Sets the name of this substitution matrix.
     *
     * @param name the name of this substitution matrix
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * Returns the description of this substitution matrix.
     *
     * @return the description of this substitution matrix
     */
    public String getDescription() {
        return description;
    }

    /**
     * Sets the description of this substitution matrix.
     *
     * @param description the description of this substitution matrix
     */
    public void setDescription(String description) {
        this.description = description;
    }

    /**
     * Indicates whether the given AaSubstitutionMatrix is the same as this
     * one, i.e. has the same name, the same description, and exactly the same
     * substitutions. The comparison is symmetric: a matrix containing
     * additional substitutions is not considered the same.
     *
     * @param aaSubstitutionMatrix the substitution matrix, can be null
     *
     * @return a boolean indicating whether the given AaSubstitutionMatrix is
     * the same as this one, false if the given matrix is null
     */
    public boolean isSameAs(AaSubstitutionMatrix aaSubstitutionMatrix) {
        if (aaSubstitutionMatrix == null) {
            return false;
        }
        if (this.equals(aaSubstitutionMatrix)) {
            return true;
        }
        if (!sameText(name, aaSubstitutionMatrix.getName())) {
            return false;
        }
        if (!sameText(description, aaSubstitutionMatrix.getDescription())) {
            return false;
        }
        // Map equality compares the key sets and, for every key, the sets of
        // substituted amino acids, in both directions.
        return substitutions.equals(aaSubstitutionMatrix.substitutions);
    }

    /**
     * Null-safe comparison of two strings.
     *
     * @param first the first string, can be null
     * @param second the second string, can be null
     *
     * @return true if both strings are null or if they are equal
     */
    private static boolean sameText(String first, String second) {
        return first == null ? second == null : first.equals(second);
    }

    @Override
    public String toString() {
        return name;
    }
}