package org.nextprot.api.commons.bio;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableSet;
import java.text.ParseException;
import java.util.*;
/**
 * Amino-acids with their representation in one letter and three letter codes.
 *
 * <p>In addition to the unambiguous residues (including selenocysteine, pyrrolysine and the
 * stop/termination symbol), this enum declares four ambiguous codes ({@link #ASX}, {@link #XLE},
 * {@link #GLX}, {@link #XAA}) that each stand for a set of unambiguous residues.
 *
 * <p>All string look-ups are exact and therefore case-sensitive: {@code "Gly"} and {@code "G"} are
 * valid codes, {@code "GLY"} and {@code "g"} are not.
 *
 * Created by fnikitin on 09/07/15.
 */
public enum AminoAcidCode {

    GLYCINE("Gly", "G"),
    PROLINE("Pro", "P"),
    ALANINE("Ala", "A"),
    VALINE("Val", "V"),
    LEUCINE("Leu", "L"),
    ISOLEUCINE("Ile", "I"),
    METHIONINE("Met", "M"),
    CYSTEINE("Cys", "C"),
    PHENYLALANINE("Phe", "F"),
    TYROSINE("Tyr", "Y"),
    TRYPTOPHAN("Trp", "W"),
    HISTIDINE("His", "H"),
    LYSINE("Lys", "K"),
    ARGININE("Arg", "R"),
    GLUTAMINE("Gln", "Q"),
    ASPARAGINE("Asn", "N"),
    GLUTAMIC_ACID("Glu", "E"),
    ASPARTIC_ACID("Asp", "D"),
    SERINE("Ser", "S"),
    THREONINE("Thr", "T"),
    SELENOCYSTEINE("Sec", "U"),
    PYRROLYSINE("Pyl", "O"),
    STOP("Ter", "*"),
    // ambiguous amino-acids
    ASX("Asx", "B"),
    XLE("Xle", "J"),
    GLX("Glx", "Z"),
    XAA("Xaa", "X")
    ;

    /** Type of amino-acid code defined by its number of letters */
    public enum CodeType {

        ONE_LETTER(1),
        THREE_LETTER(3)
        ;

        private final int len;

        CodeType(int len) {
            this.len = len;
        }

        /**
         * @return the number of characters used by one amino-acid in this code type
         */
        public int getCodeLen() {
            return len;
        }
    }

    private final String code3;
    private final String code1;

    /** Look-up table from every 1-letter and 3-letter code to its amino-acid */
    private static final Map<String, AminoAcidCode> AMINO_ACID_CODE_MAP;
    /** For each ambiguous code, the set of unambiguous amino-acids it can stand for */
    private static final Map<AminoAcidCode, Set<AminoAcidCode>> AMINO_ACID_AMBIGUITIES;
    private static final Set<AminoAcidCode> NON_AMBIGUOUS_AMINO_ACIDS;
    private static final Set<AminoAcidCode> AMBIGUOUS_AMINO_ACIDS;

    static {
        EnumSet<AminoAcidCode> ambiguous = EnumSet.of(ASX, XLE, GLX, XAA);

        AMBIGUOUS_AMINO_ACIDS = Collections.unmodifiableSet(ambiguous);
        // Derived as the complement so that the two sets can never overlap or miss a constant.
        NON_AMBIGUOUS_AMINO_ACIDS = Collections.unmodifiableSet(EnumSet.complementOf(ambiguous));

        AminoAcidCode[] all = values();
        // Two keys are stored per constant; the extra room avoids a rehash while filling the map.
        Map<String, AminoAcidCode> byCode = new HashMap<>(all.length * 3);
        for (AminoAcidCode aac : all) {
            byCode.put(aac.code1, aac);
            byCode.put(aac.code3, aac);
        }
        AMINO_ACID_CODE_MAP = Collections.unmodifiableMap(byCode);

        Map<AminoAcidCode, Set<AminoAcidCode>> ambiguities = new EnumMap<>(AminoAcidCode.class);
        ambiguities.put(ASX, EnumSet.of(ASPARTIC_ACID, ASPARAGINE));
        ambiguities.put(XLE, EnumSet.of(LEUCINE, ISOLEUCINE));
        ambiguities.put(GLX, EnumSet.of(GLUTAMIC_ACID, GLUTAMINE));
        ambiguities.put(XAA, NON_AMBIGUOUS_AMINO_ACIDS);
        AMINO_ACID_AMBIGUITIES = Collections.unmodifiableMap(ambiguities);
    }

    AminoAcidCode(String code3, String code1) {
        this.code3 = code3;
        this.code1 = code1;
    }

    /**
     * @return the amino-acid 3-letter code
     */
    public String get3LetterCode() {
        return code3;
    }

    /**
     * @return the amino-acid 1-letter code
     */
    public String get1LetterCode() {
        return code1;
    }

    /**
     * @return true if this aminoAcidCode is ambiguous else false
     */
    public boolean isAmbiguous() {
        return AMBIGUOUS_AMINO_ACIDS.contains(this);
    }

    /**
     * Check that the given amino-acid match this amino-acid.
     *
     * <p>A non-ambiguous amino-acid only matches itself. An ambiguous amino-acid matches each of
     * the non-ambiguous amino-acids it stands for; it matches neither itself nor any other
     * ambiguous amino-acid.
     *
     * @param aminoAcidCode amino-acid to check
     * @return true if matches the given aminoAcidCode else false
     */
    public boolean match(AminoAcidCode aminoAcidCode) {
        if (isAmbiguous()) {
            return AMINO_ACID_AMBIGUITIES.get(this).contains(aminoAcidCode);
        }
        return this == aminoAcidCode;
    }

    /**
     * Check validity of the given amino-acid code string (case-sensitive)
     * @param code the amino-acid 1- or 3- letter code
     * @return true if amino-acid code is valid else false (also false for {@code null})
     */
    public static boolean isValidAminoAcid(String code) {
        return AMINO_ACID_CODE_MAP.containsKey(code);
    }

    /**
     * Get an instance of AminoAcidCode for the given amino-acid code string
     * @param code the amino-acid 1- or 3- letter code
     * @return an AminoAcidCode given a string
     * @throws IllegalArgumentException if code is not a known 1- or 3- letter code
     */
    public static AminoAcidCode valueOfAminoAcid(String code) {
        AminoAcidCode found = AMINO_ACID_CODE_MAP.get(code);
        if (found == null) {
            throw new IllegalArgumentException("No enum constant AminoAcid." + code);
        }
        return found;
    }

    /**
     * Get the set of non ambiguous amino-acids
     * @return immutable set of non ambiguous amino-acids
     */
    public static Set<AminoAcidCode> nonAmbiguousAminoAcidValues() {
        return NON_AMBIGUOUS_AMINO_ACIDS;
    }

    /**
     * Get the set of ambiguous amino-acids
     * @return immutable set of ambiguous amino-acids
     */
    public static Set<AminoAcidCode> ambiguousAminoAcidValues() {
        return AMBIGUOUS_AMINO_ACIDS;
    }

    /**
     * Parse sequence and make an instance of AminoAcidCode array (auto CodeType deduction).
     *
     * <p>The sequence is read as 3-letter codes when its first three characters form a valid
     * 3-letter code, and as 1-letter codes otherwise.
     *
     * @param sequence the sequence to parse
     * @return an array of AminoAcidCode
     * @throws NullPointerException if sequence is null
     * @throws IllegalArgumentException if the sequence cannot be parsed with the deduced code type
     */
    public static AminoAcidCode[] valueOfAminoAcidCodeSequence(String sequence) {
        Objects.requireNonNull(sequence, "sequence");

        boolean startsWith3LetterCode = sequence.length() >= 3 && isValidAminoAcid(sequence.substring(0, 3));

        return valueOfAminoAcidCodeSequence(sequence,
                startsWith3LetterCode ? CodeType.THREE_LETTER : CodeType.ONE_LETTER);
    }

    /**
     * Parse sequence and make an instance of AminoAcidCode array
     * @param sequence the sequence to parse
     * @param codeType the amino-acid code type of the given sequence
     * @return an array of AminoAcidCode
     * @throws NullPointerException if sequence or codeType is null
     * @throws IllegalArgumentException if the sequence length is not a multiple of the code length
     *  or if the sequence contains an unknown code
     */
    public static AminoAcidCode[] valueOfAminoAcidCodeSequence(String sequence, CodeType codeType) {
        Objects.requireNonNull(sequence, "sequence");
        Objects.requireNonNull(codeType, "codeType");

        int codeLen = codeType.getCodeLen();
        if (sequence.length() % codeLen != 0) {
            // Report the numeric code length rather than the CodeType constant name.
            throw new IllegalArgumentException("Invalid sequence length: " + sequence + " length is not a multiple of " + codeLen);
        }

        AminoAcidCode[] aminoAcidCodes = new AminoAcidCode[sequence.length() / codeLen];
        for (int i = 0; i < aminoAcidCodes.length; i++) {
            int from = i * codeLen;
            aminoAcidCodes[i] = valueOfAminoAcid(sequence.substring(from, from + codeLen));
        }
        return aminoAcidCodes;
    }

    /**
     * Format AminoAcidCodes into string by concatenating their codes without separator
     * @param type the amino-acid code type (1- or 3- letter code); any value other than
     *  {@link CodeType#ONE_LETTER} produces 3-letter codes
     * @param aas amino-acids to format
     * @return a formatted string
     */
    public static String formatAminoAcidCode(CodeType type, AminoAcidCode... aas) {
        boolean oneLetter = (type == CodeType.ONE_LETTER);
        StringBuilder sb = new StringBuilder();
        for (AminoAcidCode aa : aas) {
            sb.append(oneLetter ? aa.get1LetterCode() : aa.get3LetterCode());
        }
        return sb.toString();
    }

    /**
     * Get an instance of AminoAcidCode given
     * @param code amino-acid code (1- or 3- letters)
     * @return an AminoAcidCode
     * @throws NullPointerException if code is null
     * @throws IllegalArgumentException if code is neither 1 nor 3 characters long
     * @throws ParseException if code is not well formatted
     */
    public static AminoAcidCode parseAminoAcidCode(String code) throws ParseException {
        Objects.requireNonNull(code, "code");
        if (code.length() != 1 && code.length() != 3) {
            throw new IllegalArgumentException("amino-acid code should be in 1 letter or 3 letters format");
        }

        AminoAcidCode found = AMINO_ACID_CODE_MAP.get(code);
        if (found == null) {
            throw new ParseException(code+": invalid AminoAcidCode", 0);
        }
        return found;
    }
}