/**
* Copyright (C) 2012-2013 Selventa, Inc.
*
* This file is part of the OpenBEL Framework.
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* The OpenBEL Framework is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
* or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
* License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with the OpenBEL Framework. If not, see <http://www.gnu.org/licenses/>.
*
* Additional Terms under LGPL v3:
*
* This license does not authorize you and you are prohibited from using the
* name, trademarks, service marks, logos or similar indicia of Selventa, Inc.,
* or, in the discretion of other licensors or authors of the program, the
* name, trademarks, service marks, logos or similar indicia of such authors or
* licensors, in any marketing or advertising materials relating to your
* distribution of the program or any covered product. This restriction does
* not waive or limit your obligation to keep intact all copyright notices set
* forth in the program as delivered to you.
*
* If you distribute the program in whole or in part, or any modified version
* of the program, and you assume contractual liability to the recipient with
* respect to the program or modified version, then you will indemnify the
* authors and licensors of the program for any liabilities that these
* contractual assumptions directly impose on those licensors and authors.
*/
package org.openbel.framework.common.enums;
import static org.openbel.framework.common.BELUtilities.sizedHashMap;
import static org.openbel.framework.common.BELUtilities.sizedHashSet;
import java.util.Map;
import java.util.Set;
/**
* Enumerated representation of amino acid.
* <p>
* Portions of this enum have been automatically generated from <a
* href="http://en.wikipedia.org/wiki/Amino_acid">here</a>.
* </p>
*
* @see CovalentModification#isAcetylated(AminoAcid)
* @see CovalentModification#isFarnesylated(AminoAcid)
* @see CovalentModification#isGlycolsylated(AminoAcid)
* @see CovalentModification#isHydroxylated(AminoAcid)
* @see CovalentModification#isMethylated(AminoAcid)
* @see CovalentModification#isPhosphorylated(AminoAcid)
* @see CovalentModification#isRibosylated(AminoAcid)
* @see CovalentModification#isSumoylated(AminoAcid)
* @see CovalentModification#isUbiquitinated(AminoAcid)
*/
public enum AminoAcid {
/**
* <p>
* Alanine (abbreviated as Ala or A) is an amino acid with the chemical
* formula CH3CH(NH2)COOH. The L-isomer is one of the 22 proteinogenic amino
* acids, i.e., the building blocks of proteins. Its codons are GCU, GCC,
* GCA, and GCG. It is classified as a nonpolar amino acid. L-Alanine is
* second only to leucine in rate of occurrence, accounting for 7.8% of the
* primary structure in a sample of 1,150 proteins. D-Alanine occurs in
* bacterial cell walls and in some peptide antibiotics.
* </p>
*/
ALANINE(0, "Alanine", "Ala", "A"),
/**
* <p>
* Arginine (abbreviated as Arg or R) is an amino acid. The L-form is one
* of the 20 most common natural amino acids. At the level of molecular
* genetics, in the structure of the messenger ribonucleic acid mRNA, CGU,
* CGC, CGA, CGG, AGA, and AGG, are the triplets of nucleotide bases or
* codons that codify for arginine during protein synthesis. In mammals,
* arginine is classified as a semiessential or conditionally essential
* amino acid, depending on the developmental stage and health status of the
* individual. Preterm infants are unable to synthesize or create arginine
* internally, making the amino acid nutritionally essential for them.
* Arginine was first isolated from a lupin seedling extract in 1886 by the
* Swiss chemist Ernst Schultze. In general, most people do not need to take
* arginine supplements because the body usually produces enough.
* </p>
*/
ARGININE(1, "Arginine", "Arg", "R"),
/**
* <p>
* Asparagine (abbreviated as Asn or N; Asx or B represent either asparagine
* or aspartic acid) is one of the 20 most common natural amino acids on
* Earth. It has carboxamide as the side-chain's functional group. It is not
* an essential amino acid. Its codons are AAU and AAC. A reaction between
* asparagine and reducing sugars or reactive carbonyls produces acrylamide
* (acrylic amide) in food when heated to sufficient temperature. These
* products occur in baked goods such as French fries, potato chips, and
* roasted coffee.
* </p>
*/
ASPARAGINE(2, "Asparagine", "Asn", "N"),
/**
* <p>
* Aspartic acid (abbreviated as Asp or D; Asx or B represent either
* aspartic acid or asparagine) is an amino acid with the chemical formula
* HOOCCH(NH2)CH2COOH. The carboxylate anion, salt, or ester of aspartic
* acid is known as aspartate. The L-isomer of aspartate is one of the 20
* proteinogenic amino acids, i.e., the building blocks of proteins. Its
* codons are GAU and GAC.
* <p>
* Aspartic acid is, together with glutamic acid, classified as an acidic
* amino acid with a pKa of 4.0. Aspartate is pervasive in biosynthesis. As
* with all amino acids, the presence of acid protons depends on the
* residue's local chemical environment and the pH of the solution.
* </p>
*/
ASPARTIC_ACID(3, "Aspartic acid", "Asp", "D"),
/**
* <p>
* Cysteine (abbreviated as Cys or C) is an amino acid with the chemical
* formula HO2CCH(NH2)CH2SH. It is a non-essential amino acid, which means
* that it is biosynthesized in humans. Its codons are UGU and UGC. The side
* chain on cysteine is thiol, which is nonpolar and thus cysteine is
* usually classified as a hydrophobic amino acid. The thiol side chain
* often participates in enzymatic reactions, serving as a nucleophile. The
* thiol is susceptible to oxidization to give the disulfide derivative
* cystine, which serves an important structural role in many proteins.
* Cysteine is named after cystine.
* </p>
*/
CYSTEINE(4, "Cysteine", "Cys", "C"),
/**
* <p>
* Glutamic acid (abbreviated as Glu or E) is one of the 20 proteinogenic
* amino acids, and its codons are GAA and GAG. It is a non-essential amino
* acid. The carboxylate anions and salts of glutamic acid are known as
* glutamates. In neuroscience, glutamate is an important neurotransmitter
* that plays a key role in long-term potentiation and is important for
* learning and memory.
* </p>
*/
GLUTAMIC_ACID(5, "Glutamic acid", "Glu", "E"),
/**
* <p>
* Glutamine (abbreviated as Gln or Q) is one of the 20 amino acids encoded
* by the standard genetic code. It is not recognized as an essential amino
* acid but may become conditionally essential in certain situations,
* including intensive athletic training or certain gastrointestinal
* disorders. Its side-chain is an amide formed by replacing the side-chain
* hydroxyl of glutamic acid with an amine functional group. Therefore, it
* can be considered the amide of glutamic acid. Its codons are CAA and CAG.
* In human blood, glutamine is the most abundant free amino acid, with a
* concentration of about 500-900 umol/l.
* </p>
*/
GLUTAMINE(6, "Glutamine", "Gln", "Q"),
/**
* <p>
* Glycine (abbreviated as Gly or G) is an organic compound with the formula
* NH2CH2COOH. With only two hydrogen atoms as its 'side chain', glycine is
* the smallest of the 20 amino acids commonly found in proteins. Its codons
* are GGU, GGC, GGA, GGG. Glycine is a colourless, sweet-tasting
* crystalline solid. It is unique among the proteinogenic amino acids in
* that it is not chiral. It can fit into hydrophilic or hydrophobic
* environments, due to its two hydrogen atom side chain.
* </p>
*/
GLYCINE(7, "Glycine", "Gly", "G"),
/**
* <p>
* Histidine (abbreviated as His or H) Histidine, an essential amino acid,
* has a positively charged imidazole functional group. It is the one of the
* 22 proteinogenic amino acids. Its codons are CAU and CAC. Histidine was
* first isolated by German physician Albrecht Kossel in 1896. Histidine is
* an essential amino acid in humans and other mammals. It was initially
* thought that it was only essential for infants, but longer-term studies
* established that it is also essential for adult humans.
* </p>
*/
HISTIDINE(8, "Histidine", "His", "H"),
/**
* <p>
* Isoleucine (abbreviated as Ile or I) is an amino acid with the chemical
* formula HO2CCH(NH2)CH(CH3)CH2CH3. It is an essential amino acid, which
* means that humans cannot synthesize it, so it must be ingested. Its
* codons are AUU, AUC and AUA. With a hydrocarbon side chain, isoleucine is
* classified as a hydrophobic amino acid. Together with threonine,
* isoleucine is one of two common amino acids that have a chiral side
* chain. Four stereoisomers of isoleucine are possible, including two
* possible diastereomers of L-isoleucine. However, isoleucine present in
* nature exists in one enantiomeric form, (2S,3S)-2-amino-3-methylpentanoic
* acid.
* </p>
*/
ISOLEUCINE(9, "Isoleucine", "Ile", "I"),
/**
* <p>
* Leucine (abbreviated as Leu or L) is a branched-chain amino acid with
* the chemical formula HO2CCH(NH2)CH2CH(CH3)2. It is an essential amino
* acid, which means that humans cannot synthesize it. Its codons are UUA,
* UUG, CUU, CUC, CUA, and CUG. With a hydrocarbon side chain, leucine is
* classified as a hydrophobic amino acid. It has an isobutyl R group.
* Leucine is a major component of the sub units in ferritin, astacin and
* other 'buffer' proteins.
* </p>
*/
LEUCINE(10, "Leucine", "Leu", "L"),
/**
* <p>
* Lysine (abbreviated as Lys or K) is an amino acid with the chemical
* formula HO2CCH(NH2)(CH2)4NH2. It is an essential amino acid, which means
* that the human body cannot synthesize it. Its codons are AAA and AAG.
* </p>
* <p>
* Lysine is a base, as are arginine and histidine. The amino group often
* participates in hydrogen bonding and as a general base in catalysis.
* Common posttranslational modifications include methylation of the amino
* group, giving methyl-, dimethyl-, and trimethyllysine. The latter occurs
* in calmodulin. Other posttranslational modifications at lysine residues
* include acetylation and ubiquitination. Collagen contains hydroxylysine
* which is derived from lysine by lysyl hydroxylase. O-Glycosylation of
* hydroxylysine residues in the endoplasmic reticulum or Golgi apparatus is
* used to mark certain proteins for secretion from the cell.
* </p>
*/
LYSINE(11, "Lysine", "Lys", "K"),
/**
* <p>
* Methionine abbreviated as Met or M) is an amino acid with the chemical
* formula HO2CCH(NH2)CH2CH2SCH3. This essential amino acid is classified as
* nonpolar.
* </p>
*/
METHIONINE(12, "Methionine", "Met", "M"),
/**
* <p>
* Phenylalanine (abbreviated as Phe or F) is an amino acid with the
* formula C6H5CH2CH(NH2)COOH. This essential amino acid is classified as
* nonpolar because of the hydrophobic nature of the benzyl side chain.
* L-Phenylalanine (LPA) is an electrically neutral amino acid, one of the
* twenty common amino acids used to biochemically form proteins, coded for
* by DNA. The codons for L-phenylalanine are UUU and UUC. Phenylalanine is
* a precursor for tyrosine, the monoamine signaling molecules dopamine,
* norepinephrine (noradrenaline), and epinephrine (adrenaline), and the
* skin pigment melanin.
* </p>
* <p>
* Phenylalanine is found naturally in the breast milk of mammals. It is
* used in the manufacture of food and drink products and sold as a
* nutritional supplement for its reputed analgesic and antidepressant
* effects. It is a direct precursor to the neuromodulator phenylethylamine,
* a commonly used dietary supplement.
* </p>
*/
PHENYLALANINE(13, "Phenylalanine", "Phe", "F"),
/**
* <p>
* Proline (abbreviated as Pro or P) is an amino acid, one of the twenty
* DNA-encoded amino acids. Its codons are CCU, CCC, CCA, and CCG. It is not
* an essential amino acid, which means that the human body can synthesize
* it. It is unique among the 20 protein-forming amino acids in that the
* amino group is secondary. The more common L form has S stereochemistry.
* </p>
*/
PROLINE(14, "Proline", "Pro", "P"),
/**
* <p>
* Serine (abbreviated as Ser or S) is an amino acid with the formula
* HO2CCH(NH2)CH2OH.
* </p>
*/
SERINE(15, "Serine", "Ser", "S"),
/**
* <p>
* Threonine (abbreviated as Thr or T) is an amino acid with the chemical
* formula HO2CCH(NH2)CH(OH)CH3. Its codons are ACU, ACA, ACC, and ACG. This
* essential amino acid is classified as polar. Together with serine,
* threonine is one of two proteinogenic amino acids bearing an alcohol
* group (tyrosine is not an alcohol but a phenol, since its hydroxyl group
* is bonded directly to an aromatic ring, giving it different acid/base and
* oxidative properties). It is also one of two common amino acids that bear
* a chiral side chain, along with isoleucine.
* </p>
* <p>
* The threonine residue is susceptible to numerous posttranslational
* modifications. The hydroxy side-chain can undergo O-linked glycosylation.
* In addition, threonine residues undergo phosphorylation through the
* action of a threonine kinase. In its phosphorylated form, it can be
* referred to as phosphothreonine.
* </p>
*/
THREONINE(16, "Threonine", "Thr", "T"),
/**
* <p>
* Tryptophan (IUPAC-IUBMB abbreviation: Trp or W; IUPAC abbreviation: L-Trp
* or D-Trp; sold for medical use as Tryptan) is one of the 20 standard
* amino acids, as well as an essential amino acid in the human diet. It is
* encoded in the standard genetic code as the codon UGG. The slight
* mispronunciation "tWiptophan" can be used as a mnemonic for its single
* letter IUPAC code W. Only the L-stereoisomer of tryptophan is used in
* structural or enzyme proteins, but the D-stereoisomer is occasionally
* found in naturally produced peptides (for example, the marine venom
* peptide contryphan). The distinguishing structural characteristic of
* tryptophan is that it contains an indole functional group. It is an
* essential amino acid as demonstrated by its growth effects on rats.
* </p>
*/
TRYPTOPHAN(17, "Tryptophan", "Trp", "W"),
/**
* <p>
* Tyrosine (abbreviated as Tyr or Y) or 4-hydroxyphenylalanine, is one of
* the 20 amino acids that are used by cells to synthesize proteins. Its
* codons are UAC and UAU. It is a non-essential amino acid with a polar
* side group. The word "tyrosine" is from the Greek tyri, meaning cheese,
* as it was first discovered in 1846 by German chemist Justus von Liebig in
* the protein casein from cheese.
* </p>
*/
TYROSINE(18, "Tyrosine", "Tyr", "Y"),
/**
* <p>
* Valine (abbreviated as Val or V) is an amino acid with the chemical
* formula HO2CCH(NH2)CH(CH3)2. L-Valine is one of 20 proteinogenic amino
* acids. Its codons are GUU, GUC, GUA, and GUG. This essential amino acid
* is classified as nonpolar. Human dietary sources include cottage cheese,
* fish, poultry, peanuts, sesame seeds, and lentils. Along with leucine and
* isoleucine, valine is a branched-chain amino acid. It is named after the
* plant valerian. In sickle-cell disease, valine substitutes for the
* hydrophilic amino acid glutamic acid in hemoglobin. Because valine is
* hydrophobic, the hemoglobin does not fold correctly.
* </p>
*/
VALINE(19, "Valine", "Val", "V");
private final Integer value;
private String displayValue;
private String threeLetter;
private String oneLetter;
private static final Map<String, AminoAcid> STRINGTOENUM;
static {
STRINGTOENUM = sizedHashMap(values().length * 3);
for (final AminoAcid e : values()) {
STRINGTOENUM.put(e.displayValue, e);
STRINGTOENUM.put(e.threeLetter, e);
STRINGTOENUM.put(e.oneLetter, e);
}
}
/**
* Constructor for setting enum, display, three-letter, and one-letter
* values.
*
* @param value Enum value
* @param displayValue Display value
* @param threeLetter Three-letter value
* @param oneLetter One-letter value
*/
private AminoAcid(Integer value, String displayValue, String threeLetter,
String oneLetter) {
this.value = value;
this.displayValue = displayValue;
this.threeLetter = threeLetter;
this.oneLetter = oneLetter;
}
/**
* {@inheritDoc}
*/
@Override
public String toString() {
return displayValue;
}
/**
* Returns the amino acid's value.
*
* @return value
* @see java.lang.Enum#ordinal() Contrast with {@code ordinal}
*/
public Integer getValue() {
return value;
}
/**
* Returns the amino acid's display value.
*
* @return display value
*/
public String getDisplayValue() {
return displayValue;
}
/**
* Returns the amino acid by its string representation.
*
* @param s Amino acid {@link String string} representation
* @return {@link AminoAcid}, may be null if the provided string has no
* amino acid representation
*/
public static AminoAcid getAminoAcid(final String s) {
AminoAcid e = STRINGTOENUM.get(s);
if (e != null) return e;
for (final String dispval : STRINGTOENUM.keySet()) {
if (dispval.equalsIgnoreCase(s))
return STRINGTOENUM.get(dispval);
}
return null;
}
/**
* Returns the one-letter abbreviation of this amino acid.
*
* @return {@link String}
*/
public String getOneLetter() {
return oneLetter;
}
/**
* Returns the one-letter abbreviation of an amino acid.
*
* @param a {@link AminoAcid}
* @return {@link String}
*/
public static String getOneLetter(final AminoAcid a) {
return a.oneLetter;
}
/**
* Returns a set of all the one-letter amino acid abbreviations.
*
* @return {@link Set}
*/
public static Set<String> getOneLetters() {
final Set<String> ret = sizedHashSet(values().length);
for (final AminoAcid a : values()) {
ret.add(a.oneLetter);
}
return ret;
}
/**
* Returns the three-letter abbreivation of this amino acid.
*
* @return {@link String}
*/
public String getThreeLetter() {
return threeLetter;
}
/**
* Returns the three-letter abbreviation of an amino acid.
*
* @param a {@link AminoAcid}
* @return {@link String}
*/
public static String getThreeLetter(final AminoAcid a) {
return a.threeLetter;
}
/**
* Returns a {@link Set set} of all the three-letter amino acid
* abbreviations.
*
* @return {@link Set}
*/
public static Set<String> getThreeLetters() {
final Set<String> ret = sizedHashSet(values().length);
for (final AminoAcid a : values()) {
ret.add(a.threeLetter);
}
return ret;
}
/**
* Returns {@code true} if this amino acid's side-chain polarity is polar,
* {@code false} otherwise.
*
* @return boolean
*/
public boolean isPolar() {
return isPolar(this);
}
/**
* Returns {@code true} if the amino acid's side-chain polariy is polar,
* {@code false} otherwise.
*
* @param a {@link AminoAcid}
* @return boolean
*/
public static boolean isPolar(final AminoAcid a) {
switch (a) {
case ASPARAGINE:
case ASPARTIC_ACID:
case GLUTAMIC_ACID:
case GLUTAMINE:
case HISTIDINE:
case LYSINE:
case SERINE:
case THREONINE:
case TYROSINE:
return true;
default:
return false;
}
}
/**
* Returns {@code true} if this amino acid's side-chain polarity is
* non-polar, {@code false} otherwise.
*
* @return boolean
*/
public boolean isNonPolar() {
return isNonPolar(this);
}
/**
* Returns {@code true} if the amino acid's side-chain polariy is non-polar,
* {@code false} otherwise.
*
* @param a {@link AminoAcid}
* @return boolean
*/
public static boolean isNonPolar(final AminoAcid a) {
switch (a) {
case ALANINE:
case ARGININE:
case CYSTEINE:
case GLYCINE:
case ISOLEUCINE:
case LEUCINE:
case METHIONINE:
case PHENYLALANINE:
case PROLINE:
case TRYPTOPHAN:
case VALINE:
return true;
default:
return false;
}
}
}