/*
* The MIT License (MIT)
*
* Copyright (c) 2007-2015 Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package org.broad.igv.feature;
import com.google.common.base.Objects;
import com.google.common.collect.Table;
import com.google.common.collect.TreeBasedTable;
import com.google.gson.*;
import org.apache.log4j.Logger;
import org.broad.igv.track.SequenceTrack;
import org.broad.igv.util.ParsingUtils;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.*;
/**
* @author jrobinso
*/
public class AminoAcidManager {
// Class-wide logger.
private static final Logger log = Logger.getLogger(AminoAcidManager.class);
/**
 * Classpath resource (resolved relative to this class) which lists amino acid names.
 * Format: Full Name \t 3 letter abbreviation \t Single letter abbrev.
 * Lines starting with '#' are skipped by the loader.
 */
private static final String AANameFilePath = "resources/AANamesTable.txt";
/**
 * Table containing mapping from string forms (full, TLA, single-letter-abbrev)
 * to amino acid object. No codon information stored here.
 * Populated lazily by initAANameMap(); when two entries share a key,
 * the first one read from the file is kept.
 */
private static final Map<String, AminoAcid> AANameMap = new HashMap<String, AminoAcid>(20);
// Three parallel strings: the characters at index i of each string, read in
// order, spell the i-th codon. CodonTable.build pairs index i with the i-th
// character of a table's amino-acid string.
private static final String[] BASE_SEQUENCES = {"TTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCAAAAAAAAAAAAAAAAGGGGGGGGGGGGGGGG",
"TTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGGTTTTCCCCAAAAGGGG",
"TCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAGTCAG"};
// Codon table file loaded by getInstance() when the singleton is created.
static final String DEFAULT_CODON_TABLE_PATH = "resources/geneticCode.json";
// Resource read by loadDefaultTranslationTables() to map genome/chromosome to a codon table.
static final String DEFAULT_TRANS_TABLE_PATH = "resources/defaultTranslationTables.json";
//ID of the "standard" translation table
public static final int STANDARD_TABLE_ID = 1;
// Chromosome key meant to select a genome-wide default; in this file it is
// referenced only from commented-out code.
private static final String DEFAULT_CHROMO_KEY = "default";
// Every codon table loaded so far, in load order, keyed by (source path, id).
private LinkedHashMap<CodonTableKey, CodonTable> allCodonTables = new LinkedHashMap<CodonTableKey, CodonTable>(20);
// Table used for all translations; null after clear() until a table is loaded or selected.
private CodonTable currentCodonTable;
// Row = genome id, column = chromosome name, value = codon table key.
// Filled by loadDefaultTranslationTables().
private static Table<String, String, CodonTableKey> genomeChromoTable = TreeBasedTable.create();
// Lazily created singleton; see getInstance() and resetToDefaultCodonTables().
private static AminoAcidManager instance;
/**
 * Creates a manager with the amino acid name table initialized and the
 * default genome/chromosome translation table settings loaded.
 * Codon tables themselves are NOT loaded here; see {@link #getInstance()}.
 */
private AminoAcidManager() {
    initAANameMap();
    try {
        loadDefaultTranslationTables();
    } catch (JsonParseException e) {
        // Best effort: a malformed settings file must not prevent construction.
        // Pass the exception as the throwable argument so the stack trace is logged.
        log.error("Error loading default translation tables from " + DEFAULT_TRANS_TABLE_PATH, e);
    }
}
/**
 * Returns the shared manager, creating it on first use.
 * Creation constructs a new manager and loads the codon tables found at
 * {@link #DEFAULT_CODON_TABLE_PATH}; the shared reference is only assigned
 * once loading has succeeded. Load failures are routed to
 * handleExceptionLoading.
 *
 * @return the shared instance
 */
public static AminoAcidManager getInstance() {
    if (instance != null) {
        return instance;
    }
    try {
        AminoAcidManager candidate = new AminoAcidManager();
        candidate.loadCodonTables(DEFAULT_CODON_TABLE_PATH);
        instance = candidate;
    } catch (JsonParseException parseFailure) {
        handleExceptionLoading(parseFailure);
    } catch (IOException ioFailure) {
        handleExceptionLoading(ioFailure);
    }
    return instance;
}
/**
 * Drops the shared instance and rebuilds it from the default codon table
 * file, so the current codon table becomes the default contained in that
 * file.
 *
 * @return Instance of AminoAcidManager, for chaining
 */
public static AminoAcidManager resetToDefaultCodonTables() {
    instance = null;
    AminoAcidManager rebuilt = getInstance();
    return rebuilt;
}
/**
 * Logs a codon-table loading failure. When no shared instance exists there
 * is nothing to fall back on, so the failure is rethrown wrapped in an
 * IllegalStateException.
 *
 * @param e the failure raised while loading
 */
private static void handleExceptionLoading(Exception e) {
    log.error(e);
    boolean haveFallback = instance != null;
    if (haveFallback) {
        return;
    }
    throw new IllegalStateException("No codon table present, and error loading " + DEFAULT_CODON_TABLE_PATH, e);
}
/**
 * Forgets every loaded codon table and leaves no table selected.
 * Mainly for testing.
 */
synchronized void clear() {
    currentCodonTable = null;
    allCodonTables.clear();
}
/**
 * Selects the current codon table by source file and id.
 * Each codon translation table is identified by an integer id specified in
 * its file, so a table is addressed by the filename/id combination.
 *
 * @param codonTablePath path the table was loaded from
 * @param id             id of the table within that file
 * @return Whether setting the table was successful
 */
public boolean setCodonTable(String codonTablePath, int id) {
    return setCodonTable(new CodonTableKey(codonTablePath, id));
}
/**
 * Selects the current codon table by key. The selection is left unchanged
 * when no table with that key has been loaded.
 *
 * @param key key of the table to select
 * @return true if a table with this key was loaded and is now current
 */
public boolean setCodonTable(CodonTableKey key) {
    boolean known = allCodonTables.containsKey(key);
    if (!known) {
        return false;
    }
    currentCodonTable = allCodonTables.get(key);
    return true;
}
/**
 * Translates a single codon with the current codon table.
 *
 * @param codon 3-letter nucleotide sequence
 * @return The amino acid represented by this codon, as
 * decoded from the current codon table
 */
public AminoAcid getAminoAcid(String codon) {
    final CodonTable activeTable = currentCodonTable;
    return activeTable.getAminoAcid(codon);
}
/**
 * Translates a nucleotide sequence into amino acids using the current
 * codon table.
 * <p/>
 * For the negative strand the sequence is reverse-complemented, translated,
 * and the resulting list reversed. Codons are read in steps of three from
 * the start of the (possibly reverse-complemented) sequence; trailing bases
 * that do not form a full codon are ignored. Callers are expected to send a
 * properly aligned sequence whose length is divisible by 3.
 *
 * @param direction strand to read
 * @param sequence  nucleotide sequence
 * @return one amino acid per complete codon
 */
List<AminoAcid> getAminoAcids(Strand direction, String sequence) {
    final int codonCount = sequence.length() / 3;
    List<AminoAcid> translated = new ArrayList<AminoAcid>(codonCount);

    final boolean reverseStrand = direction == Strand.NEGATIVE;
    final String bases = reverseStrand ? SequenceTrack.getReverseComplement(sequence) : sequence;

    for (int from = 0, to = 3; to <= bases.length(); from = to, to += 3) {
        String triplet = bases.substring(from, to).toUpperCase();
        translated.add(currentCodonTable.getAminoAcid(triplet));
    }

    if (reverseStrand) {
        Collections.reverse(translated);
    }
    return translated;
}
/**
 * Get the amino acid sequence for an interval.
 * Assumptions and conventions
 * <p/>
 * The start and end positions are on the positive strand
 * irrespective of the read direction.
 * <p/>
 * Reading will begin from the startPosition if strand == POSITIVE, endPosition if NEGATIVE.
 * On the negative strand any bases left over after dividing the sequence into
 * codons are therefore at the beginning of the interval, and the reported
 * start is shifted past them.
 *
 * @param strand      strand to read
 * @param start       position of the first base of nucSequence
 * @param nucSequence nucleotide sequence for the interval
 * @return AminoAcidSequence, or null if nucSequence == null
 */
public AminoAcidSequence getAminoAcidSequence(Strand strand, int start, String nucSequence) {
    if (nucSequence == null) {
        return null;
    }
    // Only the negative strand is read from the far end (see getAminoAcids),
    // so only then do the leftover bases sit in front of the first codon.
    // Any other strand value is translated from the first base, offset 0.
    int aaStart = 0;
    if (strand == Strand.NEGATIVE) {
        aaStart = nucSequence.length() % 3;
    }
    List<AminoAcid> acids = getAminoAcids(strand, nucSequence);
    return new AminoAcidSequence(strand, start + aaStart, acids, currentCodonTable.getKey());
}
/**
 * Given the 'name' of an amino acid, find a match. Lookups
 * can be by full name, short form, or single letter. Note that
 * in the case of multiple matches, the first is returned.
 * This matters most for the stop codon, whose full name
 * is ambiguous (ochre, amber, opal) if the short form
 * or single letter is used.
 *
 * @param name full name, short form, or single-letter symbol
 * @return the matching amino acid, or AminoAcid.NULL_AMINO_ACID if there is none
 */
public static AminoAcid getAminoAcidByName(String name) {
    initAANameMap();
    AminoAcid match = AANameMap.get(name);
    return match != null ? match : AminoAcid.NULL_AMINO_ACID;
}
/**
 * Finds the single-nucleotide variants of a codon which, under the current
 * codon table, encode the given amino acid.
 *
 * @param codon original codon
 * @param mutAA amino acid the variant must encode
 * @return the variant codons that translate to mutAA
 */
public Set<String> getMappingSNPs(String codon, AminoAcid mutAA) {
    Set<String> matching = new HashSet<String>();
    Iterator<String> candidates = getAllSNPs(codon).iterator();
    while (candidates.hasNext()) {
        String variant = candidates.next();
        AminoAcid encoded = currentCodonTable.getAminoAcid(variant);
        // Compare by short name: the three stop codons have different long
        // names, and that difference is irrelevant here.
        if (!encoded.equalsByName(mutAA.getShortName())) {
            continue;
        }
        matching.add(variant);
    }
    return matching;
}
/**
 * Gets all possible strings which differ from the provided sequence at
 * exactly one position. The original is not included in the returned set.
 * Assumes the sequence is DNA, consisting of A,T,G,C, and substitutes only
 * those four characters.
 *
 * @param sequence sequence to mutate
 * @return every single-position substitution of the sequence
 */
public static Set<String> getAllSNPs(String sequence) {
    final String alphabet = "ATGC";
    Set<String> variants = new HashSet<String>();
    StringBuilder scratch = new StringBuilder(sequence);
    for (int pos = 0; pos < sequence.length(); pos++) {
        final char original = sequence.charAt(pos);
        for (int b = 0; b < alphabet.length(); b++) {
            char candidate = alphabet.charAt(b);
            if (candidate != original) {
                scratch.setCharAt(pos, candidate);
                variants.add(scratch.toString());
            }
        }
        // Restore this position before moving on to the next one.
        scratch.setCharAt(pos, original);
    }
    return variants;
}
/**
 * Load codon tables from the specified path. If any exceptions occur
 * while loading, no changes are made to this instance.
 * <p/>
 * The path is first looked up as a classpath resource relative to this
 * class; if that fails it is opened through ParsingUtils.
 * <p/>
 * Note that the new codon tables are ADDED to the existing tables
 * <p/>
 * The currentCodonTable is set to be the codonTable with id = defaultid if present
 * If not (no "defaultid" entry, or no table with that id), the first one
 * in the array is set as default
 *
 * @param codonTablesPath path of a .json codon table file
 * @throws IOException              if the file cannot be opened
 * @throws JsonParseException       if the file has no, or an empty, "Genetic-code-table" array
 * @throws IllegalArgumentException if the path does not end in .json
 */
synchronized void loadCodonTables(String codonTablesPath) throws IOException, JsonParseException {
    LinkedHashMap<CodonTableKey, CodonTable> newCodonTables = new LinkedHashMap<CodonTableKey, CodonTable>(20);
    CodonTable defaultCodonTable = null;

    InputStream is = AminoAcidManager.class.getResourceAsStream(codonTablesPath);
    if (is == null) {
        is = ParsingUtils.openInputStream(codonTablesPath);
    }
    if (is == null) {
        throw new IOException("Unable to open codon table file: " + codonTablesPath);
    }

    try {
        if (!codonTablesPath.endsWith(".json")) {
            throw new IllegalArgumentException("Unknown file type, must be .json");
        }

        JsonObject allData = readJSONFromStream(is);

        // "defaultid" is optional; -1 means "no explicit default", in which
        // case the first table in the array wins below.
        int defaultId = -1;
        JsonElement defaultIdElement = allData.get("defaultid");
        if (defaultIdElement != null && !defaultIdElement.isJsonNull()) {
            defaultId = defaultIdElement.getAsInt();
        }

        JsonElement tablesElement = allData.get("Genetic-code-table");
        if (tablesElement == null || !tablesElement.isJsonArray()) {
            throw new JsonParseException("JSON File has no array for Genetic-code-table");
        }
        JsonArray codonArray = tablesElement.getAsJsonArray();
        if (codonArray.size() == 0) {
            throw new JsonParseException("JSON File has empty array for Genetic-code-table");
        }

        for (int ca = 0; ca < codonArray.size(); ca++) {
            CodonTable curTable = CodonTable.createFromJSON(codonTablesPath, codonArray.get(ca).getAsJsonObject());
            newCodonTables.put(curTable.getKey(), curTable);
            if (defaultCodonTable == null || curTable.getId() == defaultId) {
                defaultCodonTable = curTable;
            }
        }
    } finally {
        // The stream is fully consumed (or abandoned) at this point; release it.
        try {
            is.close();
        } catch (IOException closeError) {
            log.warn("Error closing codon table stream for " + codonTablesPath, closeError);
        }
    }

    // Only publish once everything parsed successfully.
    allCodonTables.putAll(newCodonTables);
    currentCodonTable = defaultCodonTable;
}
/**
 * Parses the whole stream as JSON and returns its root as an object.
 * The stream is not closed by this method.
 *
 * @param is stream containing a JSON document
 * @return the root JSON object
 */
private static JsonObject readJSONFromStream(InputStream is) {
    JsonElement root = new JsonParser().parse(new BufferedReader(new InputStreamReader(is)));
    return root.getAsJsonObject();
}
/**
 * Initialize table of amino acid names, for easy lookup of
 * AminoAcid by symbols. Once the table holds entries, further calls
 * return immediately without re-reading the name file.
 * <p/>
 * Each usable line holds three tab-separated fields (full name, short
 * name, single-letter symbol); lines starting with '#' and lines with a
 * different field count are skipped. Every field becomes a lookup key, and
 * the first amino acid registered under a key is kept.
 * <p/>
 * If the name resource is not found the table is left empty.
 *
 * @throws RuntimeException wrapping the IOException if the file cannot be read
 */
private synchronized static void initAANameMap() {
    if (!AANameMap.isEmpty()) {
        return;
    }

    InputStream is = AminoAcidManager.class.getResourceAsStream(AANameFilePath);
    if (is == null) {
        return;
    }

    BufferedReader reader = new BufferedReader(new InputStreamReader(is));
    try {
        String nextLine;
        while ((nextLine = reader.readLine()) != null) {
            if (nextLine.startsWith("#")) continue;

            String[] tokens = nextLine.split("\t");
            if (tokens.length == 3) {
                String fullName = tokens[0].trim();
                String shortName = tokens[1].trim();
                String symbol = tokens[2].trim();
                assert symbol.length() == 1;
                AminoAcid aa = new AminoAcid(fullName, shortName, symbol.charAt(0));
                for (String sym : new String[]{fullName, shortName, symbol}) {
                    // First registration wins; later duplicates are ignored.
                    if (!AANameMap.containsKey(sym)) {
                        AANameMap.put(sym, aa);
                    }
                }
            }
        }
    } catch (IOException ex) {
        log.error("Error reading amino acid names from " + AANameFilePath, ex);
        throw new RuntimeException(ex);
    } finally {
        // Closing the reader also releases the underlying resource stream.
        try {
            reader.close();
        } catch (IOException closeError) {
            log.warn("Error closing " + AANameFilePath, closeError);
        }
    }
}
/**
 * @return a read-only view of every codon table loaded so far
 */
public Collection<CodonTable> getAllCodonTables() {
    Collection<CodonTable> loaded = allCodonTables.values();
    return Collections.unmodifiableCollection(loaded);
}
/**
 * @return the codon table currently used for translation
 */
public CodonTable getCodonTable() {
    return this.currentCodonTable;
}
/**
 * Reads {@link #DEFAULT_TRANS_TABLE_PATH} and records, for each organism
 * listed there, which codon table applies to which chromosome.
 * <p/>
 * Each organism entry supplies a "genomeId", a "chromosomes" object mapping
 * chromosome name to codon table id, and optionally a "codonTablePath"
 * string naming the file the ids refer to. When no path is given,
 * {@link #DEFAULT_CODON_TABLE_PATH} is used.
 * <p/>
 * If the resource is missing, an error is logged and nothing is recorded.
 *
 * @throws JsonParseException if the resource is not valid JSON
 */
private static void loadDefaultTranslationTables() throws JsonParseException {
    InputStream is = AminoAcidManager.class.getResourceAsStream(DEFAULT_TRANS_TABLE_PATH);
    if (is == null) {
        log.error("Default translation table resource not found: " + DEFAULT_TRANS_TABLE_PATH);
        return;
    }

    JsonObject allData;
    try {
        allData = readJSONFromStream(is);
    } finally {
        try {
            is.close();
        } catch (IOException closeError) {
            log.warn("Error closing " + DEFAULT_TRANS_TABLE_PATH, closeError);
        }
    }

    JsonArray organisms = allData.get("organisms").getAsJsonArray();
    for (int ind = 0; ind < organisms.size(); ind++) {
        JsonObject obj = organisms.get(ind).getAsJsonObject();

        //Process each translation table setting
        String genomeId = obj.get("genomeId").getAsString();

        // Gson hands back a JsonElement, never a java.lang.String, so the
        // value has to be inspected and unwrapped through the element API.
        String codonTablePath = DEFAULT_CODON_TABLE_PATH;
        JsonElement pathElement = obj.get("codonTablePath");
        if (pathElement != null
                && pathElement.isJsonPrimitive()
                && pathElement.getAsJsonPrimitive().isString()) {
            codonTablePath = pathElement.getAsString();
        }

        JsonObject chromosomes = obj.get("chromosomes").getAsJsonObject();
        for (Map.Entry<String, JsonElement> entry : chromosomes.entrySet()) {
            String chromoName = entry.getKey();
            int id = entry.getValue().getAsInt();
            genomeChromoTable.put(genomeId, chromoName, new CodonTableKey(codonTablePath, id));
        }
    }
}
// /**
// * Load the default codon table for the given genome and chromosome.
// * We check the given name, alias, and finally use the default for the specified
// * genome.
// *
// * @param genome
// * @param chrName
// */
// public void loadDefaultCodonTable(Genome genome, String chrName) {
// Map<String, CodonTableKey> chrMap = genomeChromoTable.row(genome.getId());
// String[] tryChromos = new String[]{
// chrName, genome.getCanonicalChrName(chrName), DEFAULT_CHROMO_KEY
// };
// for (String tryChromo : tryChromos) {
// if (chrMap.containsKey(tryChromo)) {
// setCodonTable(chrMap.get(tryChromo));
// return;
// }
// }
// }
/**
 * Identifies a codon table by the file it came from plus its integer id
 * within that file. Instances are immutable; two keys are equal when both
 * the path and the id match.
 */
public static class CodonTableKey {

    private final String sourcePath;
    private final int id;

    private CodonTableKey(String sourcePath, int id) {
        this.sourcePath = sourcePath;
        this.id = id;
    }

    /**
     * @return the table id within its source file
     */
    public int getId() {
        return id;
    }

    @Override
    public int hashCode() {
        return Objects.hashCode(this.sourcePath, this.id);
    }

    @Override
    public boolean equals(Object object) {
        if (!(object instanceof CodonTableKey)) {
            return false;
        }
        CodonTableKey that = (CodonTableKey) object;
        if (this.id != that.id) {
            return false;
        }
        return Objects.equal(this.sourcePath, that.sourcePath);
    }
}
/**
 * Store information about current codon translation table.
 * Intended to be loaded from external resource, and then never modified.
 * To that end, collections contained here are set to be unmodifiable
 */
public static class CodonTable {

    private final CodonTableKey key;
    private final List<String> names;
    private final Set<AminoAcid> starts;
    private final Map<String, AminoAcid> codonMap;

    /**
     * Get the amino acid represented by this codon
     *
     * @param codon 3-letter codon; looked up exactly as given
     * @return the mapped amino acid, or AminoAcid.NULL_AMINO_ACID if this
     * table has no amino acid for the codon
     * @throws IllegalArgumentException if the codon is not 3 characters long
     */
    public AminoAcid getAminoAcid(String codon) {
        if (codon.length() != 3) {
            throw new IllegalArgumentException("Codon must be length 3: " + codon);
        }

        AminoAcid aa = codonMap.get(codon);
        if (aa == null) {
            return AminoAcid.NULL_AMINO_ACID;
        }
        return aa;
    }

    /**
     * Wraps the supplied collections in unmodifiable views; they are not copied.
     */
    private CodonTable(String path, int id, List<String> names, Set<AminoAcid> starts, Map<String, AminoAcid> codonMap) {
        this.key = new CodonTableKey(path, id);
        this.names = Collections.unmodifiableList(names);
        this.starts = Collections.unmodifiableSet(starts);
        this.codonMap = Collections.unmodifiableMap(codonMap);
    }

    /**
     * Builds a table from one element of the "Genetic-code-table" array.
     * Reads "id", the "name" array, and the "ncbieaa" / "sncbieaa" strings.
     *
     * @param sourcePath path of the file the object came from; becomes part of the key
     * @param jsonObject JSON description of a single table
     */
    private static CodonTable createFromJSON(String sourcePath, JsonObject jsonObject) throws JsonParseException {
        int id = jsonObject.get("id").getAsInt();

        JsonArray jsonnames = jsonObject.get("name").getAsJsonArray();
        List<String> names = new ArrayList<String>(jsonnames.size());
        for (int nn = 0; nn < jsonnames.size(); nn++) {
            names.add(jsonnames.get(nn).getAsString());
        }

        //Data is written as several long strings which line up
        String aas = jsonObject.get("ncbieaa").getAsString();
        String startString = jsonObject.get("sncbieaa").getAsString();

        return build(sourcePath, id, names, aas, startString);
    }

    /**
     * Assembles the codon map by reading the base strings, the amino acid
     * string and the start string column by column: column i of the three
     * BASE_SEQUENCES spells the codon, character i of aas is the symbol of
     * its amino acid, and an 'M' at position i of startString marks that
     * amino acid as a start.
     */
    private static CodonTable build(String sourcePath, int id, List<String> names, String aas, String startString) {
        String base1 = BASE_SEQUENCES[0];
        String base2 = BASE_SEQUENCES[1];
        String base3 = BASE_SEQUENCES[2];

        checkLengths(base1, base2, base3, aas, startString);

        Map<String, AminoAcid> codonMap = new HashMap<String, AminoAcid>(aas.length());
        Set<AminoAcid> starts = new HashSet<AminoAcid>(aas.length());

        for (int cc = 0; cc < aas.length(); cc++) {
            String codon = base1.substring(cc, cc + 1) + base2.substring(cc, cc + 1) + base3.substring(cc, cc + 1);
            // Symbols are resolved through the shared name table.
            AminoAcid aa = AANameMap.get(aas.substring(cc, cc + 1));

            codonMap.put(codon, aa);
            if (startString.charAt(cc) == 'M') {
                starts.add(aa);
            }
        }

        return new CodonTable(sourcePath, id, names, starts, codonMap);
    }

    /**
     * Verifies that every string has the same length as the first one.
     *
     * @throws InputMismatchException if any length differs
     */
    private static void checkLengths(String... values) {
        int length = values[0].length();
        assert length == 64;
        for (int v = 1; v < values.length; v++) {
            if (values[v].length() != length) {
                String msg = "Amino acid and codon strings must all be the same length. ";
                msg += "Expected length " + length + ", found length " + values[v].length();
                throw new InputMismatchException(msg);
            }
        }
    }

    /**
     * @return the id of this table within its source file
     */
    public int getId() {
        return key.id;
    }

    /**
     * @return the first of this table's names
     */
    public String getDisplayName() {
        return names.get(0);
    }

    /**
     * @return unmodifiable set of the amino acids marked as starts
     */
    public Set<AminoAcid> getStarts() {
        return starts;
    }

    /**
     * @return unmodifiable codon-to-amino-acid mapping
     */
    Map<String, AminoAcid> getCodonMap() {
        return codonMap;
    }

    @Override
    public boolean equals(Object object) {
        if (object instanceof CodonTable) {
            CodonTable other = (CodonTable) object;
            return Objects.equal(this.key, other.key) &&
                    Objects.equal(this.names, other.names) &&
                    Objects.equal(this.starts, other.starts) &&
                    Objects.equal(this.codonMap, other.codonMap);
        }
        return false;
    }

    @Override
    public int hashCode() {
        return Objects.hashCode(this.key.id, this.key.sourcePath, this.names, this.starts, this.codonMap);
    }

    /**
     * @return the (source path, id) key identifying this table
     */
    public CodonTableKey getKey() {
        return key;
    }
}
}