/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package org.twentyn.proteintodna;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
 * Counts how often each DNA coding sequence is observed for short peptides and derives, for every
 * amino acid, a ranked list of its most frequently observed codons.
 *
 * <p>The index is built once by {@link #initiate()} from a tab-separated gene table and is cached
 * in a static field. Windows of one to six codons are cut from each selected ORF, translated with
 * {@link Translator}, and counted per (peptide, coding sequence) pair.
 *
 * <p>{@link #initiate()} performs an unsynchronized check-then-set on the static instance; callers
 * that may race on first use must serialize that call themselves.
 */
public class CodonIndexer {
  /** Rows are used only when the number parsed from column index 7 is strictly above this value. */
  private static final double COPY_LIMIT = 3.0;

  /** Longest window, in codons, that is indexed and looked up. */
  private static final int MAX_CODONS = 6;

  /** Nucleotides per codon. */
  private static final int CODON_LENGTH = 3;

  /** One-letter codes for which a ranked codon list is built. */
  private static final String AMINO_ACIDS = "ACDEFGHIKLMNPQRSTVWY";

  /** Peptide -> (coding sequence -> number of times that sequence was indexed for the peptide). */
  Map<String,Map<String,Integer>> peptideToCodons = new HashMap<>();

  /** Amino acid (or '*' for stop) -> codons, most frequently observed first. */
  Map<Character, List<String>> aminoAcidToBestCodons = new HashMap<>();

  SequenceChecker checker = new SequenceChecker();
  Translator translator = new Translator();

  private static CodonIndexer singleton;

  private CodonIndexer() {
  }

  /**
   * Returns the shared indexer, building it on the first call.
   *
   * <p>The instance is published only after it has been completely populated, so a failure while
   * building leaves no partially initialized instance behind and the next call retries.
   *
   * @return the fully populated shared instance
   * @throws IllegalStateException if no codon was indexed for one of the twenty amino acids
   * @throws Exception if the gene table cannot be read
   */
  public static CodonIndexer initiate() throws Exception {
    if (singleton != null) {
      return singleton;
    }

    CodonIndexer indexer = new CodonIndexer();

    for (String orf : readSelectedOrfs()) {
      indexer.indexOrf(orf);
    }

    for (int i = 0; i < AMINO_ACIDS.length(); i++) {
      char aa = AMINO_ACIDS.charAt(i);
      indexer.aminoAcidToBestCodons.put(aa, indexer.rankCodons(aa));
    }

    //Put in TAA or TGA for a stop
    List<String> stops = new ArrayList<>();
    stops.add("TAA");
    stops.add("TGA");
    indexer.aminoAcidToBestCodons.put('*', stops);

    singleton = indexer;
    return singleton;
  }

  /**
   * Reads the gene table and returns the upper-cased ORFs (column index 6) of every row whose
   * column index 7 parses as a number greater than {@link #COPY_LIMIT}. Rows with too few columns
   * or a non-numeric value (for example a header row) are skipped.
   */
  private static List<String> readSelectedOrfs() throws Exception {
    List<String> orfs = new ArrayList<>();
    String data = FileUtils.readFile2("data/CodonOptimization/coli_genes.txt");
    for (String line : data.split("\\r|\\r?\\n")) {
      String[] tabs = line.split("\t");
      if (tabs.length < 8) {
        continue;
      }
      double copy;
      try {
        copy = Double.parseDouble(tabs[7]);
      } catch (NumberFormatException ignored) {
        // Not a data row; skip it.
        continue;
      }
      if (copy > COPY_LIMIT) {
        orfs.add(tabs[6].toUpperCase());
      }
    }
    return orfs;
  }

  /**
   * Indexes every window of one to {@link #MAX_CODONS} codons that starts on a codon boundary of
   * {@code orf} and fits inside it.
   */
  private void indexOrf(String orf) {
    for (int start = 0; start < orf.length(); start += CODON_LENGTH) {
      for (int codons = 1; codons <= MAX_CODONS; codons++) {
        int end = start + codons * CODON_LENGTH;
        if (end > orf.length()) {
          // Longer windows cannot fit either.
          break;
        }
        String cds = orf.substring(start, end);
        try {
          index(translator.translate(cds), cds);
        } catch (Exception ignored) {
          // Best effort: a window that cannot be translated or checked is simply not counted,
          // and the remaining window sizes are still tried.
        }
      }
    }
  }

  /**
   * Builds the ranked codon list for one amino acid: every codon whose count exceeds 25% of the
   * highest count, plus the runner-up when only a single codon passes, sorted by descending count.
   */
  private List<String> rankCodons(char aa) {
    String key = String.valueOf(aa);
    final Map<String,Integer> codonToCount = peptideToCodons.get(key);
    if (codonToCount == null) {
      throw new IllegalStateException("No codons were indexed for amino acid '" + aa + "'");
    }

    //Find the best codon count
    int best = 0;
    for (int count : codonToCount.values()) {
      best = Math.max(best, count);
    }

    //Include only those codons with more than 25% the count of best codon
    List<String> codons = new ArrayList<>();
    for (Map.Entry<String,Integer> entry : codonToCount.entrySet()) {
      if (entry.getValue() > 0.25 * best) {
        codons.add(entry.getKey());
      }
    }

    //If only one codon survived, include the next best
    if (codons.size() == 1) {
      String nextBest = getNextBestSeq(key);
      if (nextBest != null) {
        codons.add(nextBest);
      }
    }

    // Stable sort, highest count first; codons with equal counts keep their relative order.
    Collections.sort(codons, new Comparator<String>() {
      @Override
      public int compare(String left, String right) {
        return Integer.compare(codonToCount.get(right), codonToCount.get(left));
      }
    });
    return codons;
  }

  /**
   * Adds one observation of {@code cds} for {@code peptide}. Sequences of six or more nucleotides
   * are counted only when {@link SequenceChecker#check} accepts them.
   */
  private void index(String peptide, String cds) {
    if (cds.length() >= 6 && !checker.check(cds)) {
      return;
    }
    Map<String, Integer> amap = peptideToCodons.get(peptide);
    if (amap == null) {
      amap = new HashMap<>();
      peptideToCodons.put(peptide, amap);
    }
    Integer count = amap.get(cds);
    amap.put(cds, count == null ? 1 : count + 1);
  }

  /**
   * Returns the coding sequence with the highest count for {@code peptide}.
   *
   * @param peptide the peptide to look up
   * @return the most frequently indexed coding sequence, or {@code null} if the peptide was never
   *     indexed
   */
  public String getBestSeq(String peptide) {
    Map<String, Integer> amap = peptideToCodons.get(peptide);
    if (amap == null) {
      return null;
    }
    String best = null;
    int bestscore = Integer.MIN_VALUE;
    for (Map.Entry<String, Integer> entry : amap.entrySet()) {
      int score = entry.getValue();
      if (score > bestscore) {
        best = entry.getKey();
        bestscore = score;
      }
    }
    return best;
  }

  /**
   * Returns the coding sequence with the second-highest count for {@code peptide}.
   *
   * <p>Both the leader and the runner-up are tracked while scanning, so the result does not depend
   * on the map's iteration order. When two sequences tie for the highest count, one is treated as
   * the best and the other is returned here.
   *
   * @param peptide the peptide to look up
   * @return the runner-up coding sequence, or {@code null} if the peptide was never indexed or has
   *     only one coding sequence
   */
  public String getNextBestSeq(String peptide) {
    Map<String, Integer> amap = peptideToCodons.get(peptide);
    if (amap == null) {
      return null;
    }
    String best = null;
    String nextbest = null;
    int bestscore = Integer.MIN_VALUE;
    int nextscore = Integer.MIN_VALUE;
    for (Map.Entry<String, Integer> entry : amap.entrySet()) {
      int score = entry.getValue();
      if (score > bestscore) {
        // The previous leader becomes the runner-up.
        nextbest = best;
        nextscore = bestscore;
        best = entry.getKey();
        bestscore = score;
      } else if (score > nextscore) {
        nextbest = entry.getKey();
        nextscore = score;
      }
    }
    return nextbest;
  }

  /** Builds the index and prints the best sequence for "AA" and the ranked codons per amino acid. */
  public static void main(String[] args) throws Exception {
    CodonIndexer indexer = CodonIndexer.initiate();

    //Test a dipeptide with lots of diversity
    String seq = indexer.getBestSeq("AA"); //best is GCGGCG with score 66
    System.out.println("Best for AA: " + seq);

    //Print out best codons per aa
    for (Map.Entry<Character, List<String>> entry : indexer.aminoAcidToBestCodons.entrySet()) {
      System.out.print(entry.getKey() + ": ");
      for (String codon : entry.getValue()) {
        System.out.print(codon + ", ");
      }
      System.out.println();
    }
    System.out.println("done");
  }

  /**
   * Collects the best and second-best coding sequences for every prefix of {@code peptide} of
   * length one to six. Peptides shorter than six residues contribute only the prefixes they have.
   *
   * @param peptide the peptide whose prefixes are looked up; must not be {@code null}
   * @return the distinct coding sequences found; never contains {@code null}
   */
  public Set<String> getAllSeq(String peptide) {
    Set<String> out = new HashSet<String>();
    int longest = Math.min(MAX_CODONS, peptide.length());
    //Add all valid codon options
    for (int x = 1; x <= longest; x++) {
      String pep = peptide.substring(0, x);
      String bestcodon = getBestSeq(pep);
      if (bestcodon != null) {
        out.add(bestcodon);
      }
      String secondbest = getNextBestSeq(pep);
      if (secondbest != null) {
        out.add(secondbest);
      }
    }
    return out;
  }
}