/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package org.twentyn.proteintodna;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
 * This is a hacked down version of RBSChooser2 that doesn't do the update portion.
 * It just chooses an RBS.
 *
 * <p>Candidates are loaded by {@link #initiate()} from two tab-separated files; selection is
 * done by {@link #choose(String, Set)}, which picks the candidate whose stored
 * {@code first6aas} string has the smallest edit distance to the start of the peptide.
 */
public class RBSChooser3 {

  /** Tab-separated gene table; column index 1 is the lookup key, column index 6 becomes the CDS. */
  private static final String GENES_FILE = "data/CodonOptimization/coli_genes.txt";

  /** Tab-separated RBS table; column index 0 is the gene name, column index 1 the RBS. */
  private static final String RBS_OPTIONS_FILE = "data/CodonOptimization/rbs_options.txt";

  /**
   * Line-break pattern. {@code \r\n} must be tried before a lone {@code \r}, otherwise every
   * CRLF break is split twice and yields a spurious empty line.
   */
  private static final String LINE_BREAK_REGEX = "\\r\\n|\\r|\\n";

  private static final int GENE_NAME_COLUMN = 1;
  private static final int GENE_CDS_COLUMN = 6;

  /** Number of leading CDS characters handed to the translator. */
  private static final int CDS_PREFIX_LENGTH = 18;

  /** Maximum number of leading peptide characters compared against each candidate. */
  private static final int PEPTIDE_PREFIX_LENGTH = 18;

  /** Candidates in file order; empty until {@link #initiate()} has populated it. */
  private List<RBSOption> rbss = new ArrayList<>();

  Translator translator = new Translator();

  /**
   * Builds a chooser from the gene table and the RBS option table.
   *
   * <p>Gene rows with fewer than two columns are ignored. An RBS row is skipped (with a message
   * on standard output) when its gene name is unknown, when the row has no RBS column, or when
   * the matching gene row has no CDS column or a CDS shorter than
   * {@value #CDS_PREFIX_LENGTH} characters.
   *
   * @return a chooser holding every usable RBS option, in file order
   * @throws Exception if a data file cannot be read or the translator rejects a CDS prefix
   */
  public static RBSChooser3 initiate() throws Exception {
    RBSChooser3 out = new RBSChooser3();

    // Gather up all the genes, indexed by gene name.
    Map<String, String[]> geneToData = new HashMap<>();
    String data = FileUtils.readFile2(GENES_FILE);
    for (String line : data.split(LINE_BREAK_REGEX)) {
      String[] tabs = line.split("\t");
      if (tabs.length <= GENE_NAME_COLUMN) {
        continue; // blank or truncated row: no name column to index by
      }
      geneToData.put(tabs[GENE_NAME_COLUMN], tabs);
    }

    // Populate the RBS choices.
    data = FileUtils.readFile2(RBS_OPTIONS_FILE);
    for (String line : data.split(LINE_BREAK_REGEX)) {
      String[] tabs = line.split("\t");
      if (tabs.length == 0) {
        continue; // String.split drops trailing empties, so a tabs-only line has no columns
      }
      String name = tabs[0];
      String[] geneRow = geneToData.get(name);
      if (geneRow == null) {
        System.out.println("!! skipping + " + name);
        continue;
      }
      if (tabs.length < 2
          || geneRow.length <= GENE_CDS_COLUMN
          || geneRow[GENE_CDS_COLUMN].length() < CDS_PREFIX_LENGTH) {
        // One malformed row should not abort loading of all the others.
        System.out.println("!! skipping malformed entry for " + name);
        continue;
      }

      RBSOption opt = new RBSOption();
      opt.rbs = tabs[1];
      opt.name = name;
      opt.cds = geneRow[GENE_CDS_COLUMN];
      opt.first6aas = out.translator.translate(opt.cds.substring(0, CDS_PREFIX_LENGTH));
      out.rbss.add(opt);
    }
    return out;
  }

  /**
   * Picks the RBS option whose {@code first6aas} is closest, by edit distance, to the start of
   * {@code peptide}. Ties go to the option that appears first in the loaded list.
   *
   * <p>NOTE(review): up to {@value #PEPTIDE_PREFIX_LENGTH} peptide characters are compared
   * against {@code first6aas}, which is translated from a {@value #CDS_PREFIX_LENGTH}-character
   * CDS prefix (presumably 6 residues). Confirm that the differing lengths are intended.
   *
   * @param peptide the peptide to match; only its first {@value #PEPTIDE_PREFIX_LENGTH}
   *     characters are used, and shorter peptides are used whole
   * @param ignores options that must not be returned; {@code null} means none are excluded
   * @return the best matching option, or {@code null} if every option is excluded or none are
   *     loaded
   * @throws Exception declared for compatibility with existing callers
   */
  public RBSOption choose(String peptide, Set<RBSOption> ignores) throws Exception {
    // Clamp so that peptides shorter than the prefix length do not throw.
    String pep = peptide.substring(0, Math.min(peptide.length(), PEPTIDE_PREFIX_LENGTH));

    RBSOption bestRBS = null;
    int best = Integer.MAX_VALUE;
    for (RBSOption opt : rbss) {
      if (ignores != null && ignores.contains(opt)) {
        continue;
      }
      int score = dpEditDistance(pep, opt.first6aas);
      if (score < best) { // strict comparison keeps the earliest option on ties
        best = score;
        bestRBS = opt;
      }
    }
    return bestRBS;
  }

  /**
   * Computes the Levenshtein edit distance between two strings with the standard
   * dynamic-programming table: unit-cost insertion, deletion and substitution. Swapping two
   * adjacent characters is not a single operation here and therefore costs two edits.
   *
   * <p>Fills an {@code (s1.length()+1) x (s2.length()+1)} table, so time and space are
   * proportional to the product of the two lengths.
   *
   * @param s1 first string
   * @param s2 second string
   * @return the minimum number of single-character edits turning {@code s1} into {@code s2}
   */
  private static int dpEditDistance(String s1, String s2) {
    int s1len = s1.length();
    int s2len = s2.length();
    // dist[a][b] = edit distance between the first a chars of s1 and the first b chars of s2.
    int[][] dist = new int[s1len + 1][s2len + 1];
    for (int a = 0; a <= s1len; a++) {
      for (int b = 0; b <= s2len; b++) {
        if (a == 0) {
          dist[a][b] = b; // build the s2 prefix purely by insertions
        } else if (b == 0) {
          dist[a][b] = a; // erase the s1 prefix purely by deletions
        } else if (s1.charAt(a - 1) == s2.charAt(b - 1)) {
          dist[a][b] = dist[a - 1][b - 1]; // characters match: no extra cost
        } else {
          dist[a][b] = 1 + Math.min(Math.min(dist[a][b - 1], dist[a - 1][b]), dist[a - 1][b - 1]);
        }
      }
    }
    return dist[s1len][s2len];
  }
}