package aliview.primer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import aliview.NucleotideUtilities;
/**
 * Scans two primer sequences for dimers, i.e. stretches of aligned positions
 * for which {@link NucleotideUtilities#getDimerBinding(char, char)} reports a
 * binding value of at least 1.
 *
 * <p>The second sequence is slid along the first one; at every offset the
 * overlapping positions are compared index by index. A stretch may contain up
 * to {@link #NUMBER_OF_ONE_BASE_GAPS_ALLOWED} single non-binding positions and
 * is reported when it holds at least {@link #getDimerLengthThreashold()}
 * binding positions.
 *
 * <p>NOTE(review): the sequences are compared exactly as given (no reversing
 * or complementing is done here); presumably callers pass them in the
 * orientation expected by {@code getDimerBinding} - confirm against callers.
 */
public class Dimer {

    private static final Logger logger = Logger.getLogger(Dimer.class);

    /** Number of single non-binding positions tolerated inside one dimer. */
    private static final int NUMBER_OF_ONE_BASE_GAPS_ALLOWED = 1;

    /**
     * Minimum number of binding positions a dimer must have to be reported.
     * Static, so the value is shared by every {@code Dimer} instance.
     */
    private static int minDimerReportLength = 5;

    String sequence1;
    String sequence2;

    /**
     * Manual smoke test: logs the text rendering of every dimer found between
     * two hard-coded example sequences.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String seq1 = "CCCATGGGGTGTGCAAGTTCGTTGTG";
        String seq2 = "ACACWGCAACTTGCACACCATA";
        for (String line : new Dimer(seq1, seq2).getAllDimersAsText()) {
            logger.info(line);
        }
    }

    /**
     * @param seq1 first primer sequence
     * @param seq2 second primer sequence
     */
    public Dimer(String seq1, String seq2) {
        this.sequence1 = seq1;
        this.sequence2 = seq2;
    }

    /**
     * @return the minimum number of binding positions a dimer needs in order
     *         to be reported
     */
    public static int getDimerLengthThreashold() {
        return minDimerReportLength;
    }

    /**
     * Sets the minimum number of binding positions a dimer needs in order to
     * be reported. The setting is static and therefore affects all instances.
     *
     * @param length new threshold
     */
    public static final void setDimerLengthThreashold(int length) {
        minDimerReportLength = length;
    }

    /**
     * @return the subset of {@link #getAllDimers()} for which
     *         {@link DimerResult#is3EndDimer()} is true, in the same order
     */
    public ArrayList<DimerResult> get3EndDimer() {
        ArrayList<DimerResult> end3Dimers = new ArrayList<DimerResult>();
        for (DimerResult dimer : getAllDimers()) {
            if (dimer.is3EndDimer()) {
                end3Dimers.add(dimer);
            }
        }
        return end3Dimers;
    }

    /**
     * @return every dimer between the two sequences that reaches the current
     *         length threshold, allowing
     *         {@link #NUMBER_OF_ONE_BASE_GAPS_ALLOWED} one-base gaps per dimer
     */
    public ArrayList<DimerResult> getAllDimers() {
        return getAllDimers(this.sequence1, this.sequence2,
                getDimerLengthThreashold(), NUMBER_OF_ONE_BASE_GAPS_ALLOWED);
    }

    /**
     * @return the largest {@link DimerResult#getDimerLength()} among the
     *         3-end dimers, or 0 when there are none
     */
    public int get3EndDimerMaxLength() {
        return maxDimerLength(get3EndDimer());
    }

    /**
     * @return the largest {@link DimerResult#getDimerLength()} among all
     *         dimers, or 0 when there are none
     */
    public int getDimerMaxLength() {
        return maxDimerLength(getAllDimers());
    }

    /**
     * @return the three text lines of every dimer (see
     *         {@link DimerResult#getDimerAsText()}), each dimer followed by
     *         two empty lines
     */
    public ArrayList<String> getAllDimersAsText() {
        ArrayList<String> allText = new ArrayList<String>();
        for (DimerResult dimer : getAllDimers()) {
            Collections.addAll(allText, dimer.getDimerAsText());
            // two blank rows separate consecutive dimers
            allText.add("");
            allText.add("");
        }
        return allText;
    }

    /** Returns the largest dimer length in the list, or 0 for an empty list. */
    private int maxDimerLength(ArrayList<DimerResult> dimers) {
        int maxLen = 0;
        for (DimerResult dimer : dimers) {
            maxLen = Math.max(dimer.getDimerLength(), maxLen);
        }
        return maxLen;
    }

    /**
     * Slides {@code seq2} along {@code seq1} and collects every dimer found.
     *
     * <p>For each offset {@code n1}, position {@code n2} of {@code seq2} is
     * compared with position {@code n1 + n2} of {@code seq1}. A dimer starts
     * at the first binding position and is extended over following binding
     * positions. Non-binding positions inside an open dimer are tolerated
     * until {@code numberOfOneBaseGapsAllowed} is used up; the next
     * non-binding position closes the dimer. Non-binding positions met while
     * no dimer is open do not consume the gap allowance. A dimer that is still
     * open when the overlap ends is closed there, so dimers reaching the end
     * of either sequence are reported as well.
     *
     * @param seq1 first sequence
     * @param seq2 second sequence
     * @param minDimerLen minimum number of binding positions for a dimer to
     *        be kept
     * @param numberOfOneBaseGapsAllowed non-binding positions tolerated inside
     *        one dimer
     * @return the dimers in reverse order of discovery (the original author's
     *         note says this is so that 3-end results come first); empty if
     *         either sequence is {@code null}
     */
    private ArrayList<DimerResult> getAllDimers(String seq1, String seq2, int minDimerLen, int numberOfOneBaseGapsAllowed) {
        ArrayList<DimerResult> allDimers = new ArrayList<DimerResult>();
        if (seq1 == null || seq2 == null) {
            return allDimers;
        }

        for (int n1 = -seq2.length(); n1 < seq1.length(); n1++) {
            // Only the n2 values whose partner index n1 + n2 lies inside seq1
            // can be compared; everything outside that window is skipped.
            int n2Start = Math.max(0, -n1);
            int n2End = Math.min(seq2.length(), seq1.length() - n1);

            DimerResult dimerResult = null;
            int numberOfOneBaseGapsFound = 0;

            for (int n2 = n2Start; n2 < n2End; n2++) {
                int primer1CharPos = n1 + n2;
                int primer2CharPos = n2;
                int charDimerVal = NucleotideUtilities.getDimerBinding(
                        seq1.charAt(primer1CharPos), seq2.charAt(primer2CharPos));

                if (charDimerVal >= 1) {
                    if (dimerResult == null) {
                        dimerResult = new DimerResult(seq1, seq2);
                        dimerResult.setDimerStartPos(primer1CharPos, primer2CharPos);
                    }
                    // The end position only ever moves to a binding position,
                    // so a trailing gap never becomes part of the dimer.
                    dimerResult.setPrimer1EndPos(primer1CharPos);
                    dimerResult.setPrimer2EndPos(primer2CharPos);
                    dimerResult.addCharDimerValue(charDimerVal);
                } else if (dimerResult != null) {
                    if (numberOfOneBaseGapsFound < numberOfOneBaseGapsAllowed) {
                        numberOfOneBaseGapsFound++;
                    } else {
                        // gap allowance used up: close this dimer
                        addIfLongEnough(allDimers, dimerResult, minDimerLen);
                        dimerResult = null;
                        numberOfOneBaseGapsFound = 0;
                    }
                }
            }

            // The overlap ended while a dimer was still open: keep it too.
            addIfLongEnough(allDimers, dimerResult, minDimerLen);
        }

        Collections.reverse(allDimers);
        return allDimers;
    }

    /** Adds the dimer to the list if it is non-null and has at least minDimerLen binding positions. */
    private void addIfLongEnough(ArrayList<DimerResult> dimers, DimerResult dimer, int minDimerLen) {
        if (dimer != null && dimer.getDimerLengthWithoutAnyGaps() >= minDimerLen) {
            dimers.add(dimer);
        }
    }

    /**
     * One dimer between the two sequences: the start and end index of the
     * dimer in each sequence plus the binding values collected for it.
     */
    class DimerResult {
        int primer1StartPos;
        int primer1EndPos;
        int primer2StartPos;
        int primer2EndPos;
        String sequence1;
        String sequence2;
        /** Sum of all values passed to {@link #addCharDimerValue(double)}. */
        double totalDimerValue;
        String dimerBindString = "";
        /** Every value passed to {@link #addCharDimerValue(double)}, in call order. */
        ArrayList<Double> dimerValues = new ArrayList<Double>();

        /**
         * @param seq1 the complete first sequence
         * @param seq2 the complete second sequence
         */
        public DimerResult(String seq1, String seq2) {
            this.sequence1 = seq1;
            this.sequence2 = seq2;
        }

        /**
         * Sets the start index in both sequences and resets both end indexes
         * to the same positions (a dimer of length 1).
         */
        public void setDimerStartPos(int primer1StartPos, int primer2StartPos) {
            this.primer1StartPos = primer1StartPos;
            this.primer2StartPos = primer2StartPos;
            this.primer1EndPos = primer1StartPos;
            this.primer2EndPos = primer2StartPos;
        }

        /**
         * @return number of positions spanned in the first sequence from start
         *         to end inclusive, gaps included
         */
        public int getDimerLength() {
            return Math.abs(primer1StartPos - primer1EndPos) + 1;
        }

        /** @return number of binding positions within the span (gaps excluded) */
        public int getDimerLengthWithoutAnyGaps() {
            return StringUtils.countMatches(getBindString(), "|");
        }

        /** Records one binding value and adds it to the running total. */
        public void addCharDimerValue(double charDimerVal) {
            totalDimerValue += charDimerVal;
            dimerValues.add(Double.valueOf(charDimerVal));
        }

        public void setPrimer1EndPos(int primer1EndPos) {
            this.primer1EndPos = primer1EndPos;
        }

        public int getPrimer1EndPos() {
            return primer1EndPos;
        }

        public int getPrimer2EndPos() {
            return primer2EndPos;
        }

        public void setPrimer2EndPos(int primer2EndPos) {
            this.primer2EndPos = primer2EndPos;
        }

        public int getPrimer1StartPos() {
            return primer1StartPos;
        }

        public int getPrimer2StartPos() {
            return primer2StartPos;
        }

        public void setPrimer1StartPos(int primer1StartPos) {
            this.primer1StartPos = primer1StartPos;
        }

        /**
         * Renders the dimer as three lines: the first sequence, a line with
         * {@code |} under every binding position of the span, and the second
         * sequence. The sequence whose dimer starts at the smaller index is
         * padded with leading blanks so that the two start positions share a
         * column, and the marker line is indented to that column.
         *
         * @return array of exactly three lines: sequence 1, markers, sequence 2
         */
        public String[] getDimerAsText() {
            int start1 = getPrimer1StartPos();
            int start2 = getPrimer2StartPos();

            // createNewBlankString returns "" for a non-positive count, so only
            // the sequence with the smaller start index is actually shifted.
            String primer1Line = createNewBlankString(start2 - start1) + sequence1 + " 3\"";
            String primer2Line = createNewBlankString(start1 - start2) + sequence2 + " 5\"";
            String bindLine = createNewBlankString(Math.max(start1, start2)) + getBindString();

            return new String[] { primer1Line, bindLine, primer2Line };
        }

        /** Returns a string of n blanks, or the empty string when n is not positive. */
        private String createNewBlankString(int n) {
            if (n <= 0) {
                return "";
            }
            char[] blanks = new char[n];
            Arrays.fill(blanks, ' ');
            return new String(blanks);
        }

        /**
         * Builds one character per spanned position: {@code |} where
         * getDimerBinding reports at least 1 for the aligned pair, otherwise a
         * blank.
         */
        private String getBindString() {
            int length = getDimerLength();
            StringBuilder binding = new StringBuilder(length);
            for (int n = 0; n < length; n++) {
                int bindVal = NucleotideUtilities.getDimerBinding(
                        sequence1.charAt(getPrimer1StartPos() + n),
                        sequence2.charAt(getPrimer2StartPos() + n));
                binding.append(bindVal >= 1 ? '|' : ' ');
            }
            return binding.toString();
        }

        public int getPrimer1MinPos() {
            return Math.min(getPrimer1StartPos(), getPrimer1EndPos());
        }

        public int getPrimer1MaxPos() {
            return Math.max(getPrimer1StartPos(), getPrimer1EndPos());
        }

        public int getPrimer2MinPos() {
            return Math.min(getPrimer2StartPos(), getPrimer2EndPos());
        }

        public int getPrimer2MaxPos() {
            return Math.max(getPrimer2StartPos(), getPrimer2EndPos());
        }

        /**
         * @return true when the dimer reaches the last index of the first
         *         sequence and the first index of the second sequence
         */
        public boolean is3EndDimer() {
            return getPrimer1MaxPos() == sequence1.length() - 1 && getPrimer2MinPos() == 0;
        }
    }
}