package com.ppfold.algo.extradata;
import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
import com.ppfold.algo.MatrixTools;
public class ExtraDataProbMapping implements ExtraData {
//This is a special type of ExtraData, where the numbers in the input file are actually
//the probability densities of some observation given paired/unpaired. It is assumed the
//user has calculated these numbers on their own according to some model.
private int type = 1;
//Probabilities must always be given as P(data|unpaired) or P(data|paired).
private float[] dataProbGivenPaired; //P(data|model), unpaired case
private float[] dataProbGivenUnpaired; //P(data|model), paired case
public int getType(){
return type;
}
public float getProbabilityGivenOuterPaired(int position1, int position2) {
return dataProbGivenPaired[position1]*dataProbGivenPaired[position2];
}
public float getProbabilityGivenInnerPaired(int position1, int position2) {
return dataProbGivenPaired[position1]*dataProbGivenPaired[position2];
}
public float getProbabilityGivenUnpaired(int position) {
return dataProbGivenUnpaired[position];
}
public void importData(String filename, int sequencelength) throws Exception{
//read the SHAPE data
BufferedInputStream stream = null;
try {
stream = new BufferedInputStream(new FileInputStream(filename));
} catch (FileNotFoundException e) {
System.err.println("Extra data input file " + filename + " could not be read!");
throw new IOException(e);
}
readData_toStream(stream, sequencelength);
}
public void readData_toStream(BufferedInputStream stream, int sequencelength) throws Exception{
//create the data containers
this.dataProbGivenPaired = new float[sequencelength];
this.dataProbGivenUnpaired = new float[sequencelength];
if(stream!=null){
String line = "";
try{
int l = stream.available();
byte[] bytes = new byte[l];
stream.read(bytes);
stream.close();
String data_string = new String(bytes);
String[] lines = data_string.split("\n");
int data_size = lines.length;
int [] readdata_index = new int[data_size];
float [] readdata_data1 = new float[data_size];
float[] readdata_data2 = new float[data_size];
// Create a pattern to match different kinds of separators
Pattern p = Pattern.compile("[,\\s]+");
//DO NOT ignore first line...
for(int i = 0; i<lines.length; i++){
line = lines[i];
String splitline[] = p.split(line.trim());
if(splitline.length == 3){
readdata_index[i] = Integer.valueOf(splitline[0])-1; //data files are numbered from 1; Java numbers from 0
readdata_data1[i] = Float.valueOf(splitline[1]);
readdata_data2[i] = Float.valueOf(splitline[2]);
}
}
//initialize all data values to -999 and all probability values to 1
for(int i = 0; i<sequencelength; i++){
dataProbGivenPaired[i] = 1;
dataProbGivenUnpaired[i] = 1;
}
//set the appropriate data values to the read ones
for(int i = 0; i<data_size; i++){
//
dataProbGivenPaired[readdata_index[i]] = readdata_data1[i];
dataProbGivenUnpaired[readdata_index[i]] = readdata_data2[i];
}
}
catch(Exception e){
System.err.println("An exception occured while attempting to read or interpret the data. ");
throw new Exception(e);
}
}
else{
System.err.println("Input stream was null, the data could not be loaded.");
}
}
public void transformToAlignment(String gappedseq) {
int n = gappedseq.length();
float[] dataProbGivenPaired_a = new float[n];
float[] dataProbGivenUnpaired_a = new float[n];
int cnt = 0; //counts sequence positions
for(int i = 0; i<n; i++){
//step alignment positions
if(MatrixTools.isGap(gappedseq.charAt(i))){
//if there's a gap, set probabilities to 1
dataProbGivenPaired_a[i] = 1;
dataProbGivenUnpaired_a[i] = 1;
}
else{
dataProbGivenPaired_a[i] = dataProbGivenPaired[cnt];
dataProbGivenUnpaired_a[i] = dataProbGivenUnpaired[cnt];
cnt++;
}
//Prevent weird results by setting 0's to a very small finite number instead
if(dataProbGivenPaired_a[i]==0){
dataProbGivenPaired_a[i]=Float.MIN_VALUE;
}
if(dataProbGivenUnpaired_a[i]==0){
dataProbGivenUnpaired_a[i]=Float.MIN_VALUE;
}
//System.out.println(i + ": pairing="+dataProbGivenPaired_a[i] + ", " +
// "unpaired="+dataProbGivenUnpaired_a[i]);
}
this.dataProbGivenPaired = dataProbGivenPaired_a;
this.dataProbGivenUnpaired = dataProbGivenUnpaired_a;
}
public void removeColumns(List<Integer> leftoutcolumns){
Iterator<Integer> iter = leftoutcolumns.iterator();
int leaveout = 0;
int from = 0;
int cnt = 0; //counts position in new thing
int length = this.dataProbGivenPaired.length;
float[] dataProbGivenPaired_a = new float[length - leftoutcolumns.size()];
float[] dataProbGivenUnpaired_a = new float[length - leftoutcolumns.size()];
while(iter.hasNext()){
leaveout = iter.next();
for(int i = from; i<leaveout; i++){
dataProbGivenPaired_a[cnt] = dataProbGivenPaired[i];
dataProbGivenUnpaired_a[cnt] = dataProbGivenUnpaired[i];
cnt++;
}
from = leaveout+1;
}
//do the last part part
for(int i = from; i<length; i++){
dataProbGivenPaired_a[cnt] = dataProbGivenPaired[i];
dataProbGivenUnpaired_a[cnt] = dataProbGivenUnpaired[i];
cnt++;
}
this.dataProbGivenPaired = dataProbGivenPaired_a;
this.dataProbGivenUnpaired = dataProbGivenUnpaired_a;
//System.out.println("Size of new aux data: " + dataProbGivenPaired.length);
}
public boolean isEmpty(int i)
{
return (dataProbGivenPaired[i]==1&&dataProbGivenUnpaired[i]==1);
}
}