package com.ppfold.algo.extradata;

import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;

import com.ppfold.algo.MatrixTools;

/**
 * Defines the model for how to deal with SHAPE data and acts as the container
 * for the actual data.
 *
 * <p>An instance holds a binned distribution table (lower bin limits plus one
 * value per bin for the paired and for the unpaired case) and, once
 * measurements have been imported, one measurement per position together with
 * the table values looked up for it.
 *
 * @author Z.Sukosd
 */
public class ExtraDataBars implements ExtraData {

    /** Marker stored in {@link #data} for positions that have no measurement. */
    private static final float NO_DATA = -999f;

    /** Field separator accepted in both input formats: commas and/or whitespace. */
    private static final Pattern SEPARATOR = Pattern.compile("[,\\s]+");

    // NOTE(review): an earlier comment at this spot described a per-measurement
    // percent error used to define integration intervals; no such field exists
    // in this class.

    /** Value reported by {@link #getType()}; never reassigned in this class. */
    private int type = 0;

    private float[] data;                 // the actual measurement numbers (data)
    private float[] distributionPaired;   // probability density function, paired case (model)
    private float[] distributionUnpaired; // probability density function, unpaired case (model)
    private float[] distributionLimits;   // the LOWER limits of the probability density bins (model)

    // Probabilities must always be given as P(data|unpaired) or P(data|paired).
    private float[] dataProbGivenPaired;   // P(data|model), paired case
    private float[] dataProbGivenUnpaired; // P(data|model), unpaired case

    public ExtraDataBars() {
    }

    /** @return the type identifier of this data source */
    public int getType() {
        return type;
    }

    /**
     * @param i position to test
     * @return {@code true} if position {@code i} carries the "no data" marker
     */
    public boolean isEmpty(int i) {
        return data[i] == NO_DATA;
    }

    /** Stores the three parallel arrays that make up the distribution table. */
    private void setDistribution(float[] unpaired, float[] paired, float[] limits) {
        distributionPaired = paired;
        distributionUnpaired = unpaired;
        distributionLimits = limits;
    }

    /** @return product of the paired-case probabilities of the two positions */
    public float getProbabilityGivenOuterPaired(int position1, int position2) {
        return getProbabilityGivenPaired(position1) * getProbabilityGivenPaired(position2);
    }

    /** @return product of the paired-case probabilities of the two positions */
    public float getProbabilityGivenInnerPaired(int position1, int position2) {
        return getProbabilityGivenPaired(position1) * getProbabilityGivenPaired(position2);
    }

    /** @return the internal paired-case distribution array (not a copy) */
    public float[] getDistributionPaired() {
        return distributionPaired;
    }

    /** @return the internal unpaired-case distribution array (not a copy) */
    public float[] getDistributionUnpaired() {
        return distributionUnpaired;
    }

    /** @return the internal array of lower bin limits (not a copy) */
    public float[] getDistributionLimits() {
        return distributionLimits;
    }

    /** @return P(data|paired) stored for position {@code n} */
    public float getProbabilityGivenPaired(int n) {
        return dataProbGivenPaired[n];
    }

    /** @return same value as {@link #getProbabilityGivenPaired(int)} */
    public float getProbabilityGivenInnerPair(int n) {
        return getProbabilityGivenPaired(n);
    }

    /** @return same value as {@link #getProbabilityGivenPaired(int)} */
    public float getProbabilityGivenOuterPair(int n) {
        return getProbabilityGivenPaired(n);
    }

    /** @return P(data|unpaired) stored for position {@code n} */
    public float getProbabilityGivenUnpaired(int n) {
        return dataProbGivenUnpaired[n];
    }

    /**
     * Reads SHAPE distribution data from the file specified.
     *
     * @param filename path of the distribution table file
     * @return the object produced by {@link #readDistTable_toStream(BufferedInputStream)}
     * @throws IOException if the file cannot be opened
     */
    public static ExtraDataBars readDistTable(String filename) throws IOException {
        BufferedInputStream stream = null;
        try {
            stream = new BufferedInputStream(new FileInputStream(filename));
        } catch (FileNotFoundException e) {
            System.err.println("SHAPE input file " + filename + " could not be read!");
            throw new IOException(e);
        }
        return readDistTable_toStream(stream);
    }

    /**
     * Parses a distribution table from {@code stream} and closes the stream.
     *
     * <p>The first line is treated as a header and ignored. Every following
     * non-blank line must hold at least three fields separated by commas
     * and/or whitespace: lower limit, paired value, unpaired value.
     *
     * <p>Failures are reported on {@code System.err} and not thrown. If the
     * stream is {@code null} or cannot be read, {@code null} is returned. If
     * the content cannot be parsed, an instance without a distribution table
     * is returned.
     *
     * @param stream source of the table; may be {@code null}
     * @return the loaded object, or see above for the failure cases
     */
    public static ExtraDataBars readDistTable_toStream(BufferedInputStream stream) {
        // The CLC import plugin uses the following line:
        // public ClcObject[] doImport(BufferedInputStream stream, String name, Activity activity)
        //         throws IOException, ParseException, PersistenceException {
        // The same stream will be passed here to ease the import process later.
        if (stream == null) {
            System.err.println("Input stream was null, SHAPE data could not be loaded.");
            return null;
        }

        ExtraDataBars result = null;
        try {
            String[] lines = readLinesAndClose(stream);

            // Count the usable rows first so the arrays can be sized exactly;
            // index 0 is the header line.
            int rowCount = 0;
            for (int i = 1; i < lines.length; i++) {
                if (lines[i].trim().length() != 0) {
                    rowCount++;
                }
            }

            result = new ExtraDataBars();
            float[] paired = new float[rowCount];
            float[] unpaired = new float[rowCount];
            float[] limits = new float[rowCount];

            int row = 0;
            for (int i = 1; i < lines.length; i++) {
                String trimmed = lines[i].trim();
                if (trimmed.length() == 0) {
                    // e.g. a trailing blank line, or "\r" left over from CRLF line ends
                    continue;
                }
                String[] fields = SEPARATOR.split(trimmed);
                if (fields.length < 3) {
                    throw new IllegalArgumentException(
                            "Line " + (i + 1) + " has fewer than 3 fields: '" + trimmed + "'");
                }
                limits[row] = Float.parseFloat(fields[0]);
                paired[row] = Float.parseFloat(fields[1]);
                unpaired[row] = Float.parseFloat(fields[2]);
                row++;
            }
            result.setDistribution(unpaired, paired, limits);
        } catch (Exception e) {
            System.err.println("An exception occured while attempting to read the SHAPE data.");
            // Report the cause as well, so the failure can be diagnosed.
            System.err.println("Cause: " + e);
        }
        return result;
    }

    /**
     * Opens {@code filename} and imports the per-position measurements from it.
     *
     * @param filename       path of the measurement file
     * @param sequencelength number of positions in the sequence
     * @throws Exception if the file cannot be opened, or for any failure of
     *                   {@link #readData_toStream(BufferedInputStream, int)}
     */
    public void importData(String filename, int sequencelength) throws Exception {
        // read the SHAPE data
        BufferedInputStream stream = null;
        try {
            stream = new BufferedInputStream(new FileInputStream(filename));
        } catch (FileNotFoundException e) {
            System.err.println("SHAPE sequence input file " + filename + " could not be read!");
            throw new IOException(e);
        }
        readData_toStream(stream, sequencelength);
    }

    /**
     * Reads per-position measurements from {@code stream}, closes the stream
     * and looks up the paired/unpaired values for every position.
     *
     * <p>Each line with exactly two fields (separated by commas and/or
     * whitespace) is read as {@code position value}, where positions are
     * numbered from 1. Lines with any other number of fields are ignored.
     * Positions not mentioned in the input are marked as having no data. If a
     * position occurs more than once, the last occurrence wins.
     *
     * <p>If {@code stream} is {@code null}, a message is printed and the
     * freshly allocated, zero-filled arrays are left as they are.
     *
     * @param stream         source of the measurements; may be {@code null}
     * @param sequencelength number of positions in the sequence
     * @throws Exception wrapping any failure while reading, parsing, or
     *                   calculating probabilities (including a position outside
     *                   {@code 1..sequencelength})
     */
    public void readData_toStream(BufferedInputStream stream, int sequencelength) throws Exception {
        this.data = new float[sequencelength];
        this.dataProbGivenPaired = new float[sequencelength];
        this.dataProbGivenUnpaired = new float[sequencelength];

        if (stream == null) {
            System.err.println("Input stream was null, SHAPE sequence data could not be loaded.");
            return;
        }

        try {
            String[] lines = readLinesAndClose(stream);

            // Start with every position marked as "no data".
            float[] values = new float[sequencelength];
            for (int i = 0; i < values.length; i++) {
                values[i] = NO_DATA;
            }

            // DO NOT ignore the first line: there is no mandatory header here.
            for (int i = 0; i < lines.length; i++) {
                String[] fields = SEPARATOR.split(lines[i].trim());
                if (fields.length != 2) {
                    // Not a data line. It must not touch any position.
                    continue;
                }
                // SHAPE data files are numbered from 1; Java numbers from 0.
                int position = Integer.parseInt(fields[0]) - 1;
                if (position < 0 || position >= sequencelength) {
                    throw new IllegalArgumentException("Line " + (i + 1) + ": position "
                            + fields[0] + " is outside 1.." + sequencelength);
                }
                values[position] = Float.parseFloat(fields[1]);
            }
            this.data = values;

            // calculate probabilities
            try {
                calcProbabilities();
            } catch (Exception e) {
                System.out.println("Probabilities for the data could not be calculated.");
                throw new Exception(e);
            }
        } catch (Exception e) {
            System.err.println("An exception occured while attempting to read or interpret the SHAPE sequence data. ");
            e.printStackTrace();
            throw new Exception(e);
        }
    }

    /**
     * Reads {@code stream} to its end, closes it, and splits the content on
     * {@code '\n'}. Bytes are decoded with the platform default charset.
     */
    private static String[] readLinesAndClose(BufferedInputStream stream) throws IOException {
        try {
            // available() is only an estimate and a single read() may return
            // fewer bytes than requested, so keep reading until end of stream.
            ByteArrayOutputStream collected = new ByteArrayOutputStream();
            byte[] chunk = new byte[8192];
            int count;
            while ((count = stream.read(chunk)) != -1) {
                collected.write(chunk, 0, count);
            }
            return collected.toString().split("\n");
        } finally {
            stream.close();
        }
    }

    /**
     * Fills {@link #dataProbGivenPaired} and {@link #dataProbGivenUnpaired}
     * from the distribution table for every position in {@link #data}.
     *
     * <p>Positions without data get 1 for both cases. Any other value is
     * assigned to the last bin whose lower limit does not exceed it. Values
     * above the last limit fall in the last bin, and values below the first
     * limit are assigned to the first bin.
     *
     * @throws IllegalStateException if no (or an empty) distribution table is present
     */
    private void calcProbabilities() {
        if (distributionLimits == null || distributionPaired == null
                || distributionUnpaired == null || distributionLimits.length == 0) {
            throw new IllegalStateException(
                    "SHAPE distribution table has not been loaded; cannot calculate probabilities.");
        }

        int binCount = distributionLimits.length;
        for (int i = 0; i < data.length; i++) {
            // For each data point, calculate the probability of data given paired/unpaired.
            if (data[i] == NO_DATA) {
                // ignore: a factor of 1 leaves any product unchanged
                dataProbGivenPaired[i] = 1;
                dataProbGivenUnpaired[i] = 1;
                continue;
            }

            // Advance past every bin whose lower limit is not above the value.
            int bin = 0;
            while (bin != binCount && !(data[i] < distributionLimits[bin])) {
                bin++;
            }
            // Step back to the containing bin; clamp so that a value below the
            // first lower limit uses the first bin instead of index -1.
            bin = Math.max(bin - 1, 0);

            dataProbGivenPaired[i] = distributionPaired[bin];
            dataProbGivenUnpaired[i] = distributionUnpaired[bin];
        }
    }

    /**
     * Expands the per-sequence arrays to alignment coordinates.
     *
     * <p>Gap columns (as decided by {@code MatrixTools.isGap}) get no data and
     * a probability of 1 for both cases. Every other column takes the next
     * sequence position in order. Probabilities equal to 0 are replaced by
     * {@code Float.MIN_VALUE}.
     *
     * @param gappedseq the aligned sequence; its number of non-gap characters
     *                  must not exceed the number of stored positions
     */
    public void transformToAlignment(String gappedseq) {
        int columns = gappedseq.length();
        float[] alignedData = new float[columns];
        float[] alignedPaired = new float[columns];
        float[] alignedUnpaired = new float[columns];

        int seqPos = 0; // counts sequence positions
        for (int col = 0; col < columns; col++) { // step alignment positions
            if (MatrixTools.isGap(gappedseq.charAt(col))) {
                // if there's a gap, set probabilities to 1
                alignedData[col] = NO_DATA; // no data for that column
                alignedPaired[col] = 1;
                alignedUnpaired[col] = 1;
            } else {
                alignedData[col] = data[seqPos];
                alignedPaired[col] = dataProbGivenPaired[seqPos];
                alignedUnpaired[col] = dataProbGivenUnpaired[seqPos];
                seqPos++;
            }

            // Prevent weird results by setting 0's to a very small finite number instead
            if (alignedPaired[col] == 0) {
                alignedPaired[col] = Float.MIN_VALUE;
            }
            if (alignedUnpaired[col] == 0) {
                alignedUnpaired[col] = Float.MIN_VALUE;
            }
        }

        this.data = alignedData;
        this.dataProbGivenPaired = alignedPaired;
        this.dataProbGivenUnpaired = alignedUnpaired;
    }

    /**
     * Removes the given columns from all per-position arrays.
     *
     * <p>The copy loop walks the list once from left to right, so it relies on
     * {@code leftoutcolumns} holding distinct, valid indices in ascending order.
     *
     * @param leftoutcolumns indices of the columns to drop
     */
    public void removeColumns(List<Integer> leftoutcolumns) {
        int keptLength = this.data.length - leftoutcolumns.size();
        float[] keptData = new float[keptLength];
        float[] keptPaired = new float[keptLength];
        float[] keptUnpaired = new float[keptLength];

        int from = 0; // first column not yet handled
        int cnt = 0;  // counts position in the new arrays
        Iterator<Integer> iter = leftoutcolumns.iterator();
        while (iter.hasNext()) {
            int leaveout = iter.next();
            // copy everything between the previous dropped column and this one
            for (int i = from; i < leaveout; i++) {
                keptData[cnt] = this.data[i];
                keptPaired[cnt] = dataProbGivenPaired[i];
                keptUnpaired[cnt] = dataProbGivenUnpaired[i];
                cnt++;
            }
            from = leaveout + 1;
        }

        // copy the part after the last dropped column
        for (int i = from; i < data.length; i++) {
            keptData[cnt] = this.data[i];
            keptPaired[cnt] = dataProbGivenPaired[i];
            keptUnpaired[cnt] = dataProbGivenUnpaired[i];
            cnt++;
        }

        this.data = keptData;
        this.dataProbGivenPaired = keptPaired;
        this.dataProbGivenUnpaired = keptUnpaired;
    }
}