package org.seqcode.genome.sequence.seqfunctions;
import java.util.ArrayList;
import java.util.List;
import org.seqcode.genome.location.Point;
import org.seqcode.genome.location.Region;
import org.seqcode.genome.location.ScoredPoint;
import org.seqcode.genome.location.ScoredStrandedPoint;
import org.seqcode.genome.sequence.ScoredSequence;
import org.seqcode.genome.sequence.SequenceGenerator;
import org.seqcode.genome.sequence.SequenceUtils;
/**
* Create a composite scoring function of a particular type of SeqFunction
*
* @author mahony
*
* @param <F> SeqFunction
*/
public class CompositeSeqFunction<F extends SeqFunction> {
F function;
int width;
double [][] means;
double [][] variances;
double [][][] rawScores;
List<ScoredSequence> scoredSeqs;
int zeroOffset=0;
public CompositeSeqFunction(F fn, int width, int zeroOffset){
function = fn;
this.width = width;
means = new double[function.scoreDimension()][width];
variances = new double[function.scoreDimension()][width];
for(int i=0; i<function.scoreDimension(); i++)
for(int j=0; j<width; j++){
means[i][j]=0;
variances[i][j]=0;
}
this.scoredSeqs = new ArrayList<ScoredSequence>();
this.zeroOffset = zeroOffset;
}
//Accessors
public int getWidth(){return width;}
public double[][] getMeans(){return means;}
public double[][] getVariances(){return variances;}
public int getNumSeqs(){return scoredSeqs.size();}
public F getFunction(){return function;}
public int getZeroOffset(){return zeroOffset;}
public double[][][] getAllScores(){return rawScores;}
/**
* Add sequences (possibly weighted) to the composite list
* @param points
*/
public void addSequencesFromPoints(List<Point> points, SequenceGenerator seqgen){
for(Point p : points){
double weight = 1;
char strand = '+';
if(p instanceof ScoredStrandedPoint){
strand = p.getStrand();
weight = ((ScoredPoint)p).getScore();
}
Region r = new Region(p.getGenome(), p.getChrom(), p.getLocation()-(width/2)+1, p.getLocation()+(width/2));
String seq = seqgen.execute(r);
//Handle minus strand regions
if(strand=='-')
seq = SequenceUtils.reverseComplement(seq);
//Ignore regions that are too short (presumably because they are at the edge of the chromosome)
if(seq.length()==width){
scoredSeqs.add(new ScoredSequence(seq, weight));
}
}
}
/**
* Add equally weighted sequences to the composite list
* @param seqs
*/
public void addSequences(List<String> seqs){
for(String s : seqs){
if(s.length()==width){
scoredSeqs.add(new ScoredSequence(s, 1.0));
}
}
}
/**
* Add weighted sequences to the composite list
* @param seqs
*/
public void addScoredSequences(List<ScoredSequence> sseqs){
scoredSeqs.addAll(sseqs);
}
/**
* Populate the composite profile with regions of a fixed width around points
* @param points
*/
public void populate(){
try{
int numSeqs = 0;
double normTotal=0;
//Weighted mean
for(ScoredSequence s : scoredSeqs){
if(s.getSeq().length()==width){
numSeqs++;
normTotal+=s.getScore();
double[][] tmpScores = function.score(s.getSeq());
for(int i=0; i<function.scoreDimension(); i++)
for(int j=0; j<width; j++)
means[i][j]+=(tmpScores[i][j]*s.getScore());
}
}
for(int i=0; i<function.scoreDimension(); i++)
for(int j=0; j<width; j++)
means[i][j] = means[i][j]/normTotal;
//Init the raw scores
rawScores = new double[function.scoreDimension()][width][numSeqs];
//Weighted variance & raw scores
int sx=0;
for(ScoredSequence s : scoredSeqs){
if(s.getSeq().length()==width){
double[][] tmpScores = function.score(s.getSeq());
for(int i=0; i<function.scoreDimension(); i++)
for(int j=0; j<width; j++){
variances[i][j]+=s.getScore()*(tmpScores[i][j]-means[i][j])*(tmpScores[i][j]-means[i][j]);
rawScores[i][j][sx]=tmpScores[i][j];
}
sx++;
}
}
for(int i=0; i<function.scoreDimension(); i++)
for(int j=0; j<width; j++)
variances[i][j] = variances[i][j]/normTotal;
} catch (SeqFunctionException e) {
e.printStackTrace();
}
}
public void printFunctionMeans(){
String[] labels = function.dimensionLabels();
for(int i=0; i<function.scoreDimension(); i++){
System.out.print(labels[i]);
for(int j=0; j<width; j++)
System.out.print("\t"+means[i][j]);
System.out.println("");
}
}
public void printFunctionVariances(){
String[] labels = function.dimensionLabels();
for(int i=0; i<function.scoreDimension(); i++){
System.out.print(labels[i]);
for(int j=0; j<width; j++)
System.out.print("\t"+variances[i][j]);
System.out.println("");
}
}
}