package com.github.lindenb.jvarkit.tools.jaspar;
import java.util.Iterator;
import java.util.regex.Pattern;
import htsjdk.tribble.readers.LineIterator;
public class Matrix
{
private static final char BASES[]={'A','C','G','T'};
public enum Type {PFM,PWM};
private Type type;
private String name;
private double data[];
private Matrix(String name,double data[])
{
this.type=Type.PFM;
this.name=name;
this.data=data;
}
public double score(CharSequence S)
{
if(S.length()!=this.length()) throw new IllegalArgumentException("Not same length S/matrix");
double score=0.0;
for (int x = 0; x < length(); x++)
{
int y = -1;
switch(S.charAt(x))
{
case 'a':case 'A': y=0; break;
case 'c':case 'C': y=1; break;
case 'g':case 'G': y=2; break;
case 't':case 'T': y=3; break;
}
if (y == -1) continue;
score += get(y,x);
}
return score;
}
public double sum(int column)
{
double t=0;
for(int y=0;y<4;++y) t+=get(y,column);
return t;
}
public double max(int column)
{
double t=-1;
for(int y=0;y<4;++y) t=Math.max(t, get(y,column));
return t;
}
public double max()
{
double m=0;
for(int x=0;x< length();++x) m+=max(x);
return m;
}
public String getArchetype()
{
StringBuilder b=new StringBuilder(this.length());
for(int x=0;x< length();++x)
{
char c='A';
double m=0;
for(int y=0;y<4;++y)
{
if(m>get(y,x)) continue;
c=BASES[y];
m=get(y,x);
}
b.append(c);
}
return b.toString();
}
public Matrix convertToPWM()
{
if(type!=Type.PFM) throw new IllegalStateException();
Matrix pwm=new Matrix(this.name,new double[data.length]);
pwm.type=Type.PWM;
/* number of observations */
double log2= Math.log(2);
double prior_params=0.25;
for(int x=0;x< length();++x)
{
int N=(int)sum(x);
for(int y=0;y<4;++y)
{
double f=this.get(y, x);
double w=Math.log((f+Math.sqrt(N)*prior_params)/(N+Math.sqrt(N))/prior_params)/log2;
pwm.data[y*length()+x]=w;
}
}
return pwm;
}
public int length()
{
return data.length/4;
}
public double get(int y,int x)
{
return data[y*length()+x];
}
public String getName()
{
return name;
}
@Override
public String toString()
{
StringBuilder b=new StringBuilder();
b.append(">").append(getName()).append('\n');
for(int y=0;y< 4;++y)
{
b.append(BASES[y]);
for(int x=0;x< length();++x)
{
b.append(" ").append(String.format("%3d",get(y, x)));
}
b.append("\n");
}
return b.toString();
}
/** iterator reading jaspar database */
public static Iterator<Matrix> iterator(LineIterator r)
{
return new MyIterator(r);
}
private static class MyIterator
implements Iterator<Matrix>
{
private Pattern ws=Pattern.compile("[\t ]+");
private LineIterator iter;
MyIterator(LineIterator iter)
{
this.iter=iter;
}
@Override
public boolean hasNext()
{
while(iter.hasNext())
{
String line=iter.peek();
if(line.isEmpty())
{
iter.next();
continue;
}
if(!line.startsWith(">"))
{
throw new RuntimeException("Expected line to start with '>' but got "+line);
}
return true;
}
return false;
}
@Override
public Matrix next() {
if(!hasNext()) throw new IllegalStateException();
String header=iter.next().substring(1).trim();
double data[]=null;
for(int i=0;i< 4;++i)
{
if(!iter.hasNext()) throw new IllegalStateException();
String line=iter.next();
String tokens[]=this.ws.split(line);
if(i==0)
{
data=new double[tokens.length*4];
}
else
{
if(tokens.length*4!=data.length) throw new RuntimeException("Bad matrix in "+header);
}
for(int j=0;j< tokens.length;++j) data[i*tokens.length+j]=Integer.parseInt(tokens[j]);
}
return new Matrix(header, data);
}
@Override
public void remove()
{
throw new UnsupportedOperationException();
}
}
}