package com.github.lindenb.jvarkit.util.vcf.predictions;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import htsjdk.variant.variantcontext.VariantContext;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFInfoHeaderLine;
import com.github.lindenb.jvarkit.tools.vcfannot.VCFPredictions;
import com.github.lindenb.jvarkit.util.so.SequenceOntologyTree;
/**
 * Parser for the INFO attribute named {@link VCFPredictions#TAG}.
 *
 * <p>The column layout is discovered once, in the constructor, from the
 * description of the matching INFO header line: the text following
 * {@code " Format:"} is split on {@code '|'} and every token is matched
 * (ignoring case) against the constants of {@link VCFPredictions.FORMAT1}.
 * Each prediction string found in a variant is then split on {@code '|'}
 * and exposed as a {@link MyPrediction}.
 *
 * <p>If the header line or its format section is missing, a warning is
 * logged and {@link #getPredictions(VariantContext)} always returns an
 * empty list.
 *
 * @author lindenb
 */
public class MyPredictionParser implements PredictionParser
{
    private static final Logger LOG = Logger.getLogger("jvarkit");
    /** Separator between the columns of one prediction. */
    private static final Pattern PIPE = Pattern.compile("[\\|]");
    /** Separator between the sequence-ontology terms of one column. */
    private static final Pattern AMP_REGEX = Pattern.compile("[&]");

    /** Maps each known column to its 0-based index in a split prediction. */
    private final Map<VCFPredictions.FORMAT1, Integer> col2col =
            new EnumMap<VCFPredictions.FORMAT1, Integer>(VCFPredictions.FORMAT1.class);
    /** Ontology used by {@link MyPrediction#getSOTerms()} to resolve labels. */
    private SequenceOntologyTree soTree = SequenceOntologyTree.getInstance();

    /** @return the INFO tag handled by this parser, i.e. {@link VCFPredictions#TAG} */
    public final String getTag()
    {
        return VCFPredictions.TAG;
    }

    /**
     * Builds the parser and reads the column layout from the header.
     *
     * @param header the VCF header; a warning is logged and the parser stays
     *               empty when it has no usable INFO line for {@link #getTag()}
     */
    public MyPredictionParser(VCFHeader header)
    {
        final VCFInfoHeaderLine info = header.getInfoHeaderLine(getTag());
        if (info == null || info.getDescription() == null)
        {
            LOG.warning("NO "+getTag()+" found in header");
            return;
        }
        final String description = info.getDescription();
        final String chunck = " Format:";
        final int start = description.indexOf(chunck);
        if (start == -1)
        {
            LOG.warning("Cannot find "+chunck+ " in "+description);
            return;
        }
        // Keep only the column names: drop spaces, quotes, dots and parentheses.
        final String format = description
                .substring(start + chunck.length())
                .replaceAll("[ \'\\.\\(\\)]+","")
                .trim();
        final String[] tokens = PIPE.split(format);
        for (int i = 0; i < tokens.length; ++i)
        {
            if (tokens[i].isEmpty()) continue;
            final VCFPredictions.FORMAT1 col = findColumn(tokens[i]);
            if (col == null)
            {
                LOG.warning("Undefined "+" tag "+tokens[i]);
                continue;
            }
            this.col2col.put(col, i);
        }
    }

    /**
     * Finds the column whose name equals {@code name}, ignoring case.
     *
     * @return the first matching constant, or {@code null} if there is none
     */
    private static VCFPredictions.FORMAT1 findColumn(final String name)
    {
        for (final VCFPredictions.FORMAT1 c : VCFPredictions.FORMAT1.values())
        {
            if (c.name().equalsIgnoreCase(name)) return c;
        }
        return null;
    }

    /** @return the part of {@code s} before the first '/', or {@code null} if {@code s} is null, empty or has no '/' */
    private static String leftOfSlash(final String s)
    {
        if (s == null || s.isEmpty()) return null;
        final int slash = s.indexOf('/');
        return slash == -1 ? null : s.substring(0, slash);
    }

    /** @return the part of {@code s} after the first '/', or {@code null} if {@code s} is null, empty or has no '/' */
    private static String rightOfSlash(final String s)
    {
        if (s == null || s.isEmpty()) return null;
        final int slash = s.indexOf('/');
        return slash == -1 ? null : s.substring(slash + 1);
    }

    /**
     * Replaces the ontology used to resolve sequence-ontology labels.
     *
     * @param soTree the ontology to use from now on
     * @return this parser, to allow call chaining
     */
    public MyPredictionParser sequenceOntologyTree( final SequenceOntologyTree soTree) {
        this.soTree = soTree;
        return this;
    }

    /**
     * Extracts every prediction stored under {@link #getTag()} in a variant.
     *
     * @param ctx the variant to read
     * @return a new list, empty when no column layout was found in the header
     *         or when the variant does not carry the attribute
     */
    @Override
    public List<MyPrediction> getPredictions(final VariantContext ctx)
    {
        final List<MyPrediction> preds = new ArrayList<MyPrediction>();
        if (this.col2col.isEmpty()) return preds;
        final Object o = ctx.getAttribute(getTag());
        if (o == null)
        {
            return preds;
        }
        if (o.getClass().isArray())
        {
            // Reflective access so that primitive arrays do not fail on a cast to Object[].
            final int n = Array.getLength(o);
            for (int i = 0; i < n; ++i) _predictions(preds, Array.get(o, i));
        }
        else if (o instanceof Collection)
        {
            for (final Object item : (Collection<?>) o) _predictions(preds, item);
        }
        else
        {
            _predictions(preds, o);
        }
        return preds;
    }

    /** Parses {@code o} and appends the result to {@code preds} unless it is null. */
    private void _predictions( List<MyPrediction> preds,Object o)
    {
        final MyPrediction p = parseOnePrediction(o);
        if (p != null) preds.add(p);
    }

    /**
     * Parses a single prediction.
     *
     * @param o the raw value; anything that is not a String is converted
     *          with {@code toString()}
     * @return the prediction built from the trimmed, pipe-split value, or
     *         {@code null} when {@code o} (or its string form) is null
     */
    public MyPrediction parseOnePrediction(final Object o)
    {
        if (o == null) return null;
        final String raw = (o instanceof String) ? (String) o : o.toString();
        if (raw == null) return null;
        return new MyPrediction(PIPE.split(raw.trim()));
    }

    /**
     * One prediction: the pipe-separated tokens of a single attribute value,
     * read through the column layout of the enclosing parser.
     */
    public class MyPrediction
        implements Prediction
    {
        private final String tokens[];

        MyPrediction(String tokens[])
        {
            this.tokens = tokens;
        }

        /**
         * @return the token stored for {@code col}, or {@code null} when the
         *         column is unknown, out of range or empty
         */
        private String getByCol(VCFPredictions.FORMAT1 col)
        {
            final Integer idx = col2col.get(col);
            if (idx == null || idx >= tokens.length || tokens[idx].isEmpty())
            {
                return null;
            }
            return tokens[idx];
        }

        /** @return the TRANSCRIPT column, or {@code null} if absent */
        public String getTranscript()
        {
            return getByCol(VCFPredictions.FORMAT1.TRANSCRIPT);
        }

        /** @return the same value as {@link #getTranscript()}; no separate gene column is read here */
        public String getGeneName()
        {
            return getTranscript();
        }

        /** @return a new map of every known column that falls inside this prediction's tokens */
        private Map<VCFPredictions.FORMAT1,String> getMap()
        {
            final Map<VCFPredictions.FORMAT1, String> hash =
                    new EnumMap<VCFPredictions.FORMAT1, String>(VCFPredictions.FORMAT1.class);
            for (final Map.Entry<VCFPredictions.FORMAT1, Integer> kv : col2col.entrySet())
            {
                final int idx = kv.getValue();
                if (idx >= this.tokens.length) continue;
                hash.put(kv.getKey(), this.tokens[idx]);
            }
            return hash;
        }

        /** @return the CODON column, or {@code null} if absent */
        public String getCodonChange()
        {
            return getByCol(VCFPredictions.FORMAT1.CODON);
        }

        /** @return the text after the first '/' of the codon change, or {@code null} */
        public String getAltCodon()
        {
            return rightOfSlash(getCodonChange());
        }

        /** @return the text before the first '/' of the codon change, or {@code null} */
        public String getRefCodon()
        {
            return leftOfSlash(getCodonChange());
        }

        /** @return the AA column, or {@code null} if absent */
        public String getAminoAcidChange()
        {
            return getByCol(VCFPredictions.FORMAT1.AA);
        }

        /** @return the text after the first '/' of the amino-acid change, or {@code null} */
        public String getAltAminoAcid()
        {
            return rightOfSlash(getAminoAcidChange());
        }

        /** @return the PROTPOS column as an integer, or {@code null} if absent or not a valid integer */
        public Integer getAminoAcidPosition()
        {
            final String s = getByCol(VCFPredictions.FORMAT1.PROTPOS);
            if (s == null || s.isEmpty()) return null;
            try {
                return Integer.parseInt(s);
            } catch (NumberFormatException ignored) {
                // Not a plain integer: reported as "no position".
                return null;
            }
        }

        /** @return the text before the first '/' of the amino-acid change, or {@code null} */
        public String getReferenceAminoAcid() {
            return leftOfSlash(getAminoAcidChange());
        }

        /** @return the SEQONTOLOGY column, or the empty string if absent */
        public String getSOTermsString()
        {
            final String EFF = getByCol(VCFPredictions.FORMAT1.SEQONTOLOGY);
            return EFF == null ? "" : EFF;
        }

        /** @return the SEQONTOLOGY column split on '&amp;', or an empty list if absent */
        public List<String> getSOTermsStrings()
        {
            final String soterms = getSOTermsString();
            if (soterms == null || soterms.isEmpty()) return Collections.emptyList();
            return Arrays.asList(AMP_REGEX.split(soterms));
        }

        /** @return the terms of the parser's ontology matching the labels of {@link #getSOTermsStrings()}; unknown labels are skipped */
        public Set<SequenceOntologyTree.Term> getSOTerms()
        {
            final Set<SequenceOntologyTree.Term> set = new HashSet<>();
            for (final String eff : getSOTermsStrings())
            {
                final SequenceOntologyTree.Term t = MyPredictionParser.this.soTree.getTermByLabel(eff);
                if (t != null) set.add(t);
            }
            return set;
        }

        @Override
        public String toString() {
            return getMap().toString()+ " "+Arrays.asList(tokens);
        }
    }
}