/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package edu.mayo.bior.pipeline.SNPEff;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import org.apache.log4j.Logger;
import com.google.gson.JsonObject;
import com.tinkerpop.pipes.Pipe;
import com.tinkerpop.pipes.PipeFunction;
import com.tinkerpop.pipes.transform.TransformFunctionPipe;
import com.tinkerpop.pipes.util.Pipeline;
import edu.mayo.bior.pipeline.SNPEff.SNPEffHelper.InfoFieldKey;
import edu.mayo.bior.pipeline.VCFProgramPipes.VCFProgram2HistoryPipe;
import edu.mayo.exec.UnixStreamCommand;
import edu.mayo.pipes.history.History;
/**
 * Builds pipes that post-process SNPEff output rows.
 *
 * <p>Each row is handled as a {@link History}. The EFF entry found in column index 7
 * of the row is parsed and a JSON string describing the effect(s) is appended to the
 * row as a new last column (see {@link SNPEffTransformPipe}).
 *
 * @author m089716
 */
public class SNPEffPostProcessPipeline {

    // Log under this class's own category (it was previously registered under UnixStreamCommand).
    private static final Logger log = Logger.getLogger(SNPEffPostProcessPipeline.class);

    /** When true only the most significant effect is reported; otherwise every effect is. */
    boolean summarizeEffect = true;

    public static final String SNPEFF_EFFECT_METADATA_DELIMITER = "[()]";

    /** Regular expression that splits the sub-fields found between the parentheses of an effect. */
    public static final String SNPEFF_EFFECT_METADATA_SUBFIELD_DELIMITER = "\\|";

    /** JSON property names, positionally matched against the annotation list of an effect. */
    private static final String[] HEADERS = {
        "Effect",
        "Effect_impact",
        "Functional_class",
        "Codon_change",
        "Amino_acid_change",
        "Gene_name",
        "Gene_bioType",
        "Coding",
        "Transcript",
        "Exon"
    };

    /**
     * SNPEff outputs more than one Effect for a variant. If 'summarizeEffect' is set to true
     * only the 'MostSignificantEffect' is shown, which is the default behavior in TreatWorkflow.
     *
     * @param summarizeEffect true to report only the most significant effect
     */
    public SNPEffPostProcessPipeline(boolean summarizeEffect) {
        this.summarizeEffect = summarizeEffect;
    }

    /**
     * Assembles the post-processing pipeline:
     * {@code input -> VCFProgram2HistoryPipe -> SNPEff transform -> output}.
     *
     * @param input  pipe supplying the output lines/strings of SNPEff
     * @param output pipe receiving the rows after the parsed effect column has been appended
     * @return the assembled pipeline
     */
    public Pipe getSNPEffPostProcessPipeline(Pipe input, Pipe output) {
        Pipe<History, History> transform =
                new TransformFunctionPipe<History, History>(new SNPEffTransformPipe(this.summarizeEffect));
        return new Pipeline(
                input,
                new VCFProgram2HistoryPipe(), // used to remove all header lines from the input
                transform,
                output);
    }

    /**
     * Creates a stand-alone transform pipe that appends the parsed effect column to each row.
     *
     * @param showMostSignificantEffectOnly true to report only the most significant effect
     * @return a new transform pipe wrapping a {@link SNPEffTransformPipe}
     */
    public TransformFunctionPipe getSNPEffTransformPipe(boolean showMostSignificantEffectOnly) {
        return new TransformFunctionPipe<History, History>(
                new SNPEffTransformPipe(showMostSignificantEffectOnly));
    }

    /**
     * Pipe function that parses the SNPEff annotation of a row and appends the result,
     * as a JSON string, to the end of that row.
     *
     * @author m089716
     */
    public static class SNPEffTransformPipe implements PipeFunction<History, History> {

        /** Index of the column searched for the "EFF=" entry. */
        private static final int INFO_COLUMN = 7;

        /**
         * Index of the column searched for the "SNPEFFERR=" entry.
         * NOTE(review): the log messages talk about the INFO column (index 7), yet the code has
         * always looked at index 8 for the error tag - confirm against the producer of these rows.
         */
        private static final int ERROR_COLUMN = 8;

        private static final String EFF_TAG = "EFF=";
        private static final String ERROR_TAG = "SNPEFFERR=";

        /** Value appended when nothing could be parsed. */
        private static final String EMPTY_JSON = "{}";

        boolean showMostSignificantEffectOnly = true;

        public SNPEffTransformPipe(boolean showMostSignificantEffectOnly) {
            this.showMostSignificantEffectOnly = showMostSignificantEffectOnly;
        }

        /**
         * Appends the parsed effect value, as a JSON string, to the end of the given row.
         *
         * @param history the row to annotate; it is modified in place
         * @return the same {@code history} instance, one column longer
         */
        public History compute(History history) {
            history.add(parseEFFColumnData(history));
            return history;
        }

        /**
         * Parses the EFF data of a row.
         *
         * @param history the row produced from the SNPEff output
         * @return the most significant effect as a JSON object, or all effects wrapped in a
         *         JSON array, or an error object when the row carries "SNPEFFERR=", or
         *         <code>{}</code> when nothing could be parsed (the reason is logged)
         */
        private String parseEFFColumnData(History history) {
            try {
                if (history.size() <= INFO_COLUMN) {
                    log.error("SNPEffPostProcess failed with message::Cannot Process SNPEff Result, INFO column not found!");
                    return EMPTY_JSON;
                }
                String info = history.get(INFO_COLUMN);
                if (info == null || info.length() == 0) {
                    log.error("SNPEffPostProcess failed with message::Cannot Process SNPEff Result, INFO column not found!");
                    return EMPTY_JSON;
                }
                if (info.contains(EFF_TAG)) {
                    // Everything after "EFF=" is the comma separated effect list.
                    return parseEffects(info.substring(info.indexOf(EFF_TAG) + EFF_TAG.length()));
                }
                // Only touch the error column when the row actually has one.
                String errorColumn = history.size() > ERROR_COLUMN ? history.get(ERROR_COLUMN) : null;
                if (errorColumn != null && errorColumn.contains(ERROR_TAG)) {
                    // The message deliberately keeps the "SNPEFFERR=" prefix, as it always has.
                    return errorJson(errorColumn.substring(errorColumn.indexOf(ERROR_TAG)));
                }
                log.error("SNPEffPostProcess failed with message::INFO column does not have valid EFF or SNPEFFERROR to parse!");
            } catch (Exception ex) {
                // Best effort: a row that cannot be parsed must not break the pipeline.
                log.error("SNPEffPostProcess Failed with message::" + ex.getMessage(), ex);
            }
            return EMPTY_JSON;
        }

        /**
         * Parses the comma separated effect list, e.g. {@code EXON(|||),NON(|||)}.
         *
         * @param effValue the text following "EFF="
         * @return the JSON for the most significant effect or for all effects, depending on
         *         {@code showMostSignificantEffectOnly}; <code>{}</code> if any effect is malformed
         */
        private String parseEffects(String effValue) {
            List<SNPEffectHolder> holders = new ArrayList<SNPEffectHolder>();
            for (String effect : effValue.split(",")) {
                Map<String, String> fields = parseEffect(effect);
                if (fields == null) {
                    log.error("SNPEffPostProcess Failed with message:: Effect values are not wellformed!! " + effect);
                    return EMPTY_JSON;
                }
                holders.add(new SNPEffectHolder(fields));
            }

            if (this.showMostSignificantEffectOnly) {
                // Add only annotations for one of the most biologically-significant effect from this set
                SNPEffectHolder mostSignificantEffect = SNPEffHelper.getMostSignificantEffect(holders);
                return jsonize(mostSignificantEffect.getAnnotationAsList());
            }

            // Otherwise convert every effect and wrap the results in a JSON array.
            List<String> effectJsons = new ArrayList<String>(holders.size());
            for (SNPEffectHolder holder : holders) {
                effectJsons.add(jsonize(holder.getAnnotationAsList()));
            }
            return buildJsonArray(effectJsons);
        }

        /**
         * Splits one effect of the form {@code NAME(a|b|c|...)} into its named fields.
         *
         * @param effect a single effect string
         * @return the field map, or {@code null} when the parentheses are missing or misordered
         */
        private Map<String, String> parseEffect(String effect) {
            int open = effect.indexOf('(');
            int close = effect.indexOf(')');
            if (open < 0 || close <= open) {
                return null;
            }

            String[] subfields = effect.substring(open + 1, close)
                    .split(SNPEFF_EFFECT_METADATA_SUBFIELD_DELIMITER);

            Map<String, String> fields = new HashMap<String, String>();
            fields.put(InfoFieldKey.EFFECT_KEY.getKeyName(), effect.substring(0, open));
            fields.put(InfoFieldKey.IMPACT_KEY.getKeyName(), valueAt(subfields, 0));
            fields.put(InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), valueAt(subfields, 1));
            fields.put(InfoFieldKey.CODON_CHANGE_KEY.getKeyName(), valueAt(subfields, 2));
            fields.put(InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(), valueAt(subfields, 3));
            fields.put(InfoFieldKey.GENE_NAME_KEY.getKeyName(), valueAt(subfields, 4));
            fields.put(InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), valueAt(subfields, 5));
            fields.put(InfoFieldKey.CODING.getKeyName(), valueAt(subfields, 6));
            fields.put(InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), valueAt(subfields, 7));
            // In the Effect, ExonId is the last column and is sometimes empty.
            fields.put(InfoFieldKey.EXON_ID_KEY.getKeyName(), valueAt(subfields, 8));
            return fields;
        }

        /**
         * Returns {@code values[index]}, or "" when the array is too short. String.split drops
         * trailing empty strings, so empty trailing sub-fields are simply absent from the array.
         */
        private static String valueAt(String[] values, int index) {
            return index < values.length ? values[index] : "";
        }

        /**
         * Builds the JSON object reported for a variant that carries an SNPEff error.
         *
         * @param message the error text, starting at "SNPEFFERR="
         * @return a JSON object with the properties "SNPEffMessage" and "Status"
         */
        private String errorJson(String message) {
            JsonObject jObj = new JsonObject();
            jObj.addProperty("SNPEffMessage", message);
            jObj.addProperty("Status", "SNPEff failed to assign function to this variant");
            return jObj.toString();
        }

        /**
         * Converts an annotation list to a JSON object, naming the values by position.
         * Empty (or null) values are left out and values beyond the known names are ignored.
         *
         * @param eff annotation values ordered as: Effect, Effect_impact, Functional_class,
         *            Codon_change, Amino_acid_change, Gene_name, Gene_bioType, Coding,
         *            Transcript, Exon
         * @return for example
         *         {"Effect":"UPSTREAM","Effect_impact":"MODIFIER","Gene_name":"LINC00515","Gene_bioType":"antisense","Coding":"NON_CODING","Transcript":"ENST00000567517"}
         */
        private String jsonize(List<String> eff) {
            JsonObject jObj = new JsonObject();
            if (eff == null) {
                return jObj.toString();
            }
            // Bound by both lengths so that a longer list can never index past HEADERS.
            int count = Math.min(eff.size(), HEADERS.length);
            for (int i = 0; i < count; i++) {
                String value = eff.get(i);
                if (value != null && value.length() > 0) {
                    jObj.addProperty(HEADERS[i], value);
                }
            }
            return jObj.toString();
        }

        /**
         * Wraps individual JSON strings in an object holding a single "EFF" array.
         *
         * @param resultsJsonStrings the JSON string of each effect
         * @return as below:
         *         <pre>
         *         {"EFF":[{"Effect":"INTRON","Effect_impact":"MODERATE",...},{...}]}
         *         </pre>
         */
        private String buildJsonArray(List<String> resultsJsonStrings) {
            // TODO use json dom to generate json array, instead of building the array manually
            StringBuilder arrayOfJsons = new StringBuilder("{\"EFF\":[");
            String separator = "";
            for (String json : resultsJsonStrings) {
                arrayOfJsons.append(separator).append(json);
                separator = ",";
            }
            return arrayOfJsons.append("]}").toString();
        }
    }
}