package edu.mayo.bior.pipeline.VEP;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
/**
 * Helpers that convert the pipe-delimited "CSQ" annotation text into Gson JSON
 * objects, and that select the "worst" annotation among several by comparing
 * SIFT and PolyPhen scores.
 *
 * @author Michael Meiners (m054457)
 * Date created: Apr 25, 2013
 */
public class VepFunctions {

    /** Column names of one CSQ record, in the order the pipe-delimited values appear. */
    private static final String[] HEADERS = {
        "Allele",
        "Gene",
        "Feature",
        "Feature_type",
        "Consequence",
        "cDNA_position",
        "CDS_position",
        "Protein_position",
        "Amino_acids",
        "Codons",
        "Existing_variation",
        "HGNC",
        "DISTANCE",
        "SIFT",
        "PolyPhen",
        "CELL_TYPE"
    };

    /** Index of the "SIFT" column within {@link #HEADERS}. */
    private static final int SIFT_COLUMN = 13;

    /** Index of the "PolyPhen" column within {@link #HEADERS}. */
    private static final int POLYPHEN_COLUMN = 14;

    /**
     * A prediction split into its two parts, e.g. "tolerated(0.05)" becomes
     * term "tolerated" and score 0.05.
     * Static because it never needs a reference to the enclosing instance.
     */
    static class Effect {
        public String term;
        public Double score;
    }

    /**
     * Turn a CSQ result into a JSON array holding one object per comma-separated record.
     * Ex: "CSQ=x|y|z,p|q|r" becomes [{"Allele":"x","Gene":"y","Feature":"z"},{"Allele":"p","Gene":"q","Feature":"r"}].
     * If the input contains "VEPERR", the array instead holds a single object
     * describing the failure.
     *
     * @param vepCsq the CSQ text, with or without the "CSQ=" prefix; may be null
     * @return the converted records; an empty array when {@code vepCsq} is null
     */
    public JsonArray vepCsqToJsonList(String vepCsq) {
        JsonArray jsonArray = new JsonArray();
        if (vepCsq == null) {
            return jsonArray;
        }

        if (vepCsq.contains("VEPERR")) {
            JsonObject errorObj = new JsonObject();
            errorObj.addProperty("VEPMessage", "VEPERRORMessage");
            errorObj.addProperty("Status", "VEP failed to assign function to this variant");
            jsonArray.add(errorObj);
            return jsonArray;
        }

        for (String csqItem : vepCsq.replace("CSQ=", "").split(",")) {
            jsonArray.add(vepCsqToJson(csqItem));
        }
        return jsonArray;
    }

    /**
     * Turn a single CSQ record into JSON. Empty columns are omitted.
     * Ex CSQ: "A|ENSG00000154719|ENST00000307301|Transcript|missense_variant|1043|1001|334|T/M|aCg/aTg||MRPL39||tolerated(0.05)|benign(0.001)|"
     * Ex JSON:"{"Allele":"A","Gene":"ENSG00000154719","Feature":"ENST00000307301","Feature_type":"Transcript","Consequence":"missense_variant","cDNA_position":"1043","CDS_position":"1001","Protein_position":"334","Amino_acids":"T/M","Codons":"aCg/aTg","HGNC":"MRPL39","SIFT":"tolerated(0.05)","PolyPhen":"benign(0.001)","SIFT_TERM":"tolerated","SIFT_Score":0.05,"PolyPhen_TERM":"benign","PolyPhen_Score":0.001}"
     *
     * @param vepCsqItem one pipe-delimited record
     * @return the record as a JSON object
     */
    private JsonObject vepCsqToJson(String vepCsqItem) {
        // NOTE: split() drops empty columns at the end, so there may be fewer columns than headers.
        // For ex: "a||b|||" will only have 3 columns
        String[] columns = vepCsqItem.split("\\|");
        JsonObject jsonObj = new JsonObject();

        int columnCount = Math.min(columns.length, HEADERS.length);
        for (int i = 0; i < columnCount; i++) {
            if (!columns[i].isEmpty()) {
                jsonObj.addProperty(HEADERS[i], columns[i]);
            }
        }

        addEffect(jsonObj, columns, SIFT_COLUMN, "SIFT_TERM", "SIFT_Score");
        // TODO: PolyPhen once had defaults (score 0.0, term "not_predicted") when it was absent.
        // Currently nothing is added when the value is missing - confirm whether defaults are needed.
        addEffect(jsonObj, columns, POLYPHEN_COLUMN, "PolyPhen_TERM", "PolyPhen_Score");
        return jsonObj;
    }

    /**
     * Add the term and score properties derived from one "term(score)" column.
     * Nothing is added when the column is absent or cannot be parsed.
     */
    private void addEffect(JsonObject jsonObj, String[] columns, int column, String termKey, String scoreKey) {
        if (columns.length <= column) {
            return;
        }
        Effect effect = parseEffect(columns[column]);
        if (effect == null) {
            return;
        }
        jsonObj.addProperty(termKey, effect.term);
        jsonObj.addProperty(scoreKey, effect.score);
    }

    /**
     * Convert a string (ex: "tolerated(0.05)") to an Effect object.
     *
     * @param effStr the text to parse; may be null
     * @return the parsed effect, or null when {@code effStr} is null, has no
     *         "(" followed by a ")", or the text between them is not a number
     */
    private Effect parseEffect(String effStr) {
        if (effStr == null) {
            return null;
        }
        int idxOpenParen = effStr.indexOf('(');
        if (idxOpenParen == -1) {
            return null;
        }
        // Search for ")" only after "(" so that a stray ")" earlier in the text
        // cannot produce an inverted substring range.
        int idxCloseParen = effStr.indexOf(')', idxOpenParen + 1);
        if (idxCloseParen == -1) {
            return null;
        }

        double score;
        try {
            score = Double.parseDouble(effStr.substring(idxOpenParen + 1, idxCloseParen));
        } catch (NumberFormatException ignored) {
            // A non-numeric score is treated like a missing one, so a single
            // malformed column does not abort conversion of the whole record.
            return null;
        }

        Effect effect = new Effect();
        effect.term = effStr.substring(0, idxOpenParen);
        effect.score = score;
        return effect;
    }

    /**
     * Given an array of converted CSQ records, return the one that has the worst outcome.
     * The first record carrying both "SIFT_Score" and "PolyPhen_Score" becomes the
     * initial worst; a later record replaces it only when its SIFT score is less than
     * or equal to, and its PolyPhen score greater than or equal to, the current worst.
     *
     * @param jsonArray the records to compare; may be null
     * @return the worst record, or an empty JSON object (serialized as "{}") when
     *         {@code jsonArray} is null or no record has both scores
     */
    public JsonObject getWorstCase(JsonArray jsonArray) {
        JsonObject worstJson = new JsonObject();
        if (jsonArray == null) {
            return worstJson;
        }

        boolean haveWorst = false;
        double worstSift = 0.0;
        double worstPoly = 0.0;

        // TODO: Records that have only ONE of the two scores are ignored - is that intended?
        for (JsonElement element : jsonArray) {
            JsonObject jsonObj = element.getAsJsonObject();
            JsonElement siftScore = jsonObj.get("SIFT_Score");
            JsonElement polyphenScore = jsonObj.get("PolyPhen_Score");

            // Ignore this item if it does not have both scores to compare
            if (siftScore == null || polyphenScore == null) {
                continue;
            }

            double sift = siftScore.getAsDouble();
            double poly = polyphenScore.getAsDouble();
            if (!haveWorst || (sift <= worstSift && poly >= worstPoly)) {
                haveWorst = true;
                worstSift = sift;
                worstPoly = poly;
                worstJson = jsonObj;
            }
        }
        return worstJson;
    }
}