/*******************************************************************************
* GenPlay, Einstein Genome Analyzer
* Copyright (C) 2009, 2014 Albert Einstein College of Medicine
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* Authors: Julien Lajugie <julien.lajugie@einstein.yu.edu>
* Nicolas Fourel <nicolas.fourel@einstein.yu.edu>
* Eric Bouhassira <eric.bouhassira@einstein.yu.edu>
*
* Website: <http://genplay.einstein.yu.edu>
******************************************************************************/
package edu.yu.einstein.genplay.core.multiGenome.operation.VCF;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import edu.yu.einstein.genplay.core.multiGenome.VCF.VCFLine;
import edu.yu.einstein.genplay.core.multiGenome.VCF.VCFFile.VCFFile;
import edu.yu.einstein.genplay.core.multiGenome.operation.UpdateEngine;
import edu.yu.einstein.genplay.core.multiGenome.operation.fileScanner.FileScannerInterface;
import edu.yu.einstein.genplay.core.multiGenome.operation.fileScanner.UpdateFileScanner;
import edu.yu.einstein.genplay.dataStructure.enums.AlleleType;
import edu.yu.einstein.genplay.util.Utils;
/**
* This class create an new VCF file based on a VCF file, using the data of a VCF track.
* The update here is about the genotype field.
* It will use the genotype of variation from a track to change the one of another VCF.
*
* @author Nicolas Fourel
* @version 0.1
*/
public class MGOApplyVCFGenotype extends UpdateEngine {
protected BufferedWriter data; // The output file stream
private List<String> destGenome; // The names of the genomes to export
private boolean canBeInserted; // If the current line can be inserted
@Override
protected boolean canStart() throws Exception {
List<VCFFile> fileList = getFileList();
if (fileList.size() == 1) {
if (fileList.get(0).getFile().exists() && fileToUpdate.getFile().exists()) {
fileScanner = new UpdateFileScanner(this, fileList.get(0), fileToUpdate);
if (genomeNameMap.size() > 0) {
return true;
}
System.err.println("MGOApplyVCFGenotype.canStart() Files does not have any genome name in common.");
}
else {
System.err.println("MGOApplyVCFGenotype.canStart() At least one of the file does not exist.");
}
}
System.err.println("MGOApplyVCFGenotype.canStart() number of files invalid: " + fileList.size());
return false;
}
@Override
protected void process() throws Exception {
File dataFile = new File(path);
FileWriter fw = new FileWriter(dataFile);
data = new BufferedWriter(fw);
String header = ((UpdateFileScanner) fileScanner).getDestinationReader().getFullHeader();
header += "\n";
data.write(header);
fileScanner.compute();
data.close();
data.close();
}
@Override
public void processLine(VCFLine src, VCFLine dest) throws IOException {
canBeInserted = false;
destGenome = fileToUpdate.getHeader().getGenomeRawNames();
List<String> gtList = getNewGtList(src, dest);
if (canBeInserted) { // set a cascade of canBeInserted to be sure not to process data if there is no need
String format = getFormatString(dest, gtList);
String line = getFullLine(dest, format);
line += "\n";
if (canBeInserted) {
data.write(line);
}
}
}
/**
* @param src the line to use as a model
* @param dest the line to update
* @return the list of all genotypes
*/
private List<String> getNewGtList (VCFLine src, VCFLine dest) {
List<String> gtList = new ArrayList<String>();
for (String destGenomeName: destGenome) {
String gtDest = dest.getGenotype(destGenomeName);
String newGtDest = gtDest;
if (genomeNameMap.containsKey(destGenomeName)) {
String srcGenomeName = genomeNameMap.get(destGenomeName);
String gtSrc = src.getGenotype(srcGenomeName);
if (hasAlternative(gtSrc)) {
canBeInserted = true;
if (isGenotypePhased(gtSrc)) {
String altSrc01 = getFormattedAlt(src.getAlternativeFromRawName(srcGenomeName, AlleleType.ALLELE01));
String altSrc02 = getFormattedAlt(src.getAlternativeFromRawName(srcGenomeName, AlleleType.ALLELE02));
int alleleDest01 = getAltIndex(dest.getAlternatives(), altSrc01);
int alleleDest02 = getAltIndex(dest.getAlternatives(), altSrc02);
if ((alleleDest01 == -1) || (alleleDest02 == -1)) {
canBeInserted = false;
System.err.println(src.getCHROM() + " " + src.getPOS() + ": Alternative '" + altSrc01 + "' and/or '" + altSrc02 + "' has not been found in alternatives field '" + dest.getAlternatives().toString() + "'.");
} else {
newGtDest = alleleDest01 + "|" + alleleDest02;
}
}
}
}
gtList.add(newGtDest);
}
return gtList;
}
/**
* @param gt a genotype
* @return true if the genotype codes for an alternative
*/
private boolean hasAlternative (String gt) {
boolean hasAlternative = false;
for (int i = 0; i < gt.length(); i++) {
char allele = gt.charAt(i);
if ((allele > 48) && (allele < 58)) { // From after 0 and until 9
hasAlternative = true;
}
}
return hasAlternative;
/*char allele01 = gt.charAt(0);
char allele02 = gt.charAt(2);
if (((allele01 == '.') || (allele01 == '0')) && ((allele02 == '.') || (allele02 == '0'))) {
return false;
}
return true;*/
}
/**
* @param gt a genotype
* @return true if phased, false otherwise
*/
private boolean isGenotypePhased (String gt) {
if (gt.length() > 1) {
if (gt.charAt(1) == '|') {
return true;
}
}
return false;
}
/**
* This method formats an unvalid alternative into an empty string.
* It does not change the alternative if it is correct.
* @param alt an alternative
* @return the formatted alternative
*/
private String getFormattedAlt (String alt) {
if (alt == null) {
alt = "";
}
return alt;
}
/**
* @param alternatives an array of alternatives
* @param alternative an alternative
* @return the index of an alternative among an array of alternatives
*/
private int getAltIndex (String[] alternatives, String alternative) {
int allele = -1;
if (!alternative.isEmpty()) {
for (int i = 0; i < alternatives.length; i++) {
if (alternative.equals(alternatives[i])) {
allele = i + 1;
break;
}
}
} else {
allele = 0;
}
return allele;
}
/**
* @param dest the line to update
* @param gtList the list of new genotypes
* @return the line with all format fields for all genomes
*/
private String getFormatString (VCFLine dest, List<String> gtList) {
String format = "";
int plIndex = getPLIndex(dest);
for (int i = 0; i < destGenome.size(); i++) {
String genome = destGenome.get(i);
String[] formatValues = dest.getFormatValues(genome);
String formatChunk = gtList.get(i);
boolean excludePL = false;
if (!gtList.get(i).equals(dest.getGenotype(genome))){
excludePL = true;
}
for (int j = 1; j < formatValues.length; j++) {
if (excludePL && (j == plIndex)) {
int num = Utils.split(formatValues[j], ',').length;
String tmp = ":0";
for (int k = 1; k < num; k++) {
tmp += ",0";
}
formatChunk += tmp;
} else {
formatChunk += ":" + formatValues[j];
}
}
format += formatChunk;
if (i < (destGenome.size() - 1)) {
format += "\t";
}
}
return format;
}
/**
* @param dest the line to update
* @return the index of the PL field in the format field, -1 if it does not exist
*/
private int getPLIndex (VCFLine dest) {
String[] format = dest.getFormat();
for (int i = 0; i < format.length; i++) {
if (format[i].equals("PL")){
return i;
}
}
return -1;
}
/**
* @param dest the line to update
* @param format the format part of the line
* @return the complete new line to insert
*/
private String getFullLine (VCFLine dest, String format) {
String line = "";
for (int i = 0; i < 9; i++) {
line += dest.getField(i) + "\t";
}
line += format;
return line;
}
@Override
public void processLine(FileScannerInterface fileAlgorithm) throws IOException {}
}