/*******************************************************************************
* GenPlay, Einstein Genome Analyzer
* Copyright (C) 2009, 2014 Albert Einstein College of Medicine
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* Authors: Julien Lajugie <julien.lajugie@einstein.yu.edu>
* Nicolas Fourel <nicolas.fourel@einstein.yu.edu>
* Eric Bouhassira <eric.bouhassira@einstein.yu.edu>
*
* Website: <http://genplay.einstein.yu.edu>
******************************************************************************/
package edu.yu.einstein.genplay.core.multiGenome.operation.BED;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import edu.yu.einstein.genplay.core.multiGenome.VCF.VCFLine;
import edu.yu.einstein.genplay.core.multiGenome.VCF.VCFFile.VCFFile;
import edu.yu.einstein.genplay.core.multiGenome.VCF.VCFHeaderType.VCFHeaderAdvancedType;
import edu.yu.einstein.genplay.core.multiGenome.VCF.VCFHeaderType.VCFHeaderType;
import edu.yu.einstein.genplay.core.multiGenome.operation.ExportEngine;
import edu.yu.einstein.genplay.core.multiGenome.operation.fileScanner.FileScannerInterface;
import edu.yu.einstein.genplay.core.multiGenome.operation.fileScanner.SingleFileScanner;
import edu.yu.einstein.genplay.core.multiGenome.utils.VCFLineUtility;
import edu.yu.einstein.genplay.dataStructure.chromosome.Chromosome;
import edu.yu.einstein.genplay.dataStructure.enums.AlleleType;
import edu.yu.einstein.genplay.dataStructure.enums.CoordinateSystemType;
import edu.yu.einstein.genplay.dataStructure.enums.VCFColumnName;
import edu.yu.einstein.genplay.dataStructure.list.genomeWideList.SCWList.SCWList;
import edu.yu.einstein.genplay.dataStructure.list.genomeWideList.SCWList.SimpleSCWList.SimpleSCWList;
import edu.yu.einstein.genplay.exception.exceptions.InvalidChromosomeException;
import edu.yu.einstein.genplay.gui.track.Track;
import edu.yu.einstein.genplay.util.Utils;
/**
* Operation to convert a VCF track into a variable window track.
*
* @author Nicolas Fourel
*/
public class MGOBedConvertSingleFile extends ExportEngine {

    /** Full name of the genome to convert. */
    private final String fullGenomeName;

    /** Track holding the data of the first allele, null when that allele is not converted. */
    private final Track firstAlleleTrack;

    /** Track holding the data of the second allele, null when that allele is not converted. */
    private final Track secondAlleleTrack;

    /** Score used when an allele is not known ("." in the genotype); null means such alleles get no score. */
    private final Double dotValue;

    /** Header field whose value is used as the score. */
    private final VCFHeaderType header;

    /** Coordinate system used to export the positions. */
    private final CoordinateSystemType coordinateSystem;

    /** Settings helpers for both alleles; both are always updated so that each can be adjusted against the other. */
    private List<AlleleSettingsBedConvert> fullAlleleList;

    /** Subset of {@link #fullAlleleList} that actually receives scores (one helper per non-null track). */
    private List<AlleleSettingsBedConvert> alleleListToConvert;

    /** Index of the genome column in the VCF file, resolved in {@link #process()}. */
    private int genomeIndex;

    /** Number of calls to {@link #processLine(FileScannerInterface)} so far. */
    int cpt = 0;


    /**
     * Constructor of {@link MGOBedConvertSingleFile}.
     * The conversion always uses the {@link CoordinateSystemType#METAGENOME} coordinate system.
     * @param fullGenomeName    the full genome name of the genome to export
     * @param firstAlleleTrack  track to export the first allele (null to skip that allele)
     * @param secondAlleleTrack track to export the second allele (null to skip that allele)
     * @param dotValue          value to give for "." in genotype (null to omit those alleles)
     * @param header            the header to use as a score
     */
    public MGOBedConvertSingleFile (String fullGenomeName, Track firstAlleleTrack, Track secondAlleleTrack, Double dotValue, VCFHeaderType header) {
        this.fullGenomeName = fullGenomeName;
        this.firstAlleleTrack = firstAlleleTrack;
        this.secondAlleleTrack = secondAlleleTrack;
        this.dotValue = dotValue;
        this.header = header;
        isConversion = true;
        coordinateSystem = CoordinateSystemType.METAGENOME;
    }


    /**
     * Checks the preconditions of the operation: exactly one VCF file and at least one allele track.
     * When they are met, the file scanner and the allele settings helpers are created.
     * @return true if the operation can start, false otherwise (the reason is printed on the error stream)
     * @throws Exception if the initialization of the allele settings helpers fails
     */
    @Override
    protected boolean canStart() throws Exception {
        List<VCFFile> fileList = getFileList();
        if (fileList.size() != 1) {
            System.err.println("MGOBedConvertSingleFile.canStart() number of files invalid: " + fileList.size());
            return false;
        }
        if ((firstAlleleTrack == null) && (secondAlleleTrack == null)) {
            System.err.println("MGOBedConvertSingleFile.canStart() one track has to be not null");
            return false;
        }
        fileScanner = new SingleFileScanner(this);
        initializeAlleleList();
        return true;
    }


    /**
     * The allele settings helpers are created by {@link #canStart()}, which must have run successfully
     * before this method is called with a non-null first track.
     * @return the scored chromosome window list for the first track, null if not required
     * @throws InvalidChromosomeException if thrown while building the list
     * @throws InterruptedException if thrown while building the list
     * @throws ExecutionException if thrown while building the list
     */
    public SCWList getFirstList () throws InvalidChromosomeException, InterruptedException, ExecutionException {
        if (firstAlleleTrack == null) {
            return null;
        }
        // When the first track is requested, its helper is always the first element of the list to convert.
        return getList(alleleListToConvert.get(0));
    }


    /**
     * Builds a {@link SimpleSCWList} from the list views gathered by an allele settings helper.
     * @param alleleSettings the allele settings helper
     * @return the {@link SCWList}
     * @throws InvalidChromosomeException if thrown while building the list
     * @throws InterruptedException if thrown while building the list
     * @throws ExecutionException if thrown while building the list
     */
    private SCWList getList (AlleleSettingsBedConvert alleleSettings) throws InvalidChromosomeException, InterruptedException, ExecutionException {
        return new SimpleSCWList(alleleSettings.getListOfListViews());
    }


    /**
     * Retrieves the score of an allele for the current line.
     * For a known allele the score is read from the header field of the line; when that field holds
     * several comma separated values, the value matching the alternative of the allele is selected.
     * For an unknown allele the score is the "dot value" given at construction.
     * @param currentLine  the current line in process
     * @param alleleExport the allele setting helper to use
     * @return the score to use, null otherwise
     */
    private Object getScore (VCFLine currentLine, AlleleSettingsBedConvert alleleExport) {
        if (!alleleExport.isKnown()) {
            // dotValue is null when unknown alleles must be omitted
            return dotValue;
        }

        Object value = currentLine.getHeaderField(header, genomeIndex);
        if (value == null) {
            return null;
        }

        Object[] values = Utils.split(value.toString(), ',');
        int valueIndex = 0;
        if ((values.length > 1) && (header instanceof VCFHeaderAdvancedType)) {
            VCFHeaderAdvancedType advanced = (VCFHeaderAdvancedType) header;
            if (!advanced.getNumber().equals("1")) {
                valueIndex = alleleExport.getCurrentAltIndex();
                // The FORMAT field "AD" is shifted by one position
                // (presumably because its first value relates to the reference allele -- to confirm against the VCF specification).
                if ((advanced.getColumnCategory() == VCFColumnName.FORMAT) && header.getId().equals("AD")) {
                    valueIndex++;
                }
            }
        }

        if ((valueIndex > -1) && (valueIndex < values.length)) {
            return values[valueIndex];
        }
        return null;
    }


    /**
     * The allele settings helpers are created by {@link #canStart()}, which must have run successfully
     * before this method is called with a non-null second track.
     * @return the scored chromosome window list for the second track, null if not required
     * @throws InvalidChromosomeException if thrown while building the list
     * @throws InterruptedException if thrown while building the list
     * @throws ExecutionException if thrown while building the list
     */
    public SCWList getSecondList () throws InvalidChromosomeException, InterruptedException, ExecutionException {
        if (secondAlleleTrack == null) {
            return null;
        }
        // The helper of the second allele is at index 1 when both alleles are converted, at index 0 when it is alone.
        int settingsIndex = (alleleListToConvert.size() == 2) ? 1 : 0;
        return getList(alleleListToConvert.get(settingsIndex));
    }


    /**
     * Initialize the list of allele settings helpers: the full list always contains both alleles,
     * the list to convert contains only the alleles having a track.
     * @throws CloneNotSupportedException
     */
    private void initializeAlleleList () throws CloneNotSupportedException {
        fullAlleleList = new ArrayList<AlleleSettingsBedConvert>();
        fullAlleleList.add(new AlleleSettingsBedConvert(AlleleType.ALLELE01, coordinateSystem));
        fullAlleleList.add(new AlleleSettingsBedConvert(AlleleType.ALLELE02, coordinateSystem));

        alleleListToConvert = new ArrayList<AlleleSettingsBedConvert>();
        if (firstAlleleTrack != null) {
            alleleListToConvert.add(fullAlleleList.get(0));
        }
        if (secondAlleleTrack != null) {
            alleleListToConvert.add(fullAlleleList.get(1));
        }
    }


    /**
     * Resolves the column index of the genome in the VCF file and runs the file scan.
     * @throws Exception if the index retrieval or the file scan fails
     */
    @Override
    protected void process() throws Exception {
        // Retrieve the index of the column of the genome in the VCF
        genomeIndex = fileScanner.getCurrentVCFReader().getReader().getIndexFromGenome(fullGenomeName);

        // Compute the file scan algorithm
        fileScanner.compute();
    }


    /**
     * Processes the current line of the scanner: both allele helpers are updated with the line,
     * then a score is added to each helper that has to be converted.
     * Lines without a genotype field, or whose genotype is not three characters long, are skipped.
     * @param fileAlgorithm the file scanner providing the current line
     * @throws IOException
     */
    @Override
    public void processLine(FileScannerInterface fileAlgorithm) throws IOException {
        cpt++;
        VCFLine currentLine = fileAlgorithm.getCurrentLine();
        currentLine.processForAnalyse();

        // A line with no genotype field for this genome is skipped instead of aborting the scan with a NullPointerException.
        Object genotype = currentLine.getFormatField(genomeIndex, 0);
        if (genotype == null) {
            return;
        }
        String gt = genotype.toString();
        // Only three character genotypes are handled (presumably diploid genotypes such as "0/1" or "0|1").
        if (gt.length() != 3) {
            return;
        }

        Chromosome chromosome = currentLine.getChromosome();
        int[] lengths = VCFLineUtility.getVariantLengths(currentLine.getREF(), Utils.split(currentLine.getALT(), ','), currentLine.getINFO());

        // Both alleles are always initialized, even when only one of them is converted,
        // because each one is updated against the other below.
        for (AlleleSettingsBedConvert alleleExport: fullAlleleList) {
            int altIndex = VCFLineUtility.getAlleleIndex(gt.charAt(alleleExport.getCharIndex()));
            alleleExport.initializeCurrentInformation(lengths, currentLine, altIndex);
        }

        AlleleSettingsBedConvert firstAllele = fullAlleleList.get(0);
        AlleleSettingsBedConvert secondAllele = fullAlleleList.get(1);
        firstAllele.updateCurrentInformation(secondAllele, chromosome);
        secondAllele.updateCurrentInformation(firstAllele, chromosome);
        firstAllele.finalizePosition();
        secondAllele.finalizePosition();

        for (AlleleSettingsBedConvert alleleExport: alleleListToConvert) {
            Object score = getScore(currentLine, alleleExport);
            // A null score means the header field was not found in the line (or the allele must be omitted): nothing to add.
            if (score != null) {
                try {
                    alleleExport.addCurrentInformation(chromosome, score, includeReferences, includeNoCall);
                } catch (Exception e) {
                    // Best effort: a line that cannot be added is reported with its cause and the scan continues.
                    System.err.println("MGOBedConvertSingleFile.processLine() could not add the line: " + currentLine + " (" + e + ")");
                }
            }
        }
    }


    /**
     * Not used by this operation: the body is intentionally empty.
     */
    @Override
    public void processLine(VCFLine src, VCFLine dest) throws IOException {}
}