/*******************************************************************************
* GenPlay, Einstein Genome Analyzer
* Copyright (C) 2009, 2014 Albert Einstein College of Medicine
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
* Authors: Julien Lajugie <julien.lajugie@einstein.yu.edu>
* Nicolas Fourel <nicolas.fourel@einstein.yu.edu>
* Eric Bouhassira <eric.bouhassira@einstein.yu.edu>
*
* Website: <http://genplay.einstein.yu.edu>
******************************************************************************/
package edu.yu.einstein.genplay.core.multiGenome.VCF.VCFFile;
import java.io.File;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import edu.yu.einstein.genplay.core.manager.ProjectFiles;
import edu.yu.einstein.genplay.core.multiGenome.VCF.VCFLine;
import edu.yu.einstein.genplay.core.multiGenome.VCF.VCFHeaderType.VCFHeaderAdvancedType;
import edu.yu.einstein.genplay.core.multiGenome.VCF.VCFStatistics.VCFFileFullStatistic;
import edu.yu.einstein.genplay.dataStructure.chromosome.Chromosome;
import edu.yu.einstein.genplay.dataStructure.enums.VariantType;
import edu.yu.einstein.genplay.dataStructure.list.primitiveList.PrimitiveList;
/**
* This class handles VCF files.
* It indexes information to perform fast queries.
* It also gets VCF header information.
* @author Nicolas Fourel
* @version 0.1
*/
public class VCFFile implements Serializable {
/** Default generated serial version ID */
private static final long serialVersionUID = 7316097355767936880L; // generated ID
private static final int SAVED_FORMAT_VERSION_NUMBER = 0; // saved format version
private File file; // VCF file
private VCFHeader header; // Information about the header
private VCFReader reader; // Reader for the VCF file
private VCFFileFullStatistic statistics; // VCF file statistics
private Map<String, List<VariantType>> variantTypeList; // List of the different variant type contained in the VCF file and sorted by genome name
private List<Integer> positionList; // reference genome position array (indexes match with the boolean list of filters)
private Chromosome chromosomeOfList;
/**
* Constructor of {@link VCFFile}
* @param file the VCF file
* @throws IOException
* @throws URISyntaxException
*/
public VCFFile (File file) throws IOException, URISyntaxException {
this.file = file;
positionList = null;
variantTypeList = new HashMap<String, List<VariantType>>();
reader = new VCFReader();
header = new VCFHeader();
statistics = new VCFFileFullStatistic();
indexVCFFile();
header.processHeader(reader);
reader.setColumnNames(header.getColumnNames());
chromosomeOfList = null;
}
/**
* Add a genome name to the list of genome name
* @param genomeName a full genome name
*/
public void addGenomeName (String genomeName) {
header.addGenomeName(genomeName);
statistics.addGenomeName(genomeName);
}
/**
* Add a type of variant if it is not already present in the list.
* @param genomeName name of the genome
* @param type variant type to add
*/
public void addVariantType (String genomeName, VariantType type) {
if (!variantTypeList.containsKey(genomeName)) {
variantTypeList.put(genomeName, new ArrayList<VariantType>());
}
if (!variantTypeList.get(genomeName).contains(type)) {
variantTypeList.get(genomeName).add(type);
}
}
/**
* Checks if this VCF contains the information for the given genome and a variation type
* @param genomeName the name of the genome
* @param variantType the type of the variation
* @return true if this VCF can manage the request
*/
public boolean canManage (String genomeName, VariantType variantType) {
if ((getVariantTypes(genomeName) != null) && getVariantTypes(genomeName).contains(variantType)) {
return true;
}
return false;
}
private boolean chromosomeHasChanged (Chromosome chromosome) {
if (chromosomeOfList == null) {
return true;
}
if (!chromosomeOfList.equals(chromosome)){
return true;
}
return false;
}
@Override
public boolean equals(Object obj) {
if(this == obj){
return true;
}
if((obj == null) || (obj.getClass() != this.getClass())) {
return false;
}
// object must be Test at this point
VCFFile test = (VCFFile)obj;
return file.getAbsolutePath().equals(test.getFile().getAbsoluteFile());
}
/**
* @return the vcf
*/
public File getFile() {
return file;
}
/**
* Gets the value of the FORMAT field and a specific field
* @param value the FORMAT string
* @param field the specific field
* @return the value of the specific field of the FORMAT field
*/
public Object getFormatValue (String value, String field) {
Object result = null;
List<VCFHeaderAdvancedType> formatHeader = header.getFormatHeader();
int indexInList = getIndex(formatHeader, field);
if (indexInList != -1) {
Class<?> type = formatHeader.get(indexInList).getType();
if (type == Integer.class) {
try {
result = Integer.parseInt(value);
} catch (Exception e) {
result = value;
}
} else if (type == Float.class) {
result = Float.parseFloat(value);
} else if (type == char.class) {
result = value.charAt(0);
} else if (type == String.class) {
result = value;
}
}
return result;
}
/**
* @return the header
*/
public VCFHeader getHeader() {
return header;
}
/**
* Gets the index of a specific ID field in a advanced type header list
* @param list the advanced type header list
* @param id the specific ID field
* @return the index
*/
private int getIndex (List<VCFHeaderAdvancedType> list, String id) {
boolean found = false;
int index = 0;
while (!found && (index < list.size())) {
if (id.equals(list.get(index).getId())) {
found = true;
} else {
index++;
}
}
if (found) {
return index;
} else {
return -1;
}
}
/**
* Gets the value according to the INFO field and a specific field
* @param info the INFO string
* @param field the specific field
* @return the value of the specific field of the INFO field
*/
public Object getInfoValues (String info, String field) {
Object result = null;
List<VCFHeaderAdvancedType> infoHeader = header.getInfoHeader();
int indexInList = getIndex(infoHeader, field);
if (indexInList != -1) {
int indexInString = info.indexOf(field);
if (indexInString != -1) {
Class<?> type = infoHeader.get(indexInList).getType();
if (type == Boolean.class) {
result = true;
} else {
int start = indexInString + field.length() + 1;
int stop = info.indexOf(";", start);
if (stop == -1) {
stop = info.length();
}
String value = info.substring(start, stop);
if (type == Integer.class) {
result = Integer.parseInt(value);
} else if (type == Float.class) {
result = Float.parseFloat(value);
} else if (type == char.class) {
result = value.charAt(0);
} else if (type == String.class) {
result = value;
}
}
}
}
return result;
}
/**
* @return the positionList
*/
public List<Integer> getPositionList() {
return positionList;
}
/**
* @return the reader
*/
public VCFReader getReader() {
return reader;
}
/**
* @return the statistics
*/
public VCFFileFullStatistic getStatistics() {
return statistics;
}
/**
* @param genomeName genome name
* @return the list of variant type present in this vcf for this genome
*/
public List<VariantType> getVariantTypes (String genomeName) {
if (variantTypeList.containsKey(genomeName)) {
return variantTypeList.get(genomeName);
}
return null;
}
/**
* This method indexes the VCF file using the Tabix Java API.
* @throws IOException
* @throws URISyntaxException
*/
private void indexVCFFile () throws IOException, URISyntaxException {
if (!isVCFIndexed ()) {
file = ProjectFiles.getInstance().getValidFileOf(file);
if (reader == null) {
reader = new VCFReader();
}
reader.indexVCFFile(file);
}
}
/**
* Initializes the list of reference genome position for this reader.
* It is required when using VCF Filters.
* @param chromosome the current chromosome
* @param results the list of result
*/
public void initializePositionList (Chromosome chromosome, List<VCFLine> results) {
if (chromosomeHasChanged(chromosome)) {
if (results != null) {
positionList = new PrimitiveList<Integer>(Integer.class, results.size());
//VCFLine line = new VCFLine(null, null);
for (int i = 0; i < results.size(); i++) {
//line.initialize(results.get(i), null);
positionList.add(results.get(i).getReferencePosition());
}
} else {
positionList = new PrimitiveList<Integer>(Integer.class, 0);
}
}
}
/**
* This method checks if the VCF has been indexed.
* @return true if the VCF is indexed
*/
private boolean isVCFIndexed () {
if ((reader != null) && (reader.getVCFParser() != null)) {
return true;
}
return false;
}
/**
* Method used for unserialization
* @param in
* @throws IOException
* @throws ClassNotFoundException
* @throws URISyntaxException
*/
@SuppressWarnings("unchecked")
private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException, URISyntaxException {
in.readInt();
file = (File) in.readObject();
header = (VCFHeader) in.readObject();
statistics = (VCFFileFullStatistic) in.readObject();
variantTypeList = (Map<String, List<VariantType>>) in.readObject();
positionList = (List<Integer>) in.readObject();
chromosomeOfList = (Chromosome) in.readObject();
indexVCFFile(); // recreate the tabix reader
reader.setColumnNames(header.getColumnNames());
}
@Override
public String toString () {
return file.getName();
}
/**
* Method used for serialization
* @param out
* @throws IOException
*/
private void writeObject(ObjectOutputStream out) throws IOException {
out.writeInt(SAVED_FORMAT_VERSION_NUMBER);
out.writeObject(file);
out.writeObject(header);
out.writeObject(statistics);
out.writeObject(variantTypeList);
out.writeObject(positionList);
out.writeObject(chromosomeOfList);
}
}