/******************************************************************************* * GenPlay, Einstein Genome Analyzer * Copyright (C) 2009, 2014 Albert Einstein College of Medicine * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. * Authors: Julien Lajugie <julien.lajugie@einstein.yu.edu> * Nicolas Fourel <nicolas.fourel@einstein.yu.edu> * Eric Bouhassira <eric.bouhassira@einstein.yu.edu> * * Website: <http://genplay.einstein.yu.edu> ******************************************************************************/ package edu.yu.einstein.genplay.core.multiGenome.utils; import edu.yu.einstein.genplay.core.multiGenome.VCF.VCFLine; import edu.yu.einstein.genplay.core.multiGenome.VCF.VCFHeaderType.VCFHeaderType; import edu.yu.einstein.genplay.core.multiGenome.data.display.variant.Variant; import edu.yu.einstein.genplay.core.multiGenome.operation.synchronization.MGSynchronizer; import edu.yu.einstein.genplay.dataStructure.enums.VCFColumnName; import edu.yu.einstein.genplay.dataStructure.enums.VariantType; import edu.yu.einstein.genplay.util.Utils; /** * @author Nicolas Fourel * @version 0.1 */ public class VCFLineUtility { /** * Looks in a line for an ID in order to return its value * @param line part of a VCF line * @param ID ID name * @return the value of the ID */ public static Object getIDValue (String line, String ID) { int indexID; try { indexID = line.indexOf(ID); } catch (Exception e) { System.out.println("line: " + line + "; ID: " + ID); } indexID = line.indexOf(ID); //int indexID = line.indexOf(ID); int indexValue = indexID + ID.length() + 1; int indexEnd = line.indexOf(";", indexValue); if (indexEnd == -1) { indexEnd = line.length(); } return line.substring(indexValue, indexEnd); } /** * Retrieves a String value within a string. * According to the column, the value can be the full line associated to the current column, or part of it. * @param line the VCF line (as a map String/Object) * @param header the header that contains the ID to look for * @param genomeRawName the line associated to a genome ONLY FOR FORMAT FILTER (can be null otherwise) * @return the string value of the ID, null otherwise */ public static String getValue (VCFLine line, VCFHeaderType header, String genomeRawName) { VCFColumnName columnName = header.getColumnCategory(); String fieldLine = line.getValueFromColumn(columnName); String result = null; if (columnName == VCFColumnName.ALT) { // Columns ALT, QUAL, FILTER are not composed of different ID result = fieldLine; // the value to get is necessary the full line! } else if (columnName == VCFColumnName.QUAL) { result = fieldLine; } else if (columnName == VCFColumnName.FILTER) { result = fieldLine; } else if (columnName == VCFColumnName.INFO) { // Columns INFO and FORMAT gather different ID (; or : delimited) result = getInfoValue(line, header); // a more complex process is used to locate and retrieve the ID value } else if (columnName == VCFColumnName.FORMAT) { result = getFormatValue(line, header, genomeRawName); // a more complex process is used to locate and retrieve the ID value } return result; } /** * Gets the value according to the INFO field and a specific field * @param line the VCF line * @param header the header (containing the ID field) * @return the value of the specific field of the INFO field */ public static String getInfoValue (VCFLine line, VCFHeaderType header) { String info = line.getINFO(); return getInfoValue(info, header); } /** * Gets the value according to the INFO field and a specific field * @param info the info part of the line * @param header the header (containing the ID field) * @return the value of the specific field of the INFO field */ public static String getInfoValue (String info, VCFHeaderType header) { String result = null; int indexStart = info.indexOf(header.getId()); if (indexStart != -1) { indexStart += header.getId().length() + 1; int indexStop = info.indexOf(";", indexStart); if (indexStop == -1) { indexStop = info.length(); } result = info.substring(indexStart, indexStop); } return result; } /** * Gets the value according to the FORMAT field and a specific field * @param line the VCF line * @param header the header (containing the ID field) * @param genomeRawName the genome raw name * @return the value of the specific field of the FORMAT field */ public static String getFormatValue (VCFLine line, VCFHeaderType header, String genomeRawName) { return getFormatValue(line.getFORMAT(), line.getFormatValues(genomeRawName), header); } /** * Gets the value according to the FORMAT field and a specific field * @param lineFormat the FORMAT line * @param genomeFormat the format field of the related genome * @param header the header * @return the value of the field, null otherwise */ public static String getFormatValue (String lineFormat, String genomeFormat, VCFHeaderType header) { String[] genomeFormats = Utils.split(genomeFormat, ':'); return getFormatValue(lineFormat, genomeFormats, header); } /** * Gets the value according to the FORMAT field and a specific field * @param lineFormat the FORMAT line * @param genomeFormat the parsed format field of the related genome * @param header the header * @return the value of the field, null otherwise */ public static String getFormatValue (String lineFormat, String[] genomeFormat, VCFHeaderType header) { String[] format = Utils.split(lineFormat, ':'); int idIndex = -1; for (int i = 0; i < format.length; i++) { if (format[i].equals(header.getId())) { idIndex = i; } } String result = null; if (idIndex != -1) { if (idIndex < genomeFormat.length) { result = genomeFormat[idIndex]; } } return result; } /** * @param variant a variant * @return the VCF line of the variant */ public static VCFLine getVCFLine (Variant variant) { return null; } /*private static VCFLine getRightInformation (Variant variant, List<String> results, List<VCFFile> vcfFiles) { if (results.size() > 0) { float variantScore = variant.getScore(); for (int i = 0; i < results.size(); i++) { String result = results.get(i); float currentScore = getQUALFromResult(result); if (variantScore == currentScore) { return new VCFLine(result, vcfFiles.get(i).getHeader()); } } } return null; }*/ /** * Retrieves the QUAL field from a result line * @param result the result * @return the quality as a float or 0 is the QUAL field is not valid (eg: '.') */ /*private static float getQUALFromResult (String result) { String[] array = Utils.splitWithTab(result); float qual = 0; try { qual = Float.parseFloat(array[5].toString()); } catch (Exception e) {} return qual; }*/ /** * Transforms a character into its allele index. * The char 1 will refer to the first alternative located at the index 0 of any arrays. * The char 0 returns -1 and the char '.' returns -2 and don't refer to any alternatives. * @param alleleChar the character * @return the associated code (char - 1) */ public static int getAlleleIndex (char alleleChar) { return getAlleleIndex(alleleChar + ""); } /** * Transforms a character into its allele index. * The char 1 will refer to the first alternative located at the index 0 of any arrays. * The char 0 returns -1 and the char '.' returns -2 and don't refer to any alternatives. * @param alleleChar the character * @return the associated code (char - 1) */ public static int getAlleleIndex (String alleleChar) { int alleleIndex = -1; if (alleleChar.equals(".")) { alleleIndex = MGSynchronizer.NO_CALL; } else if (alleleChar.equals("0")) { alleleIndex = MGSynchronizer.REFERENCE; } else { try { alleleIndex = Integer.parseInt(alleleChar) - 1; } catch (Exception e) {} } return alleleIndex; } /** * Retrieves the length of all defined alternatives * If an alternative is SV coded, the info field is required * @param reference the REF field * @param alternatives the parsed ALT field * @param info the INFO field * @return an array of integer as lengths */ public static int[] getVariantLengths(String reference, String[] alternatives, String info) { int[] lengths = new int[alternatives.length]; for (int i = 0; i < alternatives.length; i++) { lengths[i] = retrieveVariantLength(reference, alternatives[i], info); } return lengths; } /** * Defines the variant type according to several lengths * @param length array of length * @return an array of variant types */ public static VariantType[] getVariantTypes (int[] length) { VariantType[] variantTypes = new VariantType[length.length]; for (int i = 0; i < length.length; i++) { variantTypes[i] = getVariantType(length[i]); } return variantTypes; } /** * Retrieves the length of a variation using the reference and the alternative. * If the alternative is a structural variant, the length is given by the SVLEN INFO attributes * @param reference REF field * @param alternative ALT field * @param info INFO field * @return the length of the variation */ public static int retrieveVariantLength (String reference, String alternative, String info) { int length = 0; if (isStructuralVariant(alternative)) { String lengthPattern = "SVLEN="; int lengthPatternIndex = info.indexOf(lengthPattern) + lengthPattern.length(); int nextCommaIndex = info.indexOf(";", lengthPatternIndex); if (nextCommaIndex == -1) { length = Integer.parseInt(info.substring(lengthPatternIndex)); } else { length = Integer.parseInt(info.substring(lengthPatternIndex, nextCommaIndex)); } } else { length = alternative.length() - reference.length(); } return length; } /** * Tests the length of a variation to find its type out. * @param variationLength length of the variation * @return the variation type {@link VariantType} */ public static VariantType getVariantType (int variationLength) { if (variationLength < 0) { return VariantType.DELETION; } else if (variationLength > 0) { return VariantType.INSERTION; } else if (variationLength == 0) { return VariantType.SNPS; } else { return null; } } /** * @param alternative ALT field (or part of it) * @return true if the given alternative is coded as an SV */ public static boolean isStructuralVariant (String alternative) { if (alternative.charAt(0) == '<') { return true; } return false; } }