/* * Copyright (c) 2012 The Broad Institute * * Permission is hereby granted, free of charge, to any person * obtaining a copy of this software and associated documentation * files (the "Software"), to deal in the Software without * restriction, including without limitation the rights to use, * copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following * conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR * THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ package htsjdk.variant.vcf; import htsjdk.tribble.TribbleException; import java.util.HashMap; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; /** * A class for translating between vcf header versions */ public class VCFHeaderLineTranslator { private static Map<VCFHeaderVersion,VCFLineParser> mapping; static { mapping = new HashMap<VCFHeaderVersion,VCFLineParser>(); mapping.put(VCFHeaderVersion.VCF4_0,new VCF4Parser()); mapping.put(VCFHeaderVersion.VCF4_1,new VCF4Parser()); mapping.put(VCFHeaderVersion.VCF4_2,new VCF4Parser()); mapping.put(VCFHeaderVersion.VCF3_3,new VCF3Parser()); mapping.put(VCFHeaderVersion.VCF3_2,new VCF3Parser()); } public static Map<String,String> parseLine(VCFHeaderVersion version, String valueLine, List<String> expectedTagOrder) { return mapping.get(version).parseLine(valueLine,expectedTagOrder); } } interface VCFLineParser { public Map<String,String> parseLine(String valueLine, List<String> expectedTagOrder); } /** * a class that handles the to and from disk for VCF 4 lines */ class VCF4Parser implements VCFLineParser { /** * parse a VCF4 line * @param valueLine the line * @return a mapping of the tags parsed out */ public Map<String, String> parseLine(String valueLine, List<String> expectedTagOrder) { // our return map Map<String, String> ret = new LinkedHashMap<String, String>(); // a builder to store up characters as we go StringBuilder builder = new StringBuilder(); // store the key when we're parsing out the values String key = ""; // where are we in the stream of characters? int index = 0; // are we inside a quotation? we don't special case ',' then boolean inQuote = false; // a little switch machine to parse out the tags. Regex ended up being really complicated and ugly [yes, but this machine is getting ugly now... MAD] for (char c: valueLine.toCharArray()) { if ( c == '\"' ) { inQuote = ! inQuote; } else if ( inQuote ) { builder.append(c); } else { switch (c) { case ('<') : if (index == 0) break; // if we see a open bracket at the beginning, ignore it case ('>') : if (index == valueLine.length()-1) ret.put(key,builder.toString().trim()); break; // if we see a close bracket, and we're at the end, add an entry to our list case ('=') : key = builder.toString().trim(); builder = new StringBuilder(); break; // at an equals, copy the key and reset the builder case (',') : ret.put(key,builder.toString().trim()); builder = new StringBuilder(); break; // drop the current key value to the return map default: builder.append(c); // otherwise simply append to the current string } } index++; } // validate the tags against the expected list index = 0; if ( expectedTagOrder != null ) { if ( ret.size() > expectedTagOrder.size() ) throw new TribbleException.InvalidHeader("unexpected tag count " + ret.size() + " in line " + valueLine); for ( String str : ret.keySet() ) { if ( !expectedTagOrder.get(index).equals(str) ) throw new TribbleException.InvalidHeader("Unexpected tag " + str + " in line " + valueLine); index++; } } return ret; } } class VCF3Parser implements VCFLineParser { public Map<String, String> parseLine(String valueLine, List<String> expectedTagOrder) { // our return map Map<String, String> ret = new LinkedHashMap<String, String>(); // a builder to store up characters as we go StringBuilder builder = new StringBuilder(); // where are we in the stream of characters? int index = 0; // where in the expected tag order are we? int tagIndex = 0; // are we inside a quotation? we don't special case ',' then boolean inQuote = false; // a little switch machine to parse out the tags. Regex ended up being really complicated and ugly for (char c: valueLine.toCharArray()) { switch (c) { case ('\"') : inQuote = !inQuote; break; // a quote means we ignore ',' in our strings, keep track of it case (',') : if (!inQuote) { ret.put(expectedTagOrder.get(tagIndex++),builder.toString()); builder = new StringBuilder(); break; } // drop the current key value to the return map default: builder.append(c); // otherwise simply append to the current string } index++; } ret.put(expectedTagOrder.get(tagIndex++),builder.toString()); // validate the tags against the expected list index = 0; if (tagIndex != expectedTagOrder.size()) throw new IllegalArgumentException("Unexpected tag count " + tagIndex + ", we expected " + expectedTagOrder.size()); for (String str : ret.keySet()){ if (!expectedTagOrder.get(index).equals(str)) throw new IllegalArgumentException("Unexpected tag " + str + " in string " + valueLine); index++; } return ret; } }