/*
* The MIT License
*
* Copyright (c) 2014 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package htsjdk.variant.utils;
import htsjdk.samtools.BamFileIoUtils;
import htsjdk.samtools.SAMException;
import htsjdk.samtools.SAMFileHeader;
import htsjdk.samtools.SAMSequenceDictionary;
import htsjdk.samtools.SAMTextHeaderCodec;
import htsjdk.samtools.SamReader;
import htsjdk.samtools.SamReaderFactory;
import htsjdk.samtools.reference.ReferenceSequenceFileFactory;
import htsjdk.samtools.util.BufferedLineReader;
import htsjdk.samtools.util.CloserUtil;
import htsjdk.samtools.util.CollectionUtil;
import htsjdk.samtools.util.IOUtil;
import htsjdk.samtools.util.IntervalList;
import htsjdk.variant.vcf.VCFFileReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.util.Arrays;
import java.util.Collection;
/**
* Tiny class for automatically loading a SAMSequenceDictionary given a file
* @author farjoun on 2/25/2014
*/
public class SAMSequenceDictionaryExtractor {
enum TYPE {
FASTA(ReferenceSequenceFileFactory.FASTA_EXTENSIONS) {
@Override
SAMSequenceDictionary extractDictionary(final File reference) {
final SAMSequenceDictionary dict = ReferenceSequenceFileFactory.getReferenceSequenceFile(reference).getSequenceDictionary();
if (dict == null)
throw new SAMException("Could not find dictionary next to reference file " + reference.getAbsoluteFile());
return dict;
}
},
DICTIONARY(IOUtil.DICT_FILE_EXTENSION) {
@Override
SAMSequenceDictionary extractDictionary(final File dictionary) {
BufferedLineReader bufferedLineReader = null;
try {
bufferedLineReader = new BufferedLineReader(new FileInputStream(dictionary));
final SAMTextHeaderCodec codec = new SAMTextHeaderCodec();
final SAMFileHeader header = codec.decode(bufferedLineReader, dictionary.toString());
return header.getSequenceDictionary();
} catch (final FileNotFoundException e) {
throw new SAMException("Could not open sequence dictionary file: " + dictionary, e);
} finally {
CloserUtil.close(bufferedLineReader);
}
}
},
SAM(IOUtil.SAM_FILE_EXTENSION, BamFileIoUtils.BAM_FILE_EXTENSION) {
@Override
SAMSequenceDictionary extractDictionary(final File sam) {
final SamReader samReader = SamReaderFactory.makeDefault().open(sam);
try {
return samReader.getFileHeader().getSequenceDictionary();
} finally {
CloserUtil.close(samReader);
}
}
},
VCF(IOUtil.VCF_EXTENSIONS) {
@Override
SAMSequenceDictionary extractDictionary(final File vcf) {
VCFFileReader vcfFileReader = null;
try {
vcfFileReader = new VCFFileReader(vcf, false);
return vcfFileReader.getFileHeader().getSequenceDictionary();
} finally {
CloserUtil.close(vcfFileReader);
}
}
},
INTERVAL_LIST(IOUtil.INTERVAL_LIST_FILE_EXTENSION) {
@Override
SAMSequenceDictionary extractDictionary(final File intervalList) {
return IntervalList.fromFile(intervalList).getHeader().getSequenceDictionary();
}
};
final Collection<String> applicableExtensions;
TYPE(final String... s) {
applicableExtensions = CollectionUtil.makeSet(s);
}
TYPE(final Collection<String> extensions) {
applicableExtensions = extensions;
}
abstract SAMSequenceDictionary extractDictionary(final File file);
static TYPE forFile(final File dictionaryExtractable) {
for (final TYPE type : TYPE.values()) {
for (final String s : type.applicableExtensions) {
if (dictionaryExtractable.getName().endsWith(s)) {
return type;
}
}
}
throw new SAMException("Cannot figure out type of file " + dictionaryExtractable.getAbsolutePath() + " from extension. Current implementation understands the following types: " + Arrays.toString(TYPE.values()));
}
@Override
public String toString() {
return super.toString() + ": " + applicableExtensions.toString();
}
}
public static SAMSequenceDictionary extractDictionary(final File file) {
return TYPE.forFile(file).extractDictionary(file);
}
}