/* * Copyright 2015-2016 OpenCB * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.opencb.opencga.storage.core.io; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFCodec; import htsjdk.variant.vcf.VCFHeader; import htsjdk.variant.vcf.VCFHeaderVersion; import org.opencb.biodata.formats.variant.vcf4.FullVcfCodec; import org.opencb.biodata.models.variant.Variant; import org.opencb.biodata.models.variant.VariantNormalizer; import org.opencb.biodata.tools.variant.converters.avro.VariantContextToVariantConverter; import org.opencb.biodata.tools.variant.stats.VariantGlobalStatsCalculator; import org.opencb.commons.io.DataReader; import org.opencb.opencga.storage.core.io.plain.StringDataReader; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.nio.file.Path; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.concurrent.atomic.AtomicLong; import java.util.stream.Collectors; /** * Created by mh719 on 04/05/16. */ @Deprecated public class VcfVariantReader implements DataReader<Variant> { protected static Logger logger = LoggerFactory.getLogger(VcfVariantReader.class); protected final DataReader<String> reader; protected final VCFCodec vcfCodec; protected final VariantContextToVariantConverter converter; protected final VariantNormalizer normalizer; protected final VariantGlobalStatsCalculator variantStatsTask; protected final AtomicLong timesOverall = new AtomicLong(0); // protected final AtomicLong timeHts = new AtomicLong(0); // protected final AtomicLong timeAvro = new AtomicLong(0); // protected final AtomicLong timeNorm = new AtomicLong(0); // protected final AtomicLong timeStats = new AtomicLong(0); public VcfVariantReader( DataReader<String> reader, VCFHeader header, VCFHeaderVersion version, VariantContextToVariantConverter converter, VariantGlobalStatsCalculator variantStatsTask, VariantNormalizer normalizer) { this.vcfCodec = new FullVcfCodec(header, version); this.converter = converter; this.normalizer = normalizer; this.reader = reader; this.variantStatsTask = variantStatsTask; } public VcfVariantReader( Path path, VCFHeader header, VCFHeaderVersion version, VariantContextToVariantConverter converter, VariantGlobalStatsCalculator variantStatsTask, VariantNormalizer normalizer) { this(new StringDataReader(path), header, version, converter, variantStatsTask, normalizer); } protected List<Variant> processLine(String line) { // long curr = System.currentTimeMillis(); VariantContext htsVar = this.vcfCodec.decode(line); // this.timeHts.addAndGet(System.currentTimeMillis() - curr); // curr = System.currentTimeMillis(); Variant variant = this.converter.convert(htsVar); // this.timeAvro.addAndGet(System.currentTimeMillis() - curr); // curr = System.currentTimeMillis(); List<Variant> normVar = this.normalizer.apply(Collections.singletonList(variant)); // this.timeNorm.addAndGet(System.currentTimeMillis() - curr); // curr = System.currentTimeMillis(); this.variantStatsTask.apply(normVar); // this.timeStats.addAndGet(System.currentTimeMillis() - curr); return normVar; } private List<Variant> processLines(List<String> lines) { return lines.stream().filter(l -> !(l.trim().isEmpty() || l.startsWith("#"))) .map(l -> processLine(l)).flatMap(l -> l.stream()).collect(Collectors.toList()); } @Override public List<Variant> read(int batchSize) { long curr = System.currentTimeMillis(); try { List<Variant> variants = new ArrayList<>(); List<String> lines; do { lines = this.reader.read(batchSize); List<Variant> processed = processLines(lines); variants.addAll(processed); } while (variants.size() < batchSize && !lines.isEmpty()); return variants; } finally { this.timesOverall.addAndGet(System.currentTimeMillis() - curr); } } @Override public List<Variant> read() { long curr = System.currentTimeMillis(); try { return read(1); } finally { this.timesOverall.addAndGet(System.currentTimeMillis() - curr); } } @Override public boolean open() { return this.reader.open(); } @Override public boolean close() { return this.reader.close(); } @Override public boolean pre() { synchronized (variantStatsTask) { this.variantStatsTask.pre(); } return this.reader.pre(); } @Override public boolean post() { this.reader.post(); synchronized (variantStatsTask) { this.variantStatsTask.post(); } logger.info(String.format("Time read: %s", this.timesOverall.get())); // logger.info(String.format("Time txt2hts: %s", this.timeHts.get())); // logger.info(String.format("Time hts2avro: %s", this.timeAvro.get())); // logger.info(String.format("Time avro2norm: %s", this.timeNorm.get())); // logger.info(String.format("Time stats: %s", this.timeNorm.get())); return true; } }