/*
* Copyright 2015-2016 OpenCB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.opencb.opencga.storage.core.io;
import htsjdk.tribble.readers.LineIterator;
import htsjdk.variant.vcf.VCFHeader;
import htsjdk.variant.vcf.VCFHeaderVersion;
import org.junit.Before;
import org.junit.Test;
import org.mockito.Mockito;
import org.opencb.biodata.formats.variant.vcf4.FullVcfCodec;
import org.opencb.biodata.models.variant.Variant;
import org.opencb.biodata.models.variant.VariantNormalizer;
import org.opencb.biodata.tools.variant.converters.avro.VariantContextToVariantConverter;
import org.opencb.biodata.tools.variant.stats.VariantGlobalStatsCalculator;
import org.opencb.commons.io.DataReader;
import org.opencb.opencga.storage.core.io.VcfVariantReader;
import java.io.ByteArrayInputStream;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import static org.junit.Assert.*;
/**
 * Tests for {@link VcfVariantReader}, fed by a synthetic in-memory {@link DataReader}
 * that generates single-sample VCF data lines on demand.
 *
 * Created by mh719 on 04/05/16.
 */
public class VcfVariantReaderTest {

    /** Reader under test; recreated before every test with an unlimited line source. */
    public VcfVariantReader reader;

    /** Creates a fresh reader backed by an unlimited line source ({@code size == -1}). */
    @Before
    public void setup() {
        reader = createReader(-1);
    }

    /**
     * Builds a {@link VcfVariantReader} whose input is a generated sequence of VCF data lines.
     * Every generated line describes a T&gt;A variant on chromosome 1 for the sample "ABC";
     * the position is a counter that starts at 1 and grows by one per line.
     *
     * @param size maximum number of lines the source produces, or {@code -1} for no limit
     * @return a reader wired to the generated source, the header from {@link #buildHeader()},
     *         a mocked {@link VariantGlobalStatsCalculator} and a default {@link VariantNormalizer}
     */
    public static VcfVariantReader createReader(final int size) {
        VariantNormalizer normalizer = new VariantNormalizer();
        VariantGlobalStatsCalculator stats = Mockito.mock(VariantGlobalStatsCalculator.class);
        VariantContextToVariantConverter converter =
                new VariantContextToVariantConverter("1", "1", Arrays.asList("ABC"));
        VCFHeaderVersion version = VCFHeaderVersion.VCF4_2;
        VCFHeader header = buildHeader();

        DataReader<String> dr = new DataReader<String>() {
            /** Data line template; {@code %s} is filled with the running position. */
            private final String line = "1\t%s\t.\tT\tA\t.\tPASS\t.\tGT\t0/1";
            /** Number of lines handed out so far; also the position of the last line. */
            private int count = 0;

            /**
             * Tells whether the configured line limit has been reached.
             * Uses {@code >=} so that exactly {@code size} lines are produced; the previous
             * {@code >} comparison let one extra line through.
             */
            private boolean exhausted() {
                return size != -1 && count >= size;
            }

            @Override
            public List<String> read() {
                if (exhausted()) {
                    return Collections.emptyList();
                }
                ++count;
                return Collections.singletonList(String.format(line, count));
            }

            @Override
            public List<String> read(int batchSize) {
                if (exhausted()) {
                    return Collections.emptyList();
                }
                List<String> batch = new ArrayList<>(batchSize);
                // Stop early when the limit is hit in the middle of a batch.
                for (int i = 0; i < batchSize && !exhausted(); i++) {
                    ++count;
                    batch.add(String.format(line, count));
                }
                return batch;
            }
        };
        return new VcfVariantReader(dr, header, version, converter, stats, normalizer);
    }

    /**
     * Reads one variant and checks its alleles, then reads a batch of 22 and checks the
     * batch size.
     */
    @Test
    public void testProcessLine() throws Exception {
        List<Variant> vars = reader.read();
        assertEquals("Expect only one Variant inline", 1, vars.size());
        Variant variant = vars.get(0);
        assertEquals("Reference wrong", "T", variant.getReference());
        assertEquals("Alt wrong", "A", variant.getAlternate());

        vars = reader.read(22);
        assertEquals("Expect 22 Variants in batch", 22, vars.size());
    }

    /**
     * Manual timing loop: performs 10000 single-line batch reads and prints the wall-clock
     * time next to the reader's own {@code timesOverall} counter.
     * It carries no {@code @Test} annotation, so JUnit does not run it.
     */
    public void testBatch() {
        long curr = System.currentTimeMillis();
        for (int i = 0; i < 10000; i++) {
            List<Variant> lst = reader.read(1);
            lst.size();
        }
        System.out.println("Run for " + (System.currentTimeMillis() - curr) + " and measured " + reader.timesOverall.get());
    }

    /**
     * Manual timing loop: five rounds of 100000 {@code processLine} calls on a fixed line,
     * printing the wall-clock time of each round.
     * It carries no {@code @Test} annotation, so JUnit does not run it.
     */
    public void testRuntime() {
        String line = "1\t1\t.\tT\tA\t.\tPASS\t.\tGT\t0/1";
        for (int i = 0; i < 5; i++) {
            long curr = System.currentTimeMillis();
            for (int j = 0; j < 100000; j++) {
                // Result intentionally discarded; only the elapsed time matters here.
                reader.processLine(line);
            }
            System.out.println("Run for " + (System.currentTimeMillis() - curr));
        }
    }

    /**
     * Parses a minimal in-memory VCF header (fileformat line, a GT FORMAT definition and the
     * column line with the single sample "ABC") using {@link FullVcfCodec}.
     *
     * @return the parsed header
     */
    public static VCFHeader buildHeader() {
        FullVcfCodec codec = new FullVcfCodec();
        // NOTE(review): the header text declares VCFv4.1 while createReader passes
        // VCFHeaderVersion.VCF4_2 to the reader - confirm the mismatch is intentional.
        // Explicit charset so the bytes do not depend on the platform default.
        byte[] buf = ("##fileformat=VCFv4.1\n"
                + "##FORMAT=<ID=GT,Number=1,Type=String,Description=\"Genotype\">\n"
                + "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tABC\n").getBytes(StandardCharsets.UTF_8);
        InputStream headerStream = new ByteArrayInputStream(buf);
        LineIterator lineIterator = codec.makeSourceFromStream(headerStream);
        return (VCFHeader) codec.readActualHeader(lineIterator);
    }
}