package org.opencb.hpg.bigdata.core.lib;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.sql.SparkSession;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import java.nio.file.Path;
import java.nio.file.Paths;
/**
* Created by joaquin on 8/21/16.
*/
/**
 * Integration tests for {@code AlignmentDataset} filters, executed against a
 * small BAM file (in Avro form) on a local single-threaded Spark context.
 */
public class AlignmentDatasetTest {

    static AlignmentDataset ad;
    static SparkConf sparkConf;
    static SparkSession sparkSession;

    /** Creates the local Spark session shared by all tests in this class. */
    @BeforeClass
    public static void setup() {
        // The Spark home directory is irrelevant for a local master, so pass "".
        sparkConf = SparkConfCreator.getConf("AlignmentDatasetTest", "local", 1, true, "");
        System.out.println("sparkConf = " + sparkConf.toDebugString());
        sparkSession = new SparkSession(new SparkContext(sparkConf));
    }

    /** Stops the shared Spark context once all tests have run. */
    @AfterClass
    public static void shutdown() {
        // Stop via the class-level session: the previous code used
        // ad.sparkSession, which throws NullPointerException when no test
        // ever called initDataset() (or it failed before assigning 'ad').
        if (sparkSession != null) {
            sparkSession.sparkContext().stop();
        }
    }

    /**
     * Loads the test resource {@code /test.bam.avro} into a fresh
     * {@code AlignmentDataset} and registers it as the temp view "bam".
     *
     * @throws RuntimeException if the resource cannot be located or loaded;
     *         failing fast here replaces the old printStackTrace()-and-continue
     *         behavior, which let tests run against an unloaded dataset.
     */
    public void initDataset() {
        ad = new AlignmentDataset();
        try {
            Path inputPath = Paths.get(getClass().getResource("/test.bam.avro").toURI());
            System.out.println(">>>> opening file " + inputPath);
            ad.load(inputPath.toString(), sparkSession);
            ad.printSchema();
            ad.createOrReplaceTempView("bam");
        } catch (Exception e) {
            throw new RuntimeException("Failed to initialize test dataset from /test.bam.avro", e);
        }
    }

    /** Compares a hand-written region SQL query with {@code regionFilter}. */
    @Test
    public void regionFilter() {
        initDataset();
        System.out.println(">>>> Running regionFilter...");
        long count;
        count = sparkSession.sql("select alignment.position.referenceName, alignment.position.position from bam").count();
        System.out.println("count = " + count);
        System.out.println("-------------------------------------- using SQL query");
        sparkSession.sql("select alignment.position.referenceName, alignment.position.position, fragmentName, fragmentLength, length(alignedSequence), alignedSequence from bam where alignment.position.referenceName = \"1\" AND alignment.position.position >= 31915360 AND (alignment.position.position + length(alignedSequence)) <= 31925679").show();
        System.out.println("-------------------------------------- using regionFilter");
        ad.regionFilter("1:31915360-31925679").show();
        System.out.println("--------------------------------------");
    }

    /** Exercises the mapping-quality filter (MAPQ &gt; 50). */
    @Test
    public void mapqFilter() {
        initDataset();
        System.out.println(">>>> Running mappingQualityFilter...");
        System.out.println("-------------------------------------- using mappingQualityFilter");
        ad.mappingQualityFilter(">50").show();
        System.out.println("--------------------------------------");
    }

    /** Exercises the template-length (TLEN) filter within (398, 400). */
    @Test
    public void tlenFilter() {
        initDataset();
        System.out.println(">>>> Running templateLengthFilter...");
        System.out.println("-------------------------------------- using templateLengthFilter");
        ad.templateLengthFilter(">398;<400").show();
        System.out.println("--------------------------------------");
    }

    /** Exercises the aligned-sequence length filter. */
    @Test
    public void alenFilter() {
        initDataset();
        System.out.println(">>>> Running alignmentLengthFilter...");
        System.out.println("-------------------------------------- using alignmentLengthFilter");
        // NOTE(review): ">50;<50" is contradictory (no length satisfies both),
        // so this always yields an empty result — confirm whether ">50;<150"
        // or similar was intended. Left unchanged to preserve test behavior.
        ad.alignmentLengthFilter(">50;<50").show();
        System.out.println("--------------------------------------");
    }

    /** Exercises the SAM flag filter, both inclusive and exclusive modes. */
    @Test
    public void flagFilter() {
        System.out.println(">>>> Running flagFilter...");
        System.out.println("-------------------------------------- using flagFilter");
        initDataset();
        //sparkSession.sql("select * from bam").show();
        // 147,99: properly-paired read flags (include matching rows).
        ad.flagFilter("147,99").show();
        // Reload the view, then exclude reads carrying flag 83.
        initDataset();
        ad.flagFilter("83", true).show();
        System.out.println("--------------------------------------");
    }
}