/* * Copyright 2015 Google. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.cloud.genomics.dataflow.functions.verifybamid; import com.google.cloud.genomics.dataflow.model.ReadCounts; import com.google.cloud.genomics.dataflow.model.ReadQualityCount; import com.google.cloud.genomics.dataflow.model.ReadQualityCount.Base; import com.google.common.collect.ImmutableMap; import com.google.genomics.v1.Position; import org.apache.commons.math3.analysis.UnivariateFunction; import org.apache.commons.math3.geometry.euclidean.oned.Interval; import org.apache.commons.math3.util.Precision; import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.JUnit4; import java.util.Map; import junit.framework.TestCase; /** * Unit tests for Solver. */ @RunWith(JUnit4.class) public class SolverTest extends TestCase { /** Parabola with maximum value at a specified point */ public class Parabola implements UnivariateFunction { private final double max; /** Create a Parabola with maximum at the specified value */ Parabola(double max) { this.max = max; } @Override public double value(double x) { return 1.0 - Math.pow(x - this.max, 2.0); } } @Test public void testGridSearch() { Interval interval = Solver.gridSearch(new Parabola(0.25), 0.0, 1.0, 0.1); assertEquals(Precision.compareTo(interval.getInf(), 0.1, 1), 0); assertEquals(Precision.compareTo(interval.getSup(), 0.3, 1), 0); interval = Solver.gridSearch(new Parabola(1.2), 0.0, 1.0, 0.1); assertEquals(Precision.compareTo(interval.getInf(), 0.9, 1), 0); assertEquals(Precision.compareTo(interval.getSup(), 1.0, 1), 0); interval = Solver.gridSearch(new Parabola(1.2), 0.0, 1.0, 0.3); assertEquals(Precision.compareTo(interval.getInf(), 0.9, 1), 0); assertEquals(Precision.compareTo(interval.getSup(), 1.0, 1), 0); } @Test public void testMaximize() { assertEquals(Precision.compareTo(Solver.maximize(new Parabola(0.47), 0.0, 1.0, 0.1, 0.00001, 0.00001, 100, 100), 0.47, 0.00001), 0); } @Test public void testSolverOnKnownLikelihoodCases() { int phred = 200; Position position1 = Position.newBuilder() .setReferenceName("1") .setPosition(123L) .build(); /* * Observe 900 REF reads and 100 NONREF * P(REF) = 0.8 * error probability is near 0 * Most likely explanation should be ~20% contamination * (if P(REF) were 0.5, we'd have peaks at 10% (nonref homozygous contaminant) * and 20% (heterozygous contaminant) */ ImmutableMap.Builder<Position, ReadCounts> countsBuilder = ImmutableMap.builder(); ReadCounts rc = new ReadCounts(); rc.setRefFreq(0.8); ReadQualityCount rqc = new ReadQualityCount(); rqc.setBase(Base.REF); rqc.setQuality(phred); rqc.setCount(900); rc.addReadQualityCount(rqc); rqc = new ReadQualityCount(); rqc.setBase(Base.NONREF); rqc.setQuality(phred); rqc.setCount(100); rc.addReadQualityCount(rqc); rqc = new ReadQualityCount(); rqc.setBase(Base.OTHER); rqc.setQuality(1); rqc.setCount(2); rc.addReadQualityCount(rqc); countsBuilder.put(position1, rc); Map<Position, ReadCounts> readCounts = countsBuilder.build(); assertEquals(Precision.compareTo(Solver.maximize( new LikelihoodFn(readCounts), 0.0, 0.5, 0.05, 0.0001, 0.0001, 100, 100), 0.2, 0.0001), 0); /* * Make sure things are symmetrical. Observe 900 NONREF reads and 100 REF * P(NONREF) = 0.8 (i.e. P(REF) = 0.2) * error probability is near 0 * Most likely explanation should be ~20% contamination * (if P(REF) were 0.5, we'd have peaks at 10% (nonref homozygous contaminant) * and 20% (heterozygous contaminant) */ countsBuilder = ImmutableMap.builder(); rc = new ReadCounts(); rc.setRefFreq(0.2); rqc = new ReadQualityCount(); rqc.setBase(Base.NONREF); rqc.setQuality(phred); rqc.setCount(900); rc.addReadQualityCount(rqc); rqc = new ReadQualityCount(); rqc.setBase(Base.REF); rqc.setQuality(phred); rqc.setCount(100); rc.addReadQualityCount(rqc); countsBuilder.put(position1, rc); readCounts = countsBuilder.build(); assertEquals(Precision.compareTo(Solver.maximize( new LikelihoodFn(readCounts), 0.0, 0.5, 0.05, 0.0001, 0.0001, 100, 100), 0.2, 0.0001), 0); /* * Assume a heterozygous desired base pair with a homozygous contaminant. * Observe 450 NONREF reads and 550 REF * P(REF) = 0.8 * error probability is near 0 * Most likely explanation should be ~10% contamination */ countsBuilder = ImmutableMap.builder(); rc = new ReadCounts(); rc.setRefFreq(0.5); rqc = new ReadQualityCount(); rqc.setBase(Base.NONREF); rqc.setQuality(phred); rqc.setCount(450); rc.addReadQualityCount(rqc); rqc = new ReadQualityCount(); rqc.setBase(Base.REF); rqc.setQuality(phred); rqc.setCount(550); rc.addReadQualityCount(rqc); countsBuilder.put(position1, rc); readCounts = countsBuilder.build(); assertEquals(Precision.compareTo(Solver.maximize( new LikelihoodFn(readCounts), 0.0, 0.5, 0.05, 0.0001, 0.0001, 100, 100), 0.1, 0.0001), 0); } }