package picard.analysis.artifacts;
import htsjdk.samtools.SAMRecord;
import htsjdk.samtools.util.ListMap;
import htsjdk.samtools.util.SequenceUtil;
import htsjdk.samtools.util.StringUtil;
import picard.PicardException;
import picard.analysis.artifacts.SequencingArtifactMetrics.*;
import java.util.ArrayList;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* Keeps track of artifact counts, and extracts metrics once accumulation is finished.
*/
class ArtifactCounter {
private final String sampleAlias;
private final String library;
private final Map<String, RefContext> contextMap = new HashMap<>();
private final ContextAccumulator fullContextAccumulator;
private final ContextAccumulator halfContextAccumulator;
private final ContextAccumulator zeroContextAccumulator;
private final List<PreAdapterSummaryMetrics> preAdapterSummaryMetricsList;
private final List<PreAdapterDetailMetrics> preAdapterDetailMetricsList;
private final List<BaitBiasSummaryMetrics> baitBiasSummaryMetricsList;
private final List<BaitBiasDetailMetrics> baitBiasDetailMetricsList;
private final Set<String> leadingContexts = new HashSet<>();
private final Set<String> trailingContexts = new HashSet<>();
// tuple to keep track of the different types of sub-contexts from a given reference context
protected final class RefContext {
final String ref, leading, trailing, zero;
public RefContext(final String ref, final String leading, final String trailing, final String zero) {
this.ref = ref;
this.leading = leading;
this.trailing = trailing;
this.zero = zero;
}
}
public ArtifactCounter(final String sampleAlias, final String library, final int contextSize, final boolean expectedTandemReads) {
this.sampleAlias = sampleAlias;
this.library = library;
// define the contexts
final HashSet<String> fullContexts = new HashSet<>();
for (final byte[] kmer : SequenceUtil.generateAllKmers(2 * contextSize + 1)) {
fullContexts.add(StringUtil.bytesToString(kmer));
}
final Set<String> zeroContexts = new HashSet<>();
// the half contexts specify either leading or trailing bases. the zero context is just the center.
// NB: we use N to represent a wildcard base, rather than an ambiguous base. It's assumed that all of the input
// contexts are unambiguous, and that any actual N's in the data have been dealt with elsewhere.
final String padding = StringUtil.repeatCharNTimes('N', contextSize);
for (final String context : fullContexts) {
final char centralBase = context.charAt(contextSize);
final String leading = context.substring(0, contextSize) + centralBase + padding;
final String trailing = padding + centralBase + context.substring(contextSize + 1, context.length());
final String zero = padding + centralBase + padding;
contextMap.put(context, new RefContext(context, leading, trailing, zero));
leadingContexts.add(leading);
trailingContexts.add(trailing);
zeroContexts.add(zero);
}
final Set<String> halfContexts = new HashSet<>(leadingContexts);
halfContexts.addAll(trailingContexts);
this.fullContextAccumulator = new ContextAccumulator(fullContexts, expectedTandemReads);
this.halfContextAccumulator = new ContextAccumulator(halfContexts, expectedTandemReads);
this.zeroContextAccumulator = new ContextAccumulator(zeroContexts, expectedTandemReads);
// these will get populated in the final step
preAdapterSummaryMetricsList = new ArrayList<PreAdapterSummaryMetrics>();
preAdapterDetailMetricsList = new ArrayList<PreAdapterDetailMetrics>();
baitBiasSummaryMetricsList = new ArrayList<BaitBiasSummaryMetrics>();
baitBiasDetailMetricsList = new ArrayList<BaitBiasDetailMetrics>();
}
/**
* Add a record to all the accumulators.
*/
public void countRecord(final String refContext, final char calledBase, final SAMRecord rec) {
if (this.contextMap.containsKey(refContext)) {
final RefContext contexts = contextMap.get(refContext);
this.fullContextAccumulator.countRecord(contexts.ref, calledBase, rec);
this.halfContextAccumulator.countRecord(contexts.leading, calledBase, rec);
this.halfContextAccumulator.countRecord(contexts.trailing, calledBase, rec);
this.zeroContextAccumulator.countRecord(contexts.zero, calledBase, rec);
}
}
/**
* Stop counting, tally things up, and extract metrics.
*/
public void finish() {
final ListMap<Transition, DetailPair> allDetailMetrics = getDetailMetrics();
final Map<Transition, SummaryPair> allSummaryMetrics = getSummaryMetrics();
for (final Transition transition : Transition.altValues()) {
final SummaryPair summary = allSummaryMetrics.get(transition);
final List<DetailPair> details = allDetailMetrics.get(transition);
preAdapterSummaryMetricsList.add(summary.preAdapterMetrics);
baitBiasSummaryMetricsList.add(summary.baitBiasMetrics);
for (final DetailPair detail : details) {
preAdapterDetailMetricsList.add(detail.preAdapterMetrics);
baitBiasDetailMetricsList.add(detail.baitBiasMetrics);
}
}
}
public List<PreAdapterSummaryMetrics> getPreAdapterSummaryMetrics() { return preAdapterSummaryMetricsList; }
public List<PreAdapterDetailMetrics> getPreAdapterDetailMetrics() { return preAdapterDetailMetricsList; }
public List<BaitBiasSummaryMetrics> getBaitBiasSummaryMetrics() { return baitBiasSummaryMetricsList; }
public List<BaitBiasDetailMetrics> getBaitBiasDetailMetrics() { return baitBiasDetailMetricsList; }
/**
* Core method to compute summary metrics. For each transition, we report:
* 1. the total Q-score across all contexts
* 2. the worst full context and its Q-score
* 3. the worst leading context and its Q-score
* 4. the worst trailing context and its Q-score
*
*/
private Map<Transition, SummaryPair> getSummaryMetrics() {
final Map<Transition, SummaryPair> summaryMetricsMap = new EnumMap<Transition, SummaryPair>(Transition.class);
// extract the detail metrics from each accumulator
final ListMap<Transition, DetailPair> fullMetrics = this.fullContextAccumulator.calculateMetrics(sampleAlias, library);
final ListMap<Transition, DetailPair> halfMetrics = this.halfContextAccumulator.calculateMetrics(sampleAlias, library);
final ListMap<Transition, DetailPair> zeroMetrics = this.zeroContextAccumulator.calculateMetrics(sampleAlias, library);
// compute the summary metrics - one row for each transition
for (final Transition transition : Transition.altValues()) {
final List<DetailPair> fullMetricsForTransition = fullMetrics.get(transition);
final List<DetailPair> zeroMetricsForTransition = zeroMetrics.get(transition);
if (zeroMetricsForTransition.size() != 1) {
throw new PicardException("Should have exactly one context-free metric pair for transition: " + transition);
}
// we want to report on leading / trailing contexts separately
final List<DetailPair> leadingMetricsForTransition = new ArrayList<DetailPair>();
final List<DetailPair> trailingMetricsForTransition = new ArrayList<DetailPair>();
for (final DetailPair metrics : halfMetrics.get(transition)) {
// first make sure they're the same context
if (!metrics.preAdapterMetrics.CONTEXT.equals(metrics.baitBiasMetrics.CONTEXT)) {
throw new PicardException("Input detail metrics are not matched up properly - contexts differ.");
}
final boolean isLeading = leadingContexts.contains(metrics.preAdapterMetrics.CONTEXT);
final boolean isTrailing = trailingContexts.contains(metrics.preAdapterMetrics.CONTEXT);
// if the original contextSize is 0, there's no difference between leading and trailing, so add it to both
if (isLeading) leadingMetricsForTransition.add(metrics);
if (isTrailing) trailingMetricsForTransition.add(metrics);
}
// get the worst cases
final DetailPair totalMetric = zeroMetricsForTransition.get(0);
final DetailPair worstFullMetric = getWorstMetrics(fullMetricsForTransition);
final DetailPair worstLeadingMetric = getWorstMetrics(leadingMetricsForTransition);
final DetailPair worstTrailingMetric = getWorstMetrics(trailingMetricsForTransition);
// construct the actual summary metrics - a combination of all the data we've just extracted
final PreAdapterSummaryMetrics preAdapterSummaryMetrics = new PreAdapterSummaryMetrics();
final BaitBiasSummaryMetrics baitBiasSummaryMetrics = new BaitBiasSummaryMetrics();
preAdapterSummaryMetrics.SAMPLE_ALIAS = this.sampleAlias;
preAdapterSummaryMetrics.LIBRARY = this.library;
preAdapterSummaryMetrics.REF_BASE = transition.ref();
preAdapterSummaryMetrics.ALT_BASE = transition.call();
preAdapterSummaryMetrics.TOTAL_QSCORE = totalMetric.preAdapterMetrics.QSCORE;
preAdapterSummaryMetrics.WORST_CXT = worstFullMetric.preAdapterMetrics.CONTEXT;
preAdapterSummaryMetrics.WORST_CXT_QSCORE = worstFullMetric.preAdapterMetrics.QSCORE;
preAdapterSummaryMetrics.WORST_PRE_CXT = worstLeadingMetric.preAdapterMetrics.CONTEXT;
preAdapterSummaryMetrics.WORST_PRE_CXT_QSCORE = worstLeadingMetric.preAdapterMetrics.QSCORE;
preAdapterSummaryMetrics.WORST_POST_CXT = worstTrailingMetric.preAdapterMetrics.CONTEXT;
preAdapterSummaryMetrics.WORST_POST_CXT_QSCORE = worstTrailingMetric.preAdapterMetrics.QSCORE;
preAdapterSummaryMetrics.inferArtifactName();
baitBiasSummaryMetrics.SAMPLE_ALIAS = this.sampleAlias;
baitBiasSummaryMetrics.LIBRARY = this.library;
baitBiasSummaryMetrics.REF_BASE = transition.ref();
baitBiasSummaryMetrics.ALT_BASE = transition.call();
baitBiasSummaryMetrics.TOTAL_QSCORE = totalMetric.baitBiasMetrics.QSCORE;
baitBiasSummaryMetrics.WORST_CXT = worstFullMetric.baitBiasMetrics.CONTEXT;
baitBiasSummaryMetrics.WORST_CXT_QSCORE = worstFullMetric.baitBiasMetrics.QSCORE;
baitBiasSummaryMetrics.WORST_PRE_CXT = worstLeadingMetric.baitBiasMetrics.CONTEXT;
baitBiasSummaryMetrics.WORST_PRE_CXT_QSCORE = worstLeadingMetric.baitBiasMetrics.QSCORE;
baitBiasSummaryMetrics.WORST_POST_CXT = worstTrailingMetric.baitBiasMetrics.CONTEXT;
baitBiasSummaryMetrics.WORST_POST_CXT_QSCORE = worstTrailingMetric.baitBiasMetrics.QSCORE;
baitBiasSummaryMetrics.inferArtifactName();
// add the finalized metrics to the map
summaryMetricsMap.put(transition, new SummaryPair(preAdapterSummaryMetrics, baitBiasSummaryMetrics));
}
return summaryMetricsMap;
}
private ListMap<Transition, DetailPair> getDetailMetrics() {
return this.fullContextAccumulator.calculateMetrics(this.sampleAlias, this.library);
}
/**
* Given a list of detail metrics, get the worst pre-adapter metrics, and independently from that get the worst bait bias metrics
* (in terms of Q-score).
*/
private DetailPair getWorstMetrics(final List<DetailPair> metrics) {
PreAdapterDetailMetrics worstPreAdapterMetrics = null;
BaitBiasDetailMetrics worstBaitBiasMetrics = null;
for (final DetailPair m : metrics) {
//The comparator first comparse by QSCORE and then uses other fields to guarrantee a deterministic order
if (worstPreAdapterMetrics == null || m.preAdapterMetrics.compareTo(worstPreAdapterMetrics) < 0) worstPreAdapterMetrics = m.preAdapterMetrics;
if (worstBaitBiasMetrics == null || m.baitBiasMetrics. compareTo(worstBaitBiasMetrics) < 0) worstBaitBiasMetrics = m.baitBiasMetrics;
}
return new DetailPair(worstPreAdapterMetrics, worstBaitBiasMetrics);
}
}