/*************************************************************************
*                                                                        *
*  This file is part of the 20n/act project.                             *
*  20n/act enables DNA prediction for synthetic biology/bioengineering.  *
*  Copyright (C) 2017 20n Labs, Inc.                                     *
*                                                                        *
*  Please direct all queries to act@20n.com.                             *
*                                                                        *
*  This program is free software: you can redistribute it and/or modify  *
*  it under the terms of the GNU General Public License as published by  *
*  the Free Software Foundation, either version 3 of the License, or     *
*  (at your option) any later version.                                   *
*                                                                        *
*  This program is distributed in the hope that it will be useful,       *
*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *
*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          *
*  GNU General Public License for more details.                          *
*                                                                        *
*  You should have received a copy of the GNU General Public License     *
*  along with this program. If not, see <http://www.gnu.org/licenses/>.  *
*                                                                        *
*************************************************************************/

package com.act.biointerpretation.sarinference;

import chemaxon.formats.MolFormatException;
import chemaxon.formats.MolImporter;
import chemaxon.struc.Molecule;
import com.act.biointerpretation.l2expansion.L2PredictionCorpus;
import com.act.biointerpretation.sars.Sar;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.util.Arrays;
import java.util.function.Consumer;

/**
 * Calculates a SAR's hit percentage score by testing the SAR against all substrates in a prediction corpus, and
 * counting LCMS positives and negatives among the substrates that match the SAR. This is the most complete scoring
 * possible, as it does not rely on the clustering that generated the SarTree to be perfect (unlike
 * SarTreeBasedCalculator). However, it is computationally expensive.
 */
public class SarHitPercentageCalculator implements Consumer<SarTreeNode> {

  private static final Logger LOGGER = LogManager.getFormatterLogger(SarHitPercentageCalculator.class);

  private final L2PredictionCorpus positivePredictionCorpus;
  private final L2PredictionCorpus fullPredictionCorpus;

  /**
   * Builds a calculator that scores SARs against the two supplied corpora.
   *
   * @param positivePredictions The corpus whose substrates are counted as hits.
   * @param fullPredictionCorpus The entire corpus; its matching substrates minus the hits are counted as misses.
   */
  public SarHitPercentageCalculator(L2PredictionCorpus positivePredictions, L2PredictionCorpus fullPredictionCorpus) {
    this.positivePredictionCorpus = positivePredictions;
    this.fullPredictionCorpus = fullPredictionCorpus;
  }

  /**
   * Score the SAR against all substrates in the positive corpus, and against all substrates in the entire
   * corpus, to get a ratio of LCMS hits to misses for this SAR. The hit count is stored on the node before the
   * full corpus is scanned; the miss count is the full-corpus match count minus the hit count.
   *
   * @param node The SarTreeNode to score.
   */
  @Override
  public void accept(SarTreeNode node) {
    Sar sar = node.getSar();

    int positiveMatches = getHits(sar, positivePredictionCorpus);
    node.setNumberHits(positiveMatches);

    int allMatches = getHits(sar, fullPredictionCorpus);
    node.setNumberMisses(allMatches - positiveMatches);
  }

  /**
   * Counts the unique substrates of a corpus that the given SAR accepts.
   *
   * @param sar The SAR to test each substrate against.
   * @param corpus The corpus supplying the unique substrate InChIs.
   * @return The number of substrates that were imported successfully and passed {@code sar.test}.
   */
  private int getHits(Sar sar, L2PredictionCorpus corpus) {
    int hits = 0;

    for (String inchi : corpus.getUniqueSubstrateInchis()) {
      Molecule substrate;
      try {
        substrate = MolImporter.importMol(inchi, "inchi");
      } catch (MolFormatException e) {
        // An unparseable substrate is skipped rather than aborting the whole scoring pass. The exception is
        // handed over as a trailing argument (ignored by the two %s placeholders) so that the logger can
        // attach its stack trace instead of losing it.
        LOGGER.error("Couldn't import substrate %s from prediction corpus: %s", inchi, e.getMessage(), e);
        continue;
      }

      // The SAR is tested against a list of substrates; here that list holds just this one.
      if (sar.test(Arrays.asList(substrate))) {
        hits++;
      }
    }

    return hits;
  }
}