/************************************************************************* * * * This file is part of the 20n/act project. * * 20n/act enables DNA prediction for synthetic biology/bioengineering. * * Copyright (C) 2017 20n Labs, Inc. * * * * Please direct all queries to act@20n.com. * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program. If not, see <http://www.gnu.org/licenses/>. * * * *************************************************************************/ package com.act.biointerpretation.analytics; import act.server.DBIterator; import act.server.NoSQLAPI; import act.shared.Reaction; import com.act.utils.TSVWriter; import com.mongodb.BasicDBObject; import com.mongodb.DBObject; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.DefaultParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Option; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.json.JSONObject; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; public class ReactionDeletion { private static final Logger LOGGER = LogManager.getFormatterLogger(ReactionDeletion.class); public static final String OPTION_OUTPUT_PATH = "o"; public static final String OPTION_SOURCE_DB = "r"; public static final String OPTION_SINK_DB = "k"; public static final String HELP_MESSAGE = StringUtils.join(new String[]{ "This class is used to find all reactions that were not carried forward from a read to a write DB. ", "This analysis only applies to a pair of consecutive DBs in the biointepretation pipeline, and does not span ", "multiple processing steps." } , ""); public static final List<Option.Builder> OPTION_BUILDERS = new ArrayList<Option.Builder>() {{ add(Option.builder(OPTION_OUTPUT_PATH) .argName("output path") .desc("A path to where output should be written") .hasArg().required() .longOpt("output") ); add(Option.builder(OPTION_SOURCE_DB) .argName("db name") .desc("DB from which reactions were read") .hasArg().required() .longOpt("source") ); add(Option.builder(OPTION_SINK_DB) .argName("db name") .desc("DB to which reactions were written") .hasArg().required() .longOpt("sink") ); add(Option.builder("h") .argName("help") .desc("Prints this help message") .longOpt("help") ); }}; public static final HelpFormatter HELP_FORMATTER = new HelpFormatter(); static { HELP_FORMATTER.setWidth(100); } public static void main(String[] args) throws Exception { Options opts = new Options(); for (Option.Builder b : OPTION_BUILDERS) { opts.addOption(b.build()); } CommandLine cl = null; try { CommandLineParser parser = new DefaultParser(); cl = parser.parse(opts, args); } catch (ParseException e) { LOGGER.error(String.format("Argument parsing failed: %s\n", e.getMessage())); HELP_FORMATTER.printHelp(ReactionCountProvenance.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); System.exit(1); } if (cl.hasOption("help")) { HELP_FORMATTER.printHelp(ReactionCountProvenance.class.getCanonicalName(), HELP_MESSAGE, opts, null, true); return; } if (!cl.hasOption(OPTION_OUTPUT_PATH)) { LOGGER.error("Input -o prefix"); return; } NoSQLAPI srcApi = new NoSQLAPI(cl.getOptionValue(OPTION_SOURCE_DB), cl.getOptionValue(OPTION_SOURCE_DB)); NoSQLAPI sinkApi = new NoSQLAPI(cl.getOptionValue(OPTION_SINK_DB), cl.getOptionValue(OPTION_SINK_DB)); searchForDroppedReactions(srcApi, sinkApi, new File(cl.getOptionValue(OPTION_OUTPUT_PATH))); } private static final List<String> OUTPUT_HEADER = Arrays.asList( "id", "substrates", "products", "ecnum", "easy_desc" ); public static void searchForDroppedReactions(NoSQLAPI srcApi, NoSQLAPI sinkApi, File outputFile) throws IOException { Set<Long> srcIds = new HashSet<>(); DBIterator iterator = srcApi.getReadDB().getIteratorOverReactions( new BasicDBObject("$query", new BasicDBObject()).append("$orderby", new BasicDBObject("_id", 1)), new BasicDBObject("_id", true)); while (iterator.hasNext()) { DBObject obj = iterator.next(); Object id = obj.get("_id"); if (id instanceof Integer) { Integer idi = (Integer)id; srcIds.add(idi.longValue()); } else { String msg = String.format("Found unexpected %s value for _id in src DB: %s", id.getClass().getName(), id); LOGGER.error(msg); throw new RuntimeException(msg); } } iterator.close(); Iterator<Reaction> sinkRxns = sinkApi.readRxnsFromInKnowledgeGraph(); while (sinkRxns.hasNext()) { Reaction rxn = sinkRxns.next(); for (JSONObject protein : rxn.getProteinData()) { if (protein.has("source_reaction_id")) { Long srcId = protein.getLong("source_reaction_id"); srcIds.remove(srcId); } else LOGGER.error("Found protein without source id for reaction %d", rxn.getUUID()); } } if (srcIds.size() == 0) { LOGGER.info("No source read DB ids were unaccounted for in the write DB. Exiting without writing output."); return; } List<Long> sortedSrcIds = new ArrayList<>(srcIds); Collections.sort(sortedSrcIds); try (TSVWriter<String, String> writer = new TSVWriter<>(OUTPUT_HEADER)) { writer.open(outputFile); int noProteinReactions = 0; for (Long id : sortedSrcIds) { Reaction rxn = srcApi.readReactionFromInKnowledgeGraph(id); if (rxn == null) { LOGGER.error("Could not read reaction %d from source DB", id); continue; } if (rxn.getProteinData().size() == 0) { LOGGER.debug("Reaction %d has no proteins, and so cannot participate in the provenance chain", rxn.getUUID()); noProteinReactions++; continue; } Map<String, String> row = new HashMap<String, String>(OUTPUT_HEADER.size()) {{ put("id", Long.valueOf(rxn.getUUID()).toString()); put("substrates", "{" + StringUtils.join(rxn.getSubstrates(), ",") + "}"); put("products", "{" + StringUtils.join(rxn.getProducts(), ",") + "}"); put("ecnum", rxn.getECNum()); put("easy_desc", rxn.getReactionName()); }}; writer.append(row); writer.flush(); } LOGGER.info("Found %d reactions with no proteins of %d reactions that might have been deleted", noProteinReactions, srcIds.size()); } } }