/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package com.act.biointerpretation.networkanalysis;
import act.server.MongoDB;
import com.act.biointerpretation.l2expansion.L2PredictionCorpus;
import com.act.jobs.FileChecker;
import com.act.jobs.JavaRunnable;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
/**
* Builds a metabolic network from any of several sources. Can take edges from an existing network,
* a database of reactions, or prediction corpuses.
*/
public class NetworkBuilder implements JavaRunnable {

  private static final Logger LOGGER = LogManager.getFormatterLogger(NetworkBuilder.class);

  /** Existing network file to start from; empty when no seed file was supplied. */
  private final Optional<File> seedNetwork;
  /** Prediction corpus files whose contents are loaded into the network. Never null. */
  private final List<File> corpusFiles;
  /** Database to load edges from; empty when no database was supplied. Never null. */
  private final Optional<MongoDB> db;
  /** File the finished network is written to. */
  private final File outputFile;
  // True if the builder should read in every valid input file even if some inputs are invalid.
  // False if builder should crash on even a single invalid input file.
  private final boolean skipInvalidInputs;

  /**
   * Creates a builder; no file or database access happens until {@link #run()} is called.
   *
   * @param seedNetwork file holding an existing network to start from, or null to start from an empty network
   * @param corpusFiles prediction corpus files to load; null is treated as "no corpus files"
   * @param db database to load edges from; null is treated the same as an empty Optional
   * @param outputFile file the resulting network is written to
   * @param skipInvalidInputs if true, corpus files that cannot be read are logged and skipped;
   *                          if false, the first unreadable corpus file aborts the run
   */
  public NetworkBuilder(
      File seedNetwork, List<File> corpusFiles, Optional<MongoDB> db, File outputFile, boolean skipInvalidInputs) {
    this.seedNetwork = Optional.ofNullable(seedNetwork);
    // Normalize null arguments up front so run() cannot hit a NullPointerException part-way through,
    // after the output file has already been verified/created.
    this.corpusFiles = corpusFiles == null ? new ArrayList<File>() : corpusFiles;
    this.db = db == null ? Optional.<MongoDB>empty() : db;
    this.outputFile = outputFile;
    this.skipInvalidInputs = skipInvalidInputs;
  }

  /**
   * Builds the network and writes it to the output file. Steps, in order: check the input and output
   * files, read the prediction corpuses, create the starting network (from the seed file if one was
   * given, otherwise empty), load edges from the database if one was given, load the predictions from
   * every corpus that was read, and write the network out as JSON.
   *
   * @throws IOException if a corpus file cannot be read and skipInvalidInputs is false (the original
   *                     exception is attached as the cause), or if any of the file checks or the
   *                     network read/write calls propagate one
   */
  @Override
  public void run() throws IOException {
    LOGGER.info("Starting NetworkBuilder run.");

    verifyFiles();
    LOGGER.info("Checked input files for validity.");

    List<L2PredictionCorpus> corpuses = readCorpuses();

    // Set up network object, and load predictions and reactions into network edges.
    MetabolismNetwork network = createStartingNetwork();
    LOGGER.info("Created starting network! Loading edges from DB.");

    db.ifPresent(network::loadAllEdgesFromDb);
    LOGGER.info("Done loading edges from DB, if any. Loading edges from any supplied prediction corpuses.");

    for (L2PredictionCorpus corpus : corpuses) {
      network.loadPredictions(corpus);
    }
    LOGGER.info("Done loading predictions from input corpuses. Writing network to file.");

    // Write network out
    network.writeToJsonFile(outputFile);
    LOGGER.info("Complete! Network has been written to %s", outputFile.getAbsolutePath());
  }

  /**
   * Runs the FileChecker checks on the seed network file (if any), every corpus file, and the output file.
   * NOTE(review): this pass does not consult skipInvalidInputs, so a corpus file rejected by
   * FileChecker.verifyInputFile aborts the run even in skip mode -- confirm that this is intended.
   */
  private void verifyFiles() throws IOException {
    if (seedNetwork.isPresent()) {
      FileChecker.verifyInputFile(seedNetwork.get());
    }
    for (File file : corpusFiles) {
      FileChecker.verifyInputFile(file);
    }
    FileChecker.verifyAndCreateOutputFile(outputFile);
  }

  /**
   * Reads every corpus file as a JSON prediction corpus. A file that fails with an IOException is logged
   * and skipped when skipInvalidInputs is true; otherwise the failure is rethrown with the original
   * exception preserved as its cause.
   */
  private List<L2PredictionCorpus> readCorpuses() throws IOException {
    List<L2PredictionCorpus> corpuses = new ArrayList<>(corpusFiles.size());
    for (File file : corpusFiles) {
      try {
        corpuses.add(L2PredictionCorpus.readPredictionsFromJsonFile(file));
      } catch (IOException e) {
        LOGGER.warn("Couldn't read file of name %s as input corpus.", file.getName());
        if (!skipInvalidInputs) {
          // Chain the original exception so its stack trace is not lost.
          throw new IOException("Couldn't read input corpus file " + file.getName() + ": " + e.getMessage(), e);
        }
      }
    }
    return corpuses;
  }

  /** Returns the network read from the seed file when one was supplied, otherwise a new empty network. */
  private MetabolismNetwork createStartingNetwork() throws IOException {
    if (seedNetwork.isPresent()) {
      return MetabolismNetwork.getNetworkFromJsonFile(seedNetwork.get());
    }
    return new MetabolismNetwork();
  }
}