/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package com.act.biointerpretation.networkanalysis;
import com.act.jobs.FileChecker;
import com.act.jobs.JavaRunnable;
import com.act.lcms.v2.IonCalculator;
import com.act.lcms.v2.LcmsIonCalculator;
import com.act.lcms.v2.PeakSpectrum;
import com.act.utils.TSVWriter;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
/**
 * Workflow component to take in a graph, calculate a precursor subgraph, and write the subgraph to file.
 *
 * For every target InChI that is found in the loaded network, a precursor report is written as JSON to a file
 * named {@code PRECURSOR_PREFIX + id} inside the output directory. The mapping from each assigned id to its
 * target InChI is written to a TSV file named {@code targetIds} in the same directory. When an LCMS input file
 * is supplied, LCMS data is added to each report before it is written.
 */
public class PrecursorAnalysis implements JavaRunnable {

  private static final Logger LOGGER = LogManager.getFormatterLogger(PrecursorAnalysis.class);

  public static final String PRECURSOR_PREFIX = "precursors_target_";
  private static final String TARGET_ID_HEADER = "target_id";
  private static final String INCHI_HEADER = "InChI";

  private final File networkInput;
  private final Optional<File> lcmsInput;
  // The targets of the analysis: InChI strings of molecules whose network precursors we would like to identify.
  private final List<String> targets;
  private final File outputDirectory;
  // The number of edges/hops/steps to search backwards from the targets to identify relevant precursors in the network.
  private final int numSteps;

  // Ions handed to the report when LCMS data is added. Built as a plain HashSet rather than via double-brace
  // initialization, which would create an anonymous subclass holding a reference to this instance.
  private Set<String> ionSet = new HashSet<>(Arrays.asList("M+H", "M+Na", "M+H-H2O"));

  /**
   * Creates an analysis that uses no LCMS data.
   *
   * @param networkInput The JSON file to load the metabolism network from.
   * @param targets InChI strings of the molecules whose precursors should be reported.
   * @param numSteps The number of steps to search backwards from each target.
   * @param outputDirectory The directory the reports and the target ID file are written to.
   */
  public PrecursorAnalysis(File networkInput, List<String> targets, int numSteps, File outputDirectory) {
    // Must be an empty Optional, not null: run() dereferences lcmsInput unconditionally.
    this(networkInput, Optional.<File>empty(), targets, numSteps, outputDirectory);
  }

  /**
   * Creates an analysis that optionally adds LCMS data to each report.
   *
   * @param networkInput The JSON file to load the metabolism network from.
   * @param lcmsInput The LCMS TSV file to parse, if any; a null value is treated as absent.
   * @param targets InChI strings of the molecules whose precursors should be reported.
   * @param numSteps The number of steps to search backwards from each target.
   * @param outputDirectory The directory the reports and the target ID file are written to.
   */
  public PrecursorAnalysis(File networkInput,
                           Optional<File> lcmsInput,
                           List<String> targets,
                           int numSteps,
                           File outputDirectory) {
    this.networkInput = networkInput;
    // Normalize null to an empty Optional so run() never hits a NullPointerException on isPresent().
    this.lcmsInput = lcmsInput == null ? Optional.<File>empty() : lcmsInput;
    this.targets = targets;
    this.numSteps = numSteps;
    this.outputDirectory = outputDirectory;
  }

  /**
   * Replaces the set of ions passed to the report when LCMS data is added.
   *
   * @param ions The ion names to use; the set is stored by reference, not copied.
   */
  public void setIons(Set<String> ions) {
    this.ionSet = ions;
  }

  /**
   * Loads the network, writes one precursor report per target found in it, and writes the target ID file.
   * Targets that are not present in the network are logged with a warning and skipped.
   *
   * @throws IOException declared by the interface; may originate from file verification, parsing, or writing.
   */
  @Override
  public void run() throws IOException {
    File targetIdFile = new File(outputDirectory, "targetIds");

    // Verify files
    FileChecker.verifyInputFile(networkInput);
    FileChecker.verifyOrCreateDirectory(outputDirectory);
    FileChecker.verifyAndCreateOutputFile(targetIdFile);
    LOGGER.info("Verified files. Loading network");

    // Get input network
    MetabolismNetwork network = MetabolismNetwork.getNetworkFromJsonFile(networkInput);
    LOGGER.info("Loaded network from file. Running precursor analyses.");

    // Insertion-ordered so the target ID file lists targets in the order they were processed.
    Map<String, Integer> targetIdMap = new LinkedHashMap<>();
    int id = 0;

    // The LCMS helpers are only built when an LCMS file was supplied; otherwise they stay null and unused.
    IonCalculator ionCalculator = lcmsInput.isPresent() ? new LcmsIonCalculator() : null;
    PeakSpectrum lcmsSpectrum = lcmsInput.isPresent() ? LcmsTSVParser.parseTSV(lcmsInput.get()) : null;

    // Do precursor analyses on each target. Give each found target an ID so we can track which report is which.
    for (String target : targets) {
      Optional<NetworkNode> targetNode = network.getNodeOptionByInchi(target);
      if (!targetNode.isPresent()) {
        LOGGER.warn("Target node %s not found in network!", target);
        continue;
      }

      PrecursorReport report = network.getPrecursorReport(targetNode.get(), numSteps);
      if (lcmsInput.isPresent()) {
        report.addLcmsData(lcmsSpectrum, ionCalculator, ionSet);
      }

      File outputFile = new File(outputDirectory, PRECURSOR_PREFIX + id);
      report.writeToJsonFile(outputFile);
      LOGGER.info("Wrote target %s report to file %s", target, outputFile.getAbsolutePath());

      targetIdMap.put(target, id);
      id++;
    }

    // Write out the target IDs to file for reference.
    writeTargetIdMapToFile(targetIdMap, targetIdFile);
    LOGGER.info("Complete! Output files live in directory %s", outputDirectory.getAbsolutePath());
  }

  /**
   * Write out the target ID map. This is a TSV file where each line contains the ID followed by the target's InChI.
   * Rows are written in the iteration order of the supplied map.
   *
   * @param targetIdMap The map to write, keyed by target InChI with the assigned ID as value.
   * @param targetIdFile The file to write to.
   * @throws IOException if the writer fails to open, append to, or close the file.
   */
  private void writeTargetIdMapToFile(Map<String, Integer> targetIdMap, File targetIdFile) throws IOException {
    try (TSVWriter<String, String> writer = new TSVWriter<>(Arrays.asList(TARGET_ID_HEADER, INCHI_HEADER))) {
      writer.open(targetIdFile);
      for (Map.Entry<String, Integer> entry : targetIdMap.entrySet()) {
        // A plain map per row; avoids allocating an anonymous inner class on every iteration.
        Map<String, String> row = new HashMap<>();
        row.put(TARGET_ID_HEADER, entry.getValue().toString());
        row.put(INCHI_HEADER, entry.getKey());
        writer.append(row);
      }
    }
  }
}