/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package com.act.lcms.v2;
import com.act.lcms.XZ;
import com.act.lcms.db.analysis.WaveformAnalysis;
import com.act.lcms.db.model.MS1ScanForWellAndMassCharge;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.rocksdb.RocksDBException;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
public class TraceIndexAnalyzer {
private static final Logger LOGGER = LogManager.getFormatterLogger(TraceIndexAnalyzer.class);
private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
private static final String OPTION_INDEX_PATH = "x";
private static final String OPTION_OUTPUT_PATH = "o";
public static final String HELP_MESSAGE = StringUtils.join(new String[]{
"This class consumes windowed traces from an LCMS scan files, searching each window for peaks and writing the ",
"results of its analysis in a large JSON document."
}, "");
public static final List<Option.Builder> OPTION_BUILDERS = new ArrayList<Option.Builder>() {{
add(Option.builder(OPTION_INDEX_PATH)
.argName("index path")
.desc("A path to the directory where the on-disk index is stored; must already exist")
.hasArg().required()
.longOpt("index")
);
add(Option.builder(OPTION_OUTPUT_PATH)
.argName("outputpath")
.desc("A path where the SNR/time/intensity results per window should be written as JSON")
.hasArg().required()
.longOpt("output")
);
add(Option.builder("h")
.argName("help")
.desc("Prints this help message")
.longOpt("help")
);
}};
public static final HelpFormatter HELP_FORMATTER = new HelpFormatter();
static {
HELP_FORMATTER.setWidth(100);
}
public static class AnalysisResult {
@JsonProperty("mz")
private Double mz;
@JsonProperty("peak_intensity")
private Double peakIntensity;
@JsonProperty("peak_time")
private Double peakTime;
protected AnalysisResult() {
}
public AnalysisResult(Double mz, Double peakIntensity, Double peakTime) {
this.mz = mz;
this.peakIntensity = peakIntensity;
this.peakTime = peakTime;
}
public Double getMz() {
return mz;
}
protected void setMz(Double mz) {
this.mz = mz;
}
public Double getPeakIntensity() {
return peakIntensity;
}
protected void setPeakIntensity(Double peakIntensity) {
this.peakIntensity = peakIntensity;
}
public Double getPeakTime() {
return peakTime;
}
protected void setPeakTime(Double peakTime) {
this.peakTime = peakTime;
}
}
public static void main(String[] args) throws Exception {
Options opts = new Options();
for (Option.Builder b : OPTION_BUILDERS) {
opts.addOption(b.build());
}
CommandLine cl = null;
try {
CommandLineParser parser = new DefaultParser();
cl = parser.parse(opts, args);
} catch (ParseException e) {
System.err.format("Argument parsing failed: %s\n", e.getMessage());
HELP_FORMATTER.printHelp(TraceIndexExtractor.class.getCanonicalName(), HELP_MESSAGE, opts, null, true);
System.exit(1);
}
if (cl.hasOption("help")) {
HELP_FORMATTER.printHelp(TraceIndexExtractor.class.getCanonicalName(), HELP_MESSAGE, opts, null, true);
return;
}
File rocksDBFile = new File(cl.getOptionValue(OPTION_INDEX_PATH));
if (!rocksDBFile.exists()) {
System.err.format("Index file at %s does not exist, nothing to analyze", rocksDBFile.getAbsolutePath());
HELP_FORMATTER.printHelp(TraceIndexExtractor.class.getCanonicalName(), HELP_MESSAGE, opts, null, true);
System.exit(1);
}
LOGGER.info("Starting analysis");
TraceIndexAnalyzer analyzer = new TraceIndexAnalyzer();
analyzer.runExtraction(
rocksDBFile,
new File(cl.getOptionValue(OPTION_OUTPUT_PATH))
);
LOGGER.info("Done");
}
private void runExtraction(File rocksDBFile, File outputFile)
throws RocksDBException, IOException {
List<AnalysisResult> results = new ArrayList<>();
// Extract each target's trace, computing and saving stats as we go. This should fit in memory no problem.
Iterator<Pair<Double, List<XZ>>> traceIterator = new TraceIndexExtractor().getIteratorOverTraces(rocksDBFile);
while (traceIterator.hasNext()) {
Pair<Double, List<XZ>> targetAndTrace = traceIterator.next();
String label = String.format("%.6f", targetAndTrace.getLeft());
// Note: here we cheat by knowing how the MS1 class is going to use this incredibly complex container.
MS1ScanForWellAndMassCharge scanForWell = new MS1ScanForWellAndMassCharge();
scanForWell.setMetlinIons(Collections.singletonList(label));
scanForWell.getIonsToSpectra().put(label, targetAndTrace.getRight());
Pair<List<XZ>, Map<Double, Double>> timeWindowsAndMaxes =
WaveformAnalysis.compressIntensityAndTimeGraphsAndFindMaxIntensityInEveryTimeWindow(
targetAndTrace.getRight(), WaveformAnalysis.COMPRESSION_CONSTANT);
List<XZ> calledPeaks = WaveformAnalysis.detectPeaksInIntensityTimeWaveform(
timeWindowsAndMaxes.getLeft(), WaveformAnalysis.PEAK_DETECTION_THRESHOLD); // Same as waveform analysis.
for (XZ calledPeak : calledPeaks) {
AnalysisResult result = new AnalysisResult(
targetAndTrace.getLeft(),
timeWindowsAndMaxes.getRight().get(calledPeak.getTime()),
calledPeak.getTime()
);
results.add(result);
}
}
LOGGER.info("Writing window stats to JSON file");
try (FileOutputStream fos = new FileOutputStream(outputFile)) {
OBJECT_MAPPER.writerWithDefaultPrettyPrinter().writeValue(fos, results);
}
}
}