/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package com.act.lcms.db.analysis;
import com.act.lcms.MassCalculator;
import com.act.lcms.db.io.DB;
import com.act.lcms.db.model.ChemicalAssociatedWithPathway;
import com.act.lcms.db.model.ChemicalOfInterest;
import com.act.lcms.db.model.ConstructEntry;
import com.act.lcms.db.model.CuratedChemical;
import com.act.lcms.db.model.LCMSWell;
import com.act.lcms.db.model.Plate;
import com.act.lcms.db.model.StandardWell;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class Utils {
/**
 * Substitutes an empty array for a null one so callers can iterate without a null check.
 * @param val An array of strings, possibly null.
 * @return {@code val} itself when it is non-null, otherwise a new zero-length array.
 */
public static String[] ensureNonNull(String[] val) {
  if (val != null) {
    return val;
  }
  return new String[0];
}
/** Matches plate coordinates: one or more ASCII letters (the row) followed by one or more digits (the column). */
public static final Pattern PLATE_COORDINATES_PATTERN = Pattern.compile("^([A-Za-z]+)(\\d+)$");

/* Rather than trying to compute the offset of well coordinates on the fly, we pre-compute and choke if we can't find
 * the well.  This will make it easier to expand to double-character rows if necessary.
 * The table maps each single upper-case row letter 'A' through 'Z' (inclusive) to its zero-based index, 0 through 25. */
private static final Map<String, Integer> WELL_ROW_TO_INDEX;
static {
  Map<String, Integer> rowToIndex = new HashMap<>();
  // The bound is inclusive: an exclusive bound would leave row 'Z' out of the table.
  for (char row = 'A'; row <= 'Z'; row++) {
    rowToIndex.put(String.valueOf(row), row - 'A');
  }
  WELL_ROW_TO_INDEX = Collections.unmodifiableMap(rowToIndex);
}
/**
 * Converts a coordinate string like 'C12' into zero-indexed row and column indices like (2, 11).
 *
 * The row must be a key of WELL_ROW_TO_INDEX (single upper-case letters); any other row that passes the coordinate
 * pattern, such as a multi-character or lower-case row, is rejected.  Columns are one-indexed in the input, so a
 * column of 0 is rejected rather than being turned into a negative index.
 * @param coords A coordinate string to parse.
 * @return A row and column index pair, where 'A1' is (0, 0).
 * @throws IllegalArgumentException Thrown when the coordinates are null or can't be parsed or interpreted.
 */
public static Pair<Integer, Integer> parsePlateCoordinates(String coords) throws IllegalArgumentException {
  // Guard against null explicitly: Pattern.matcher(null) would otherwise fail with a NullPointerException.
  Matcher matcher = coords == null ? null : PLATE_COORDINATES_PATTERN.matcher(coords);
  if (matcher == null || !matcher.matches()) {
    throw new IllegalArgumentException(String.format("Invalid plate coordinates: %s", coords));
  }

  String plateRowStr = matcher.group(1);
  Integer plateRow = WELL_ROW_TO_INDEX.get(plateRowStr);
  if (plateRow == null) {
    throw new IllegalArgumentException(String.format(
        "Unable to handle multi-character plate row %s for coordinates %s", plateRowStr, coords));
  }

  // parseInt throws NumberFormatException (an IllegalArgumentException) if the digits overflow an int.
  int oneIndexedColumn = Integer.parseInt(matcher.group(2));
  if (oneIndexedColumn < 1) {
    throw new IllegalArgumentException(String.format(
        "Plate columns start at 1, but found column %d in coordinates %s", oneIndexedColumn, coords));
  }
  Integer plateColumn = oneIndexedColumn - 1;

  return Pair.of(plateRow, plateColumn);
}
/**
 * Looks up the target chemical of a construct using data in the constructs table.
 *
 * A warning is written to stderr and null is returned when the construct has no entry, when its target has no
 * curated chemical entry, or when the curated chemical's mass is not greater than zero.
 * @param db The database to query for construct data.
 * @param compositionId The construct id/composition id, like 'ca1' or 'pa2'.
 * @return A curated chemical for the target of the specified construct, or null if no usable target was found.
 * @throws SQLException
 */
public static CuratedChemical extractTargetForConstruct(DB db, String compositionId) throws SQLException {
  ConstructEntry construct =
      ConstructEntry.getInstance().getCompositionMapEntryByCompositionId(db, compositionId);
  if (construct == null) {
    System.err.format("WARNING: No construct -> chemical mapping for %s\n", compositionId);
    return null;
  }

  CuratedChemical target = CuratedChemical.getCuratedChemicalByName(db, construct.getTarget());
  if (target == null) {
    System.err.format("WARNING: No curated chemical entry for %s/%s\n",
        construct.getCompositionId(), construct.getTarget());
    return null;
  }

  // A mass of zero or less can't be searched for, so treat it the same as a missing chemical.
  boolean massIsInvalid = target.getMass() <= 0.0d;
  if (massIsInvalid) {
    System.err.format("WARNING: Invalid mass for chemical %s/%s (%f)\n",
        construct.getCompositionId(), target.getName(), target.getMass());
    return null;
  }

  return target;
}
/**
 * Collects the distinct target chemicals for a list of wells by resolving each well's composition through
 * extractTargetForConstruct.  Wells whose target cannot be resolved (a null result) contribute nothing.
 * @param db The database to query for constructs/chemicals.
 * @param positiveWells The list of wells whose target chemicals to find.
 * @return The set of target chemicals found for the specified wells; empty if none could be resolved.
 * @throws SQLException
 */
public static Set<CuratedChemical> extractTargetsForWells(DB db, List<LCMSWell> positiveWells) throws SQLException {
  Set<CuratedChemical> targets = new HashSet<>();
  for (LCMSWell positiveWell : positiveWells) {
    CuratedChemical target = extractTargetForConstruct(db, positiveWell.getComposition());
    if (target == null) {
      continue;
    }
    targets.add(target);
  }
  return targets;
}
/**
 * Finds the single target chemical shared by a list of LCMS wells.  Throws an IllegalArgumentException if the wells
 * resolve to more than one distinct target.
 * @param db The DB to query for information about the wells/targets.
 * @param wells A list of wells whose targets to scan.
 * @return The single shared target of all the wells, or null if no target could be found for any well.
 * @throws SQLException
 * @throws IllegalArgumentException Thrown when the wells have more than one target chemical.
 */
public static CuratedChemical requireOneTarget(DB db, List<LCMSWell> wells)
    throws SQLException, IllegalArgumentException {
  Set<CuratedChemical> targets = extractTargetsForWells(db, wells);

  if (targets.isEmpty()) {
    return null;
  }
  if (targets.size() == 1) {
    return targets.iterator().next();
  }

  // More than one target: report every name we found so the caller can see what conflicted.
  // TODO: is there a foreach approach that we can use here that won't break backwards compatibility?
  List<String> targetNames = new ArrayList<>(targets.size());
  for (CuratedChemical target : targets) {
    targetNames.add(target.getName());
  }
  String joinedNames = StringUtils.join(targetNames, ", ");
  throw new IllegalArgumentException(
      String.format("Found multiple target chemicals where one required: %s", joinedNames));
}
/**
 * Filters a set of metlin masses by include/exclude ion names.
 *
 * A null or empty set means "no filter of that kind": an empty include set does not exclude everything, it simply
 * leaves inclusion unrestricted.  When neither set contains anything, the input map itself is returned (not a copy);
 * otherwise a new map is returned and the input is left untouched.
 * @param metlinMassesPreFilter A map of ion names to masses.
 * @param includeIons A set of ion names to include (all others will be excluded); null or empty to include all.
 * @param excludeIons A set of ion names to exclude.  Exclusion takes priority over inclusion.
 * @return A map of ion names to masses filtered by the include/exclude sets.
 */
public static Map<String, Double> filterMasses(Map<String, Double> metlinMassesPreFilter,
                                               Set<String> includeIons, Set<String> excludeIons) {
  boolean hasIncludes = includeIons != null && !includeIons.isEmpty();
  boolean hasExcludes = excludeIons != null && !excludeIons.isEmpty();

  // Don't filter if there's nothing by which to filter.
  if (!hasIncludes && !hasExcludes) {
    return metlinMassesPreFilter;
  }

  // Create a fresh map and add from the old one as we go, leaving the caller's map unmodified.
  Map<String, Double> metlinMasses = new HashMap<>(metlinMassesPreFilter.size());
  for (Map.Entry<String, Double> entry : metlinMassesPreFilter.entrySet()) {
    String ionName = entry.getKey();
    // Skip all exclude values immediately: exclusion wins over inclusion.
    if (hasExcludes && excludeIons.contains(ionName)) {
      continue;
    }
    /* Only apply the include filter when it actually names some ions.  Checking just for null here would make an
     * empty include set drop every entry, contradicting the "nothing to filter by" rule used for the early return. */
    if (!hasIncludes || includeIons.contains(ionName)) {
      metlinMasses.put(ionName, entry.getValue());
    }
  }

  return metlinMasses;
}
/**
 * Find a well containing the specified chemical in the plate with a given barcode.
 *
 * The first well in the plate whose chemical equals {@code standardName} is returned, and its coordinates are
 * printed to stdout.
 * @param db A DB containing plate/well data.
 * @param standardPlateBarcode The barcode of the plate in which to search.
 * @param standardName The name of the chemical to find.
 * @param failIfMissing Throw an exception if the specified standard cannot be found in the specified plate.
 * @return The StandardWell in the specified plate that contains the specified chemical, or null if there is no
 *         such well and {@code failIfMissing} is false.
 * @throws SQLException
 * @throws IllegalArgumentException thrown when the plate is missing or is not a standards plate, or when the
 *         chemical cannot be found therein and {@code failIfMissing} is true.
 * @throws IOException
 * @throws ClassNotFoundException
 */
public static StandardWell extractStandardWellFromPlate(DB db, String standardPlateBarcode,
                                                        String standardName, boolean failIfMissing)
    throws SQLException, IllegalArgumentException, IOException, ClassNotFoundException {
  Plate plate = Plate.getPlateByBarcode(db, standardPlateBarcode);
  if (plate == null) {
    throw new IllegalArgumentException(
        String.format("Unable to find standard plate with barcode %s", standardPlateBarcode));
  }
  // Only plates that hold standards are acceptable here.
  if (plate.getContentType() != Plate.CONTENT_TYPE.STANDARD) {
    throw new IllegalArgumentException(
        String.format("Plate with barcode %s has content type %s, expected %s",
            standardPlateBarcode, plate.getContentType(), Plate.CONTENT_TYPE.STANDARD)
    );
  }

  // Scan the plate's wells in the order returned and stop at the first one holding the requested chemical.
  StandardWell match = null;
  for (StandardWell candidate : StandardWell.getInstance().getByPlateId(db, plate.getId())) {
    if (standardName.equals(candidate.getChemical())) {
      match = candidate;
      break;
    }
  }

  if (match != null) {
    System.out.format("Found matching standard well at %s (%s)\n", match.getCoordinatesString(), match.getChemical());
    return match;
  }

  if (!failIfMissing) {
    return null;
  }
  throw new IllegalArgumentException(
      String.format("Unable to find standard chemical %s in plate %s", standardName, standardPlateBarcode)
  );
}
/**
 * Find a well containing the specified chemical in the plate with a given barcode, failing if it is missing.
 * Delegates to the four-argument overload with fail-on-missing set to true.
 * @param db A DB containing plate/well data.
 * @param standardPlateBarcode The barcode of the plate in which to search.
 * @param standardName The name of the chemical to find.
 * @return The StandardWell in the specified plate that contains the specified chemical.
 * @throws SQLException
 * @throws IllegalArgumentException thrown when the plate is invalid or the chemical cannot be found therein.
 * @throws IOException
 * @throws ClassNotFoundException
 */
public static StandardWell extractStandardWellFromPlate(DB db, String standardPlateBarcode, String standardName)
    throws SQLException, IllegalArgumentException, IOException, ClassNotFoundException {
  // Fail on missing set to true by default.
  final boolean failIfMissing = true;
  return extractStandardWellFromPlate(db, standardPlateBarcode, standardName, failIfMissing);
}
/**
 * Parses a mass value from a string (like 123.456), or searches for a chemical by name and computes the mass.
 *
 * Resolution order: (1) if massStr parses as a double it is used directly and described as "raw-m/z"; (2) otherwise
 * a curated chemical with that name supplies the mass; (3) otherwise the first chemical of interest with that name
 * has its mass calculated from its InChI.
 * @param db A DB to query for chemicals if massStr does not contain a number.
 * @param massStr A numeric mass value or a chemical name whose mass to find.
 * @return A pair containing a textual description of the value used and a mass value.
 * @throws SQLException
 * @throws IllegalArgumentException Thrown when the massStr is null, or can't be parsed or found in the DB.
 */
public static Pair<String, Double> extractMassFromString(DB db, String massStr)
    throws SQLException, IllegalArgumentException {
  // Double.parseDouble(null) throws NullPointerException, so reject null up front with the documented exception.
  if (massStr == null) {
    throw new IllegalArgumentException(
        String.format("Unable to parse or find chemical name for string: %s", massStr));
  }

  try {
    Double mz = Double.parseDouble(massStr);
    return Pair.of("raw-m/z", mz);
  } catch (NumberFormatException ignored) {
    // Not a number: fall through and treat massStr as a chemical name.
  }

  // Prefer a curated chemical, whose mass is stored directly.
  CuratedChemical targetChemical = CuratedChemical.getCuratedChemicalByName(db, massStr);
  if (targetChemical != null) {
    Double mz = targetChemical.getMass();
    return Pair.of(massStr, mz);
  }

  // Fall back to chemicals of interest, computing the mass from the InChI.
  List<ChemicalOfInterest> chemicalsOfInterest =
      ChemicalOfInterest.getInstance().getChemicalOfInterestByName(db, massStr);
  if (chemicalsOfInterest == null || chemicalsOfInterest.size() == 0) {
    throw new IllegalArgumentException(
        String.format("Unable to parse or find chemical name for string: %s", massStr));
  }
  if (chemicalsOfInterest.size() != 1) {
    System.err.format("WARNING: found multiple chemicals of interest for name '%s', using first\n", massStr);
  }

  ChemicalOfInterest chem = chemicalsOfInterest.get(0);
  Double mz = MassCalculator.calculateMass(chem.getInchi());
  System.out.format("Using reference M/Z for specified chemical %s (%f)\n", chem.getName(), mz);
  return Pair.of(massStr, mz);
}
/**
 * Produces an ordered list of chemicals and their masses that represent the intermediate and side-reaction products
 * of the pathway encoded in a particular construct.  These are returned as a list rather than a hash to keep them in
 * pathway order (from last/highest to first/lowest intermediate or side-reaction).
 *
 * Each chemical's mass comes from its curated chemical entry when one exists; otherwise any available mass for a
 * chemical of interest with the same name is used.  Chemicals with neither are reported on stderr and skipped.
 * @param db The database in which to search for chemicals associated with the specific construct.
 * @param constructId The construct whose products to search for.
 * @return A pathway-ordered list of produced chemicals and their masses.
 * @throws SQLException
 */
public static List<Pair<ChemicalAssociatedWithPathway, Double>> extractMassesForChemicalsAssociatedWithConstruct(
    DB db, String constructId) throws SQLException {
  // Assumes the chems come back in index-sorted order, which should be guaranteed by the query that this call runs.
  List<ChemicalAssociatedWithPathway> pathwayChemicals =
      ChemicalAssociatedWithPathway.getInstance().getChemicalsAssociatedWithPathwayByConstructId(db, constructId);

  List<Pair<ChemicalAssociatedWithPathway, Double>> chemicalsWithMasses = new ArrayList<>();
  for (ChemicalAssociatedWithPathway pathwayChemical : pathwayChemicals) {
    String name = pathwayChemical.getChemical();
    CuratedChemical curated = CuratedChemical.getCuratedChemicalByName(db, name);

    if (curated == null) {
      // No curated entry, so fall back to whatever mass is available for a chemical of interest with this name.
      Double fallbackMass = ChemicalOfInterest.getInstance().getAnyAvailableMassByName(db, name);
      if (fallbackMass != null) {
        chemicalsWithMasses.add(Pair.of(pathwayChemical, fallbackMass));
      } else {
        System.err.format("ERROR: no usable chemical entries found for %s, skipping\n", name);
      }
    } else {
      chemicalsWithMasses.add(Pair.of(pathwayChemical, curated.getMass()));
    }
  }

  return chemicalsWithMasses;
}
/**
 * Given arrays of strain and/or construct ids, find all LCMS wells matching those strains/constructs.  If a set of
 * plates is specified, only wells in plates that are in that set will be considered.
 *
 * Strains are searched first, then constructs; a well is never added twice.  When only one sample per strain or
 * construct is requested, the search for each id stops at the first well it adds, and the construct of every well
 * picked during the strain pass is remembered so the construct pass doesn't select another sample of it.
 * @param db The DB to query for well information.
 * @param searchStrains A list of strain ids (MSIDs) for which to search; may be null.
 * @param searchConstructs A list of construct ids for which to search; may be null.
 * @param restrictToPlateIds An optional set of plates on which to filter wells; null means no restriction.
 * @param takeOnePerStrainOrConstruct Only select one sample per strain or construct (useful for negative controls).
 * @return A list of LCMS wells containing the specified strains/constructs, and the set of plate ids for those wells.
 * @throws SQLException
 */
public static Pair<List<LCMSWell>, Set<Integer>> extractWellsAndPlateIds(
    DB db, String[] searchStrains, String[] searchConstructs, Set<Integer> restrictToPlateIds,
    boolean takeOnePerStrainOrConstruct) throws SQLException {
  List<LCMSWell> matchingWells = new ArrayList<>();
  Set<Integer> seenWellIds = new HashSet<>();
  Set<Integer> seenPlateIds = new HashSet<>();

  // Only consulted when takeOnePerStrainOrConstruct is set.
  Set<String> selectedStrains = new HashSet<>();
  Set<String> selectedConstructs = new HashSet<>();

  for (String strain : ensureNonNull(searchStrains)) {
    for (LCMSWell candidate : LCMSWell.getInstance().getByStrain(db, strain)) {
      boolean plateAllowed = restrictToPlateIds == null || restrictToPlateIds.contains(candidate.getPlateId());
      if (!plateAllowed) {
        continue;
      }

      // Skip this well if we've already selected a sample with the same MSID.
      if (takeOnePerStrainOrConstruct && selectedStrains.contains(candidate.getMsid())) {
        continue;
      }

      // Set.add returns false when the id was already present, i.e. the well has been collected before.
      if (!seenWellIds.add(candidate.getId())) {
        continue;
      }

      matchingWells.add(candidate);
      seenPlateIds.add(candidate.getPlateId());

      if (takeOnePerStrainOrConstruct) {
        // Save the strain and construct for filtering if we only want to pick one.
        selectedConstructs.add(candidate.getComposition());
        selectedStrains.add(candidate.getMsid());
        break;
      }
    }
  }

  for (String construct : ensureNonNull(searchConstructs)) {
    for (LCMSWell candidate : LCMSWell.getInstance().getByConstructID(db, construct)) {
      boolean plateAllowed = restrictToPlateIds == null || restrictToPlateIds.contains(candidate.getPlateId());
      if (!plateAllowed) {
        continue;
      }

      // Skip this well if a sample of the same construct was already selected (in either pass).
      if (takeOnePerStrainOrConstruct && selectedConstructs.contains(candidate.getComposition())) {
        continue;
      }

      if (!seenWellIds.add(candidate.getId())) {
        continue;
      }

      matchingWells.add(candidate);
      seenPlateIds.add(candidate.getPlateId());

      if (takeOnePerStrainOrConstruct) {
        // Just save the construct for filtering since we won't consider strain again.
        selectedConstructs.add(candidate.getComposition());
        break;
      }
    }
  }

  return Pair.of(matchingWells, seenPlateIds);
}
}