/************************************************************************* * * * This file is part of the 20n/act project. * * 20n/act enables DNA prediction for synthetic biology/bioengineering. * * Copyright (C) 2017 20n Labs, Inc. * * * * Please direct all queries to act@20n.com. * * * * This program is free software: you can redistribute it and/or modify * * it under the terms of the GNU General Public License as published by * * the Free Software Foundation, either version 3 of the License, or * * (at your option) any later version. * * * * This program is distributed in the hope that it will be useful, * * but WITHOUT ANY WARRANTY; without even the implied warranty of * * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * * GNU General Public License for more details. * * * * You should have received a copy of the GNU General Public License * * along with this program. If not, see <http://www.gnu.org/licenses/>. * * * *************************************************************************/ package com.act.lcms.db.analysis; import com.act.lcms.MassCalculator; import com.act.lcms.db.io.DB; import com.act.lcms.db.model.ChemicalAssociatedWithPathway; import com.act.lcms.db.model.ChemicalOfInterest; import com.act.lcms.db.model.ConstructEntry; import com.act.lcms.db.model.CuratedChemical; import com.act.lcms.db.model.LCMSWell; import com.act.lcms.db.model.Plate; import com.act.lcms.db.model.StandardWell; import org.apache.commons.lang3.StringUtils; import org.apache.commons.lang3.tuple.Pair; import java.io.IOException; import java.sql.SQLException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.regex.Matcher; import java.util.regex.Pattern; public class Utils { public static String[] ensureNonNull(String[] val) { return val == null ? new String[0] : val; } public static final Pattern PLATE_COORDINATES_PATTERN = Pattern.compile("^([A-Za-z]+)(\\d+)$"); /* Rather than trying to compute the offset of well coordinates on the fly, we pre-compute and choke if we can't find * the well. This will make it easier to expand to double-character rows if necessary. */ private static final Map<String, Integer> WELL_ROW_TO_INDEX; static { Map<String, Integer> m = new HashMap<>(); int i = 0; for (char c = 'A'; c < 'Z'; c++, i++) { m.put(String.valueOf(c), i); } WELL_ROW_TO_INDEX = Collections.unmodifiableMap(m); } /** * Converts a coordinate string like 'C12' into zero-indexed row and column indices like (2, 11). * @param coords A coordinate string to parse. * @return A row and column index pair, where 'A1' is (0, 0). * @throws IllegalArgumentException Thrown when the coordinates can't be parsed or interpreted. */ public static Pair<Integer, Integer> parsePlateCoordinates(String coords) throws IllegalArgumentException { Integer plateRow = null, plateColumn = null; Matcher matcher = PLATE_COORDINATES_PATTERN.matcher(coords); if (!matcher.matches()) { throw new IllegalArgumentException(String.format("Invalid plate coordinates: %s", coords)); } String plateRowStr = matcher.group(1); plateRow = WELL_ROW_TO_INDEX.get(plateRowStr); if (plateRow == null) { throw new IllegalArgumentException(String.format( "Unable to handle multi-character plate row %s for coordinates %s", plateRowStr, coords)); } plateColumn = Integer.parseInt(matcher.group(2)) - 1; return Pair.of(plateRow, plateColumn); } /** * Extracts the chemical target for a given construct using data in the constructs table. * @param db The database to query for construct data. * @param compositionId The construct id/composition id, like 'ca1' or 'pa2'. * @return A curated chemical for the target of the specified construct. * @throws SQLException */ public static CuratedChemical extractTargetForConstruct(DB db, String compositionId) throws SQLException { ConstructEntry cme = ConstructEntry.getInstance().getCompositionMapEntryByCompositionId(db, compositionId); if (cme == null) { System.err.format("WARNING: No construct -> chemical mapping for %s\n", compositionId); return null; } CuratedChemical cc = CuratedChemical.getCuratedChemicalByName(db, cme.getTarget()); if (cc == null) { System.err.format("WARNING: No curated chemical entry for %s/%s\n", cme.getCompositionId(), cme.getTarget()); return null; } if (cc.getMass() <= 0.0d) { System.err.format("WARNING: Invalid mass for chemical %s/%s (%f)\n", cme.getCompositionId(), cc.getName(), cc.getMass()); return null; } return cc; } /** * Finds the target chemical for a given set of wells, assuming there will be exactly one shared for all positive * wells in the list. * @param db The database to query for constructs/chemicals. * @param positiveWells The list of wells whose standards to find. * @return An object representing the target chemical for the specified wells. * @throws SQLException */ public static Set<CuratedChemical> extractTargetsForWells(DB db, List<LCMSWell> positiveWells) throws SQLException { Set<CuratedChemical> chemicals = new HashSet<>(); for (LCMSWell well : positiveWells) { CuratedChemical cc = extractTargetForConstruct(db, well.getComposition()); if (cc != null) { chemicals.add(cc); } } return chemicals; } /** * Finds all chemical targets for a set of LCMS wells. Throws an IllegalArgumentException if more than one targets * are shared by the wells. * @param db The DB to query for information about the wells/targets. * @param wells A set of wells whose targets to scan. * @return The single shared target of all the wells, or null. * @throws SQLException * @throws IllegalArgumentException Thrown when the wells share more than one target chemical. */ public static CuratedChemical requireOneTarget(DB db, List<LCMSWell> wells) throws SQLException, IllegalArgumentException { Set<CuratedChemical> chemicals = extractTargetsForWells(db, wells); if (chemicals.size() > 1) { // TODO: is there a foreach approach that we can use here that won't break backwards compatibility? List<String> chemicalNames = new ArrayList<>(chemicals.size()); for (CuratedChemical chemical : chemicals) { chemicalNames.add(chemical.getName()); } throw new IllegalArgumentException( String.format("Found multiple target chemicals where one required: %s", StringUtils.join(chemicalNames, ", ")) ); } else if (chemicals.size() < 1) { return null; } return chemicals.iterator().next(); } /** * Filters a set of metlin masses by include/exclude ion names. * @param metlinMassesPreFilter A map of ion names to masses. * @param includeIons A set of ion names to include (all others will be excluded). * @param excludeIons A set of ion names to exclude. Exclusion takes priority over inclusion. * @return A map of ion names to masses filtered by the include/exclude sets. */ public static Map<String, Double> filterMasses(Map<String, Double> metlinMassesPreFilter, Set<String> includeIons, Set<String> excludeIons) { // Don't filter if there's nothing by which to filter. if ((includeIons == null || includeIons.size() == 0) && (excludeIons == null || excludeIons.size() == 0)) { return metlinMassesPreFilter; } // Create a fresh map and add from the old one as we go. (Could also copy and remove, but that seems weird.) Map<String, Double> metlinMasses = new HashMap<>(metlinMassesPreFilter.size()); /* Iterate over the old copy to reduce the risk of concurrent modification exceptions. * Note: this is not thread safe. */ for (Map.Entry<String, Double> entry : metlinMassesPreFilter.entrySet()) { // Skip all exclude values immediately. if (excludeIons != null && excludeIons.contains(entry.getKey())) { continue; } // If includeIons is defined, only keep those if (includeIons == null || includeIons.contains(entry.getKey())) { metlinMasses.put(entry.getKey(), entry.getValue()); } } return metlinMasses; } /** * Find a well containing the specified chemical in the plate with a given barcode. * @param db A DB containing plate/well data. * @param standardPlateBarcode The barcode of the plate in which to search. * @param standardName The name of the chemical to find. * @param failIfMissing Throw an exception if the specified standard cannot be found in the specified plate. * @return The StandardWell in the specified plate that contains the specified chemical. * @throws SQLException * @throws IllegalArgumentException thrown when the plate is invalid or the chemical cannot be found therein. */ public static StandardWell extractStandardWellFromPlate(DB db, String standardPlateBarcode, String standardName, boolean failIfMissing) throws SQLException, IllegalArgumentException, IOException, ClassNotFoundException { Plate standardPlate = Plate.getPlateByBarcode(db, standardPlateBarcode); if (standardPlate == null) { throw new IllegalArgumentException( String.format("Unable to find standard plate with barcode %s", standardPlateBarcode)); } if (standardPlate.getContentType() != Plate.CONTENT_TYPE.STANDARD) { throw new IllegalArgumentException( String.format("Plate with barcode %s has content type %s, expected %s", standardPlateBarcode, standardPlate.getContentType(), Plate.CONTENT_TYPE.STANDARD) ); } List<StandardWell> standardWells = StandardWell.getInstance().getByPlateId(db, standardPlate.getId()); for (StandardWell well : standardWells) { if (standardName.equals(well.getChemical())) { System.out.format("Found matching standard well at %s (%s)\n", well.getCoordinatesString(), well.getChemical()); return well; } } if (failIfMissing) { throw new IllegalArgumentException( String.format("Unable to find standard chemical %s in plate %s", standardName, standardPlateBarcode) ); } return null; } // Fail on missing set to true by default. public static StandardWell extractStandardWellFromPlate(DB db, String standardPlateBarcode, String standardName) throws SQLException, IllegalArgumentException, IOException, ClassNotFoundException { return extractStandardWellFromPlate(db, standardPlateBarcode, standardName, true); } /** * Parses a mass value from a string (like 123.456), or searches for a chemical by name and computs the mass. * @param db A DB to query for chemicals if massStr does not contain a number. * @param massStr A numeric mass value or a chemical name whose mass to find. * @return A pair containing a textual description of the value used and a mass value. * @throws SQLException * @throws IllegalArgumentException Thrown when the massStr can't be parsed or found in the DB. */ public static Pair<String, Double> extractMassFromString(DB db, String massStr) throws SQLException, IllegalArgumentException { Pair<String, Double> searchMZ; try { Double mz = Double.parseDouble(massStr); return Pair.of("raw-m/z", mz); } catch (IllegalArgumentException e) { CuratedChemical targetChemical = CuratedChemical.getCuratedChemicalByName(db, massStr); if (targetChemical != null) { Double mz = targetChemical.getMass(); return Pair.of(massStr, mz); } List<ChemicalOfInterest> chemicalsOfInterest = ChemicalOfInterest.getInstance().getChemicalOfInterestByName(db, massStr); if (chemicalsOfInterest == null || chemicalsOfInterest.size() == 0) { throw new IllegalArgumentException( String.format("Unable to parse or find chemical name for string: %s", massStr)); } if (chemicalsOfInterest.size() != 1) { System.err.format("WARNING: found multiple chemicals of interest for name '%s', using first\n", massStr); } ChemicalOfInterest chem = chemicalsOfInterest.get(0); Double mz = MassCalculator.calculateMass(chem.getInchi()); System.out.format("Using reference M/Z for specified chemical %s (%f)\n", chem.getName(), mz); return Pair.of(massStr, mz); } } /** * Produces an ordered list of chemicals and their masses that represent the intermediate and side-reaction products * of the pathway encoded in a particular construct. These are returned as a list rather than a hash to keep them in * pathway order (from last/highest to first/lowest intermediate or side-reaction). * @param db The database in which to search for chemicals associated with the specific construct. * @param constructId The construct whose products to search for. * @return A pathway-ordered list of produced chemicals and their masses. * @throws SQLException */ public static List<Pair<ChemicalAssociatedWithPathway, Double>> extractMassesForChemicalsAssociatedWithConstruct( DB db, String constructId) throws SQLException { List<Pair<ChemicalAssociatedWithPathway, Double>> results = new ArrayList<>(); // Assumes the chems come back in index-sorted order, which should be guaranteed by the query that this call runs. List<ChemicalAssociatedWithPathway> products = ChemicalAssociatedWithPathway.getInstance().getChemicalsAssociatedWithPathwayByConstructId(db, constructId); for (ChemicalAssociatedWithPathway product : products) { String chemName = product.getChemical(); CuratedChemical curatedChemical = CuratedChemical.getCuratedChemicalByName(db, chemName); // Attempt to find the product in the list of curated chemicals, then fall back to mass computation by InChI. if (curatedChemical != null) { results.add(Pair.of(product, curatedChemical.getMass())); continue; } Double mass = ChemicalOfInterest.getInstance().getAnyAvailableMassByName(db, chemName); if (mass == null) { System.err.format("ERROR: no usable chemical entries found for %s, skipping\n", chemName); continue; } results.add(Pair.of(product, mass)); } return results; } /** * Given arrays of strain and/or construct ids, find all LCMS wells matching those strains/constructs. If a set of * plates is specified, only wells in plates that are in that set will be considered. * @param db The DB to query for well information. * @param searchStrains A list of strain ids (MSIDs) for which to search. * @param searchConstructs A list of construct ids for which to search. * @param restrictToPlateIds An optional set of plates on which to filter wells. * @param takeOnePerStrainOrConstruct Only select one sample per strain or construct (useful for negative controls). * @return A list of LCMS wells containing the specified strains/constructs, and the set of plate ids for those wells. * @throws SQLException */ public static Pair<List<LCMSWell>, Set<Integer>> extractWellsAndPlateIds( DB db, String[] searchStrains, String[] searchConstructs, Set<Integer> restrictToPlateIds, boolean takeOnePerStrainOrConstruct) throws SQLException { String[] strains = ensureNonNull(searchStrains); String[] constructs = ensureNonNull(searchConstructs); List<LCMSWell> matchingWells = new ArrayList<>(); Set<Integer> seenWellIds = new HashSet<>(); Set<Integer> seenPlateIds = new HashSet<>(); Set<String> selectedStrains = new HashSet<>(); Set<String> selectedConstructs = new HashSet<>(); for (String s : strains) { List<LCMSWell> res = LCMSWell.getInstance().getByStrain(db, s); for (LCMSWell well : res) { if (restrictToPlateIds != null && !restrictToPlateIds.contains(well.getPlateId())) { continue; } // Skip this well if we've already selected a sample with the same MSID. if (takeOnePerStrainOrConstruct) { if (selectedStrains.contains(well.getMsid())) { continue; } } if (!seenWellIds.contains(well.getId())) { matchingWells.add(well); seenWellIds.add(well.getId()); seenPlateIds.add(well.getPlateId()); if (takeOnePerStrainOrConstruct) { // Save the strain and construct for filtering if we only want to pick one. selectedConstructs.add(well.getComposition()); selectedStrains.add(well.getMsid()); break; } } } } for (String c : constructs) { List<LCMSWell> res = LCMSWell.getInstance().getByConstructID(db, c); for (LCMSWell well : res) { if (restrictToPlateIds != null && !restrictToPlateIds.contains(well.getPlateId())) { continue; } if (takeOnePerStrainOrConstruct) { if (selectedConstructs.contains(well.getComposition())) { continue; } } if (!seenWellIds.contains(well.getId())) { matchingWells.add(well); seenWellIds.add(well.getId()); seenPlateIds.add(well.getPlateId()); if (takeOnePerStrainOrConstruct) { // Just save the construct for filtering since we won't consider strain again. selectedConstructs.add(well.getComposition()); break; } } } } return Pair.of(matchingWells, seenPlateIds); } }