/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package com.act.lcms.v2;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.Optional;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * A chemical formula, optionally associated with a name.
 *
 * <p>The element counts are stored in a map {Element -> count}. The class can be built from a formula string
 * (see {@link #fromString(String)}) and converted back to a string following the Hill Order system
 * (see {@link #toString()}).
 *
 * <p>Note: the maps handed to the map-based constructors are stored as-is (no defensive copy), and
 * {@link #getElementCounts()} returns the internal map itself.
 */
public class LcmsChemicalFormula implements ChemicalFormula {

  // Matches one "element + optional count" pair in a formula string: an upper-case letter, optionally followed
  // by one lower-case letter (the symbol), then zero or more digits (the count).
  private static final Pattern ELEMENT_COUNT_PATTERN = Pattern.compile("([A-Z][a-z]?)(\\d*)");

  // Symbols that get special treatment in the Hill Order system.
  private static final String CARBON_SYMBOL = "C";
  private static final String HYDROGEN_SYMBOL = "H";

  private Map<Element, Integer> elementCounts;
  private final String name;

  /**
   * Builds an unnamed formula by parsing its string representation.
   *
   * @param chemicalFormula formula string such as "C8H9NO2"; see {@link #fromString(String)} for parsing rules
   */
  public LcmsChemicalFormula(String chemicalFormula) {
    // Parse through the private helper rather than the public, overridable fromString(): calling an overridable
    // method from a constructor would run subclass code before the subclass is initialized.
    this.elementCounts = parseElementCounts(chemicalFormula);
    this.name = null;
  }

  /**
   * Builds an unnamed formula from element counts.
   *
   * @param elementCounts map {Element -> count}; stored by reference
   */
  public LcmsChemicalFormula(Map<Element, Integer> elementCounts) {
    this(elementCounts, null);
  }

  /**
   * Builds a named formula from element counts.
   *
   * @param elementCounts map {Element -> count}; stored by reference
   * @param name name associated with the formula, may be null
   */
  public LcmsChemicalFormula(Map<Element, Integer> elementCounts, String name) {
    this.elementCounts = elementCounts;
    this.name = name;
  }

  /**
   * @return the internal map {Element -> count}
   */
  public Map<Element, Integer> getElementCounts() {
    return this.elementCounts;
  }

  /**
   * @param element the element to look up
   * @return the count stored for the element, or 0 if the formula has no entry for it
   */
  public Integer getElementCount(Element element) {
    return elementCounts.getOrDefault(element, 0);
  }

  /**
   * @return the sum over all elements of (element mass * element count)
   */
  public Double getMonoIsotopicMass() {
    return elementCounts
        .entrySet()
        .stream()
        .mapToDouble(entry -> entry.getKey().getMass() * entry.getValue())
        .sum();
  }

  /**
   * @return the name associated with this formula, or an empty Optional if none was provided
   */
  public Optional<String> getName() {
    return Optional.ofNullable(this.name);
  }

  /**
   * Two formulae are equal when their element counts are equal; the name is not taken into account.
   *
   * @param chemicalFormula the object to compare with
   * @return true if the object is a ChemicalFormula with the same element counts
   */
  @Override
  public boolean equals(Object chemicalFormula) {
    return (chemicalFormula instanceof ChemicalFormula) &&
        getElementCounts().equals(((ChemicalFormula) chemicalFormula).getElementCounts());
  }

  /**
   * Hash code derived from the element counts only, so that it stays consistent with {@link #equals(Object)}:
   * equal formulae always produce equal hash codes.
   *
   * @return the hash code of the element counts map
   */
  @Override
  public int hashCode() {
    return getElementCounts().hashCode();
  }

  /**
   * Rank of an element in a carbon-containing formula under the Hill Order system:
   * Carbon first, Hydrogen second, everything else afterwards.
   */
  private static int hillRank(Element element) {
    if (CARBON_SYMBOL.equals(element.getSymbol())) {
      return 0;
    }
    if (HYDROGEN_SYMBOL.equals(element.getSymbol())) {
      return 1;
    }
    return 2;
  }

  /**
   * The following method implements the comparison of elements in a formula for the purpose of ordering them
   * according to the Hill Order System, defined as follows:
   * Case 1) If the formula contains Carbon:
   *         Carbon first, Hydrogen second, and all remaining elements in alphabetical order.
   * Case 2) If no Carbon is present, all elements in alphabetical order.
   *
   * @param formula input formula. It is needed since the rules change depending on whether it contains a Carbon
   * @return a Comparator between elements in the formula
   */
  private Comparator<Element> getElementComparator(ChemicalFormula formula) {
    if (formula.getElementCount(LcmsCommonElements.CARBON.getElement()) > 0) {
      // Case 1) the formula contains a Carbon
      // Carbon first, Hydrogen second, and all remaining elements in alphabetical order.
      return (Element e1, Element e2) -> {
        int byRank = Integer.compare(hillRank(e1), hillRank(e2));
        // Same rank means both are Carbon, both are Hydrogen, or both are "other": fall back to the symbol.
        return (byRank != 0) ? byRank : e1.getSymbol().compareTo(e2.getSymbol());
      };
    }
    // Case 2) the formula does not contain a Carbon
    // all elements in alphabetical order, including Hydrogen
    return (Element e1, Element e2) -> e1.getSymbol().compareTo(e2.getSymbol());
  }

  /**
   * @return a copy of the element counts, sorted according to the Hill Order system
   */
  private TreeMap<Element, Integer> getSortedElementCounts() {
    TreeMap<Element, Integer> treeMap = new TreeMap<>(getElementComparator(this));
    treeMap.putAll(elementCounts);
    return treeMap;
  }

  /**
   * Converts a Chemical Formula to its string representation following the Hill Order system described above.
   * For example, the formula (C->8, H->9, N->1, O->2) would be converted as "C8H9NO2".
   * The count is only written out when it is greater than 1.
   *
   * @return the formula's string representation
   */
  @Override
  public String toString() {
    StringBuilder builder = new StringBuilder();
    for (Map.Entry<Element, Integer> entry : getSortedElementCounts().entrySet()) {
      builder.append(entry.getKey().getSymbol());
      Integer count = entry.getValue();
      if (count > 1) {
        builder.append(count.toString());
      }
    }
    return builder.toString();
  }

  /**
   * Replaces the element counts of this formula with the ones parsed from the input string.
   *
   * <p>Parsing rules: every occurrence of an element symbol (one upper-case letter, optionally followed by one
   * lower-case letter) followed by an optional number is read as an (element, count) pair, with a missing number
   * meaning 1. Counts of a symbol appearing several times are added up, e.g. "CH3COOH" yields
   * (C->2, H->4, O->2). Any character not matched by this pattern (parentheses, charges, ...) is ignored.
   *
   * <p>If parsing fails, the current element counts are left untouched.
   *
   * @param formulaString formula string such as "C8H9NO2"
   * @throws NumberFormatException if a count does not fit in an int
   */
  public void fromString(String formulaString) {
    elementCounts = parseElementCounts(formulaString);
  }

  /**
   * Parses a formula string into a fresh map {Element -> count}.
   */
  private static Map<Element, Integer> parseElementCounts(String formulaString) {
    Map<Element, Integer> parsedCounts = new HashMap<>();
    Matcher matches = ELEMENT_COUNT_PATTERN.matcher(formulaString);
    // Example: in "C8H9NO2", there will be 4 matches for this pattern, each of which having two groups.
    // First match: "C8", with group 1 being "C" and group 2 being "8"
    // Second match: "H9", group 1 is "H", group 2 is "9"
    // Third match: "N", group 1 is "N", group 2 is "" (empty string)
    // Fourth match: "O2", group 1 is "O" and group 2 is "2"
    while (matches.find()) {
      Element element = new LcmsElement(matches.group(1));
      String countGroup = matches.group(2);
      int count = countGroup.isEmpty() ? 1 : Integer.parseInt(countGroup);
      // Accumulate rather than overwrite, so that repeated symbols (e.g. the two C in "CH3COOH") are summed.
      parsedCounts.merge(element, count, Integer::sum);
    }
    return parsedCounts;
  }
}