/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package com.act.lcms.db.model;
import com.act.lcms.db.io.DB;
import com.act.lcms.MassCalculator;
import com.act.utils.TSVParser;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Types;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
public class CuratedChemical {
public static final String TABLE_NAME = "chemicals_curated";
public static String getTableName() {
return TABLE_NAME;
}
private enum DB_FIELD implements DBFieldEnumeration {
ID(1, -1, "id"),
NAME(2, 1, "name"),
INCHI(3, 2, "inchi"),
MASS(4, 3, "mass"),
EXPECTED_COLLISION_VOLTAGE(5, 4, "expected_collision_voltage"),
REFERENCE_URL(6, 5, "reference_url"),
;
private final int offset;
private final int insertUpdateOffset;
private final String fieldName;
DB_FIELD(int offset, int insertUpdateOffset, String fieldName) {
this.offset = offset;
this.insertUpdateOffset = insertUpdateOffset;
this.fieldName = fieldName;
}
@Override
public int getOffset() {
return offset;
}
@Override
public int getInsertUpdateOffset() {
return insertUpdateOffset;
}
@Override
public String getFieldName() {
return fieldName;
}
@Override
public String toString() {
return this.fieldName;
}
public static String[] names() {
DB_FIELD[] values = DB_FIELD.values();
String[] names = new String[values.length];
for (int i = 0; i < values.length; i++) {
names[i] = values[i].getFieldName();
}
return names;
}
}
// TODO: it might be easier to use parts of Spring-Standalone to do named binding in these queries.
protected static final List<String> ALL_FIELDS = Collections.unmodifiableList(Arrays.asList(DB_FIELD.names()));
// id is auto-generated on insertion.
protected static final List<String> INSERT_UPDATE_FIELDS =
Collections.unmodifiableList(ALL_FIELDS.subList(1, ALL_FIELDS.size()));
protected static List<CuratedChemical> fromResultSet(ResultSet resultSet) throws SQLException {
List<CuratedChemical> results = new ArrayList<>();
while (resultSet.next()) {
Integer id = resultSet.getInt(DB_FIELD.ID.getOffset());
String name = resultSet.getString(DB_FIELD.NAME.getOffset());
String inchi = resultSet.getString(DB_FIELD.INCHI.getOffset());
Double mass = resultSet.getDouble(DB_FIELD.MASS.getOffset());
Integer expectedCollisionVoltage = resultSet.getInt(DB_FIELD.EXPECTED_COLLISION_VOLTAGE.getOffset());
if (resultSet.wasNull()) {
expectedCollisionVoltage = null;
}
String referenceUrl = resultSet.getString(DB_FIELD.REFERENCE_URL.getOffset());
results.add(new CuratedChemical(id, name, inchi, mass, expectedCollisionVoltage, referenceUrl));
}
return results;
}
protected static CuratedChemical expectOneResult(ResultSet resultSet, String queryErrStr) throws SQLException{
List<CuratedChemical> results = fromResultSet(resultSet);
if (results.size() > 1) {
throw new SQLException("Found multiple results where one or zero expected: %s", queryErrStr);
}
if (results.size() == 0) {
return null;
}
return results.get(0);
}
// Select
public static final String QUERY_GET_CURATED_CHEMICAL_BY_ID = StringUtils.join(new String[]{
"SELECT", StringUtils.join(ALL_FIELDS, ", "),
"from", TABLE_NAME,
"where id = ?",
}, " ");
public static CuratedChemical getCuratedChemicalById(DB db, Integer id) throws SQLException {
try (PreparedStatement stmt = db.getConn().prepareStatement(QUERY_GET_CURATED_CHEMICAL_BY_ID)) {
stmt.setInt(1, id);
try (ResultSet resultSet = stmt.executeQuery()) {
return expectOneResult(resultSet, String.format("id = %d", id));
}
}
}
public static final String QUERY_GET_CURATED_CHEMICAL_BY_NAME = StringUtils.join(new String[]{
"SELECT", StringUtils.join(ALL_FIELDS, ", "),
"from", TABLE_NAME,
"where name = ?",
}, " ");
public static CuratedChemical getCuratedChemicalByName(DB db, String name) throws SQLException {
try (PreparedStatement stmt = db.getConn().prepareStatement(QUERY_GET_CURATED_CHEMICAL_BY_NAME)) {
stmt.setString(1, name);
try (ResultSet resultSet = stmt.executeQuery()) {
return expectOneResult(resultSet, String.format("name = %s", name));
}
}
}
public static final String QUERY_GET_CURATED_CHEMICAL_BY_INCHI = StringUtils.join(new String[]{
"SELECT", StringUtils.join(ALL_FIELDS, ", "),
"from", TABLE_NAME,
"where inchi = ?",
}, " ");
public static CuratedChemical getCuratedChemicalByInChI(DB db, String inchi) throws SQLException {
try (PreparedStatement stmt = db.getConn().prepareStatement(QUERY_GET_CURATED_CHEMICAL_BY_INCHI)) {
stmt.setString(1, inchi);
try (ResultSet resultSet = stmt.executeQuery()) {
return expectOneResult(resultSet, String.format("inchi = %s", inchi));
}
}
}
// Insert/Update
public static final String QUERY_INSERT_CURATED_CHEMICAL = StringUtils.join(new String[] {
"INSERT INTO", TABLE_NAME, "(", StringUtils.join(INSERT_UPDATE_FIELDS, ", "), ") VALUES (",
"?,", // 1 = name
"?,", // 2 = inchi
"?,", // 3 = m_plus_h_plus_mass
"?,", // 4 = expected_collision_voltage
"?", // 5 = reference_url
")"
}, " ");
protected static void bindInsertOrUpdateParameters(
PreparedStatement stmt, String name, String inchi, Double mPlusHPlusMass,
Integer expectedCollisionVoltage, String referenceUrl) throws SQLException {
stmt.setString(DB_FIELD.NAME.getInsertUpdateOffset(), name);
stmt.setString(DB_FIELD.INCHI.getInsertUpdateOffset(), inchi);
stmt.setDouble(DB_FIELD.MASS.getInsertUpdateOffset(), mPlusHPlusMass);
if (expectedCollisionVoltage != null) {
stmt.setInt(DB_FIELD.EXPECTED_COLLISION_VOLTAGE.getInsertUpdateOffset(), expectedCollisionVoltage);
} else {
stmt.setNull(DB_FIELD.EXPECTED_COLLISION_VOLTAGE.getInsertUpdateOffset(), Types.INTEGER);
}
stmt.setString(DB_FIELD.REFERENCE_URL.getInsertUpdateOffset(), referenceUrl);
}
// TODO: this could return the number of parameters it bound to make it easier to set additional params.
protected static void bindInsertOrUpdateParameters(PreparedStatement stmt, CuratedChemical c) throws SQLException {
bindInsertOrUpdateParameters(stmt, c.getName(), c.getInchi(), c.getMass(),
c.getExpectedCollisionVoltage(), c.getReferenceUrl());
}
public static CuratedChemical insertCuratedChemical(
DB db,
String name, String inchi, Double mPlusHPlusMass,
Integer expectedCollisionVoltage, String referenceUrl) throws SQLException {
Connection conn = db.getConn();
try (PreparedStatement stmt =
conn.prepareStatement(QUERY_INSERT_CURATED_CHEMICAL, Statement.RETURN_GENERATED_KEYS)) {
bindInsertOrUpdateParameters(stmt, name, inchi, mPlusHPlusMass, expectedCollisionVoltage, referenceUrl);
stmt.executeUpdate();
try (ResultSet resultSet = stmt.getGeneratedKeys()) {
if (resultSet.next()) {
// Get auto-generated id.
int id = resultSet.getInt(1);
return new CuratedChemical(id, name, inchi, mPlusHPlusMass, expectedCollisionVoltage, referenceUrl);
} else {
System.err.format("ERROR: could not retrieve autogenerated key for curated chemical %s\n", name);
return null;
}
}
}
}
protected static final List<String> UPDATE_STATEMENT_FIELDS_AND_BINDINGS;
static {
List<String> fields = new ArrayList<>(INSERT_UPDATE_FIELDS.size());
for (String field : INSERT_UPDATE_FIELDS) {
fields.add(String.format("%s = ?", field));
}
UPDATE_STATEMENT_FIELDS_AND_BINDINGS = Collections.unmodifiableList(fields);
}
public static final String QUERY_UPDATE_CURATED_CHEMICAL_BY_ID = StringUtils.join(new String[] {
"UPDATE", TABLE_NAME, "SET",
StringUtils.join(UPDATE_STATEMENT_FIELDS_AND_BINDINGS.iterator(), ", "),
"WHERE",
"id = ?",
}, " ");
public static boolean updateCuratedChemical(DB db, CuratedChemical chem) throws SQLException {
Connection conn = db.getConn();
try (PreparedStatement stmt = conn.prepareStatement(QUERY_UPDATE_CURATED_CHEMICAL_BY_ID)) {
bindInsertOrUpdateParameters(stmt, chem);
stmt.setInt(INSERT_UPDATE_FIELDS.size() + 1, chem.getId());
return stmt.executeUpdate() > 0;
}
}
// Parsing/loading
public static List<Pair<Integer, DB.OPERATION_PERFORMED>> insertOrUpdateCuratedChemicalsFromTSV(
DB db, TSVParser parser) throws SQLException{
List<Map<String, String>> entries = parser.getResults();
List<Pair<Integer, DB.OPERATION_PERFORMED>> operationsPerformed = new ArrayList<>(entries.size());
for (Map<String, String> entry : entries) {
String name = entry.get("name");
String inchi = entry.get("inchi");
String massStr = entry.get("[M+H]+");
String expectedCollisionVoltageStr = entry.get("collision_voltage");
String referenceUrl = entry.get("reference_url");
if (name == null || name.isEmpty() ||
inchi == null || inchi.isEmpty() ||
massStr == null || massStr.isEmpty()) {
System.err.format("WARNING: missing required field for chemical '%s', skipping.\n", name);
continue;
}
Double mass = Double.parseDouble(massStr);
Integer expectedCollisionVoltage = expectedCollisionVoltageStr == null || expectedCollisionVoltageStr.isEmpty() ?
null : Integer.parseInt(expectedCollisionVoltageStr);
if (inchi != null && !inchi.isEmpty() && !"null".equals(inchi)) {
Double calculatedMass = MassCalculator.calculateMass(inchi);
Double delta = calculatedMass - mass;
if (delta >= 0.01 || delta <= -0.01) {
System.err.format("WARNING: found mass discrepancy for %s: %f found where %f calculated\n",
name, mass, calculatedMass);
}
}
// TODO: should we fall back to searching by name if we can't find the InChI?
CuratedChemical chem = getCuratedChemicalByInChI(db, inchi);
DB.OPERATION_PERFORMED op = null;
if (chem == null) {
chem = insertCuratedChemical(db, name, inchi, mass, expectedCollisionVoltage, referenceUrl);
op = DB.OPERATION_PERFORMED.CREATE;
} else {
chem.setName(name);
chem.setInchi(inchi);
chem.setMass(mass);
chem.setExpectedCollisionVoltage(expectedCollisionVoltage);
chem.setReferenceUrl(referenceUrl);
updateCuratedChemical(db, chem);
op = DB.OPERATION_PERFORMED.UPDATE;
}
// Chem should only be null if we couldn't insert the row into the DB.
if (chem == null) {
operationsPerformed.add(Pair.of((Integer)null, DB.OPERATION_PERFORMED.ERROR));
} else {
operationsPerformed.add(Pair.of(chem.getId(), op));
}
}
return operationsPerformed;
}
private Integer id;
private String name;
private String inchi;
private Double mass;
private Integer expectedCollisionVoltage;
private String referenceUrl;
public CuratedChemical(Integer id, String name, String inchi, Double mass,
Integer expectedCollisionVoltage, String referenceUrl) {
this.id = id;
this.name = name;
this.inchi = inchi;
this.mass = mass;
this.expectedCollisionVoltage = expectedCollisionVoltage;
this.referenceUrl = referenceUrl;
}
public Integer getId() {
return id;
}
protected void setId(Integer id) {
this.id = id;
}
public String getName() {
return name;
}
public void setName(String name) {
this.name = name;
}
public String getInchi() {
return inchi;
}
public void setInchi(String inchi) {
this.inchi = inchi;
}
public Double getMass() {
return mass;
}
public void setMass(Double mass) {
this.mass = mass;
}
public Integer getExpectedCollisionVoltage() {
return expectedCollisionVoltage;
}
public void setExpectedCollisionVoltage(Integer expectedCollisionVoltage) {
this.expectedCollisionVoltage = expectedCollisionVoltage;
}
public String getReferenceUrl() {
return referenceUrl;
}
public void setReferenceUrl(String referenceUrl) {
this.referenceUrl = referenceUrl;
}
// Generated by IntelliJ.
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
CuratedChemical that = (CuratedChemical) o;
if (id != null ? !id.equals(that.id) : that.id != null) return false;
if (!name.equals(that.name)) return false;
if (!inchi.equals(that.inchi)) return false;
if (!mass.equals(that.mass)) return false;
if (expectedCollisionVoltage != null ? !expectedCollisionVoltage.equals(that.expectedCollisionVoltage) :
that.expectedCollisionVoltage != null)
return false;
return !(referenceUrl != null ? !referenceUrl.equals(that.referenceUrl) : that.referenceUrl != null);
}
// Generated by IntelliJ.
@Override
public int hashCode() {
int result = id != null ? id.hashCode() : 0;
result = 31 * result + name.hashCode();
result = 31 * result + inchi.hashCode();
result = 31 * result + mass.hashCode();
result = 31 * result + (expectedCollisionVoltage != null ? expectedCollisionVoltage.hashCode() : 0);
result = 31 * result + (referenceUrl != null ? referenceUrl.hashCode() : 0);
return result;
}
}