/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package act.shared;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import act.shared.helpers.MongoDBToJSON;
import chemaxon.formats.MolFormatException;
import chemaxon.formats.MolImporter;
import chemaxon.struc.Molecule;
import com.act.biointerpretation.mechanisminspection.ErosCorpus;
import com.ggasoftware.indigo.Indigo;
import com.ggasoftware.indigo.IndigoInchi;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.json.JSONArray;
import org.json.JSONObject;
public class Chemical implements Serializable {
/** Explicit serialization version identifier. */
private static final long serialVersionUID = 42L;
private static final Logger LOGGER = LogManager.getFormatterLogger(Chemical.class);

/** Format names handed to MolImporter when importing the InChI and the SMILES string. */
private static final String INCHI_FORMAT = "inchi";
private static final String SMARTS_FORMAT = "smarts";

/** Default constructor for serialization; fields without an initializer keep their defaults. */
public Chemical() {
}

private Long uuid;
private Long pubchem_id;
private String canon;
private String smiles;
private String inchi;
private String inchiKey;
private boolean isCofactor;
private boolean isNative;

/** Keys of the cross-reference map held by a chemical. */
public enum REFS {
  WIKIPEDIA, KEGG_DRUG, SIGMA, HSDB, DRUGBANK, WHO, SIGMA_POLYMER, PUBCHEM_TOX, TOXLINE, DEA,
  ALT_PUBCHEM, CHEBI, pubmed, genbank, KEGG, METACYC, BRENDA, CURATED, BING, HMDB
}

private HashMap<REFS, JSONObject> refs;
private Double estimatedEnergy;
// pubchem names (type,name)
private Map<String,String[]> names;
// more pubchem synonyms
private List<String> synonyms;
// names used in brenda
private List<String> brendaNames;
private Set<String> keywords;
private Set<String> caseInsensitiveKeywords;
private List<Integer> substructureRoIds = new ArrayList<>();
// -1 is the value the ChemSpider setters store when given null.
private Integer chemspider_id = -1;
private Integer chemspider_num_unique_vendors = -1;
private JSONArray chemspider_vendor_xrefs = new JSONArray();
/**
 * Creates a chemical with the given id and empty ref, name, synonym and keyword collections.
 * If storing to db, this uuid will be ignored.
 *
 * @param uuid the id to assign
 */
public Chemical(Long uuid) {
  this.uuid = uuid;
  this.isCofactor = false;

  this.refs = new HashMap<>();
  this.names = new HashMap<>();

  this.synonyms = new ArrayList<>();
  this.brendaNames = new ArrayList<>();

  this.keywords = new HashSet<>();
  this.caseInsensitiveKeywords = new HashSet<>();
}
/**
 * Creates a chemical from an InChI string, using -1 as its uuid.
 *
 * @param inchi the InChI to install; the InChIKey is derived from it by setInchi
 */
public Chemical(String inchi) {
  this(-1L);
  // setInchi also installs the inchiKey.
  this.setInchi(inchi);
  this.isCofactor = false;
  // Deliberately left empty: REFS types are not pre-mapped to placeholder values,
  // so callers must check looked-up refs for null.
  this.refs = new HashMap<>();
}
/**
 * Creates a chemical from pubchem data; the InChI is left null.
 *
 * @param uuid       the id to assign
 * @param pubchem_id the pubchem id
 * @param canon      the canonical name
 * @param smiles     the SMILES string
 */
public Chemical(long uuid, Long pubchem_id, String canon, String smiles) {
  this(uuid);
  this.smiles = smiles;
  this.canon = canon;
  this.pubchem_id = pubchem_id;
  this.inchi = null;
  this.isCofactor = false;
  this.refs = new HashMap<>();
}
/**
 * Creates a new chemical that merges this chemical with {@code x}.
 *
 * <p>Both chemicals must carry equal, non-null InChIs. The copy takes its uuid, pubchem id,
 * canonical name, smiles, InChI, keywords and ChemSpider data from this chemical. The
 * cofactor and native flags are OR-ed. Refs, names, synonyms and brenda names are unioned,
 * with this chemical's entries kept on key clashes.
 *
 * <p>Unless the canonical name, smiles and InChIKey are non-null and equal on both sides and
 * the pubchem ids match, the corresponding values of {@code x} are recorded under the
 * ALT_PUBCHEM ref of the copy.
 *
 * @param x the chemical to merge with this one
 * @return the merged copy, or null if either InChI is null or the InChIs differ
 */
public Chemical createNewByMerge(Chemical x) {
  /* x.uuid can be different from this.uuid as they are system generated */
  boolean consistent = x.inchi != null && this.inchi != null && x.inchi.equals(this.inchi);
  if (!consistent) {
    return null;
  }

  // now create a copy
  System.err.format("-- new merged copy from %s/", this.uuid);
  System.err.format("%s/", this.pubchem_id);
  System.err.format("%s/", this.canon);
  System.err.format("%s/", this.smiles);
  System.err.format("%s/", this.inchi);
  System.err.println();

  Chemical c = new Chemical(this.uuid, this.pubchem_id, this.canon, this.smiles);
  c.setInchi(this.inchi);

  if (this.isCofactor || x.isCofactor) {
    c.setAsCofactor();
  }
  if (this.isNative || x.isNative) {
    c.setAsNative();
  }

  // Union of refs; entries already present from this chemical are kept.
  c.refs = new HashMap<REFS, JSONObject>(this.refs);
  for (Map.Entry<REFS, JSONObject> ref : x.refs.entrySet()) {
    if (!c.refs.containsKey(ref.getKey())) {
      c.refs.put(ref.getKey(), ref.getValue());
    }
  }

  // Union of names, synonyms and brenda names, again preferring this chemical's entries.
  c.names = new HashMap<String,String[]>(this.names);
  c.synonyms = new ArrayList<String>(this.synonyms);
  c.brendaNames = new ArrayList<String>(this.brendaNames);
  for (Map.Entry<String, String[]> name : x.names.entrySet()) {
    if (!c.names.containsKey(name.getKey())) {
      c.names.put(name.getKey(), name.getValue());
    }
  }
  for (String n : x.synonyms) {
    if (!c.synonyms.contains(n)) {
      c.synonyms.add(n);
    }
  }
  for (String n : x.brendaNames) {
    if (!c.brendaNames.contains(n)) {
      c.brendaNames.add(n);
    }
  }

  // Keywords and ChemSpider data are copied from this chemical only.
  for (String k : this.getKeywords()) {
    c.addKeyword(k);
  }
  for (String k : this.getCaseInsensitiveKeywords()) {
    c.addCaseInsensitiveKeyword(k);
  }
  c.setChemSpiderID(this.getChemSpiderID());
  c.setChemSpiderNumUniqueVendors(this.getChemSpiderNumUniqueVendors());
  c.setChemSpiderVendorXrefs(this.getChemSpiderVendorXrefs());

  // if canonical name and pubchem_id are different then add them as an ALT PUBCHEM
  boolean inchiKeySame = x.inchiKey != null && this.inchiKey != null && x.inchiKey.equals(this.inchiKey);
  boolean canonSame = x.canon != null && this.canon != null && x.canon.equals(this.canon);
  boolean smilesSame = x.smiles != null && this.smiles != null && x.smiles.equals(this.smiles);
  // pubchem ids are boxed Longs: compare by value (null-safe), not by reference.
  boolean pubchemSame = x.pubchem_id == null
      ? this.pubchem_id == null
      : x.pubchem_id.equals(this.pubchem_id);
  if (canonSame && pubchemSame && smilesSame && inchiKeySame) {
    return c;
  }

  JSONObject entry = new JSONObject();
  entry.put("canonical", x.canon);
  entry.put("pubchem", x.pubchem_id);
  entry.put("smiles", x.smiles);
  entry.put("inchiKey", x.inchiKey);

  // Look the existing list up on the merged copy: it may have come from either source.
  // NOTE(review): an existing list object is shared with the chemical it was copied from,
  // so the put below is visible through that chemical as well.
  JSONObject altPubchemList = c.refs.get(REFS.ALT_PUBCHEM);
  if (altPubchemList == null) {
    altPubchemList = new JSONObject();
    c.refs.put(REFS.ALT_PUBCHEM, altPubchemList);
  }
  altPubchemList.put("alt_pubchem", entry);

  System.err.format("ALT PUBCHEM on: %s\n", this.inchi);
  return c;
}
/**
 * Imports this chemical as a molecule, trying the InChI first and then the SMILES string
 * (read with the "smarts" format). An InChI failure is logged as a warning before the
 * second attempt is made.
 *
 * @return The molecule corresponding to this chemical.
 * @throws MolFormatException if neither the InChI nor the SMILES string can be imported;
 *         the failure of the second attempt is attached as a suppressed exception.
 */
public Molecule importAsMolecule() throws MolFormatException {
  try {
    return MolImporter.importMol(this.getInChI(), INCHI_FORMAT);
  } catch (MolFormatException e1) {
    LOGGER.warn("Couldn't import chemical %d from inchi. %s", this.getUuid(), e1.getMessage());
  }

  try {
    return MolImporter.importMol(this.getSmiles(), SMARTS_FORMAT);
  } catch (MolFormatException e2) {
    MolFormatException failure =
        new MolFormatException("Chemical " + getUuid() + " couldn't be imported from either inchi or smarts.");
    // Keep the underlying parser error reachable instead of discarding it.
    failure.addSuppressed(e2);
    throw failure;
  }
}
/** Returns the keyword set held by this chemical (not a copy). */
public Set<String> getKeywords() {
  return this.keywords;
}

/** Adds {@code k} to the keyword set. */
public void addKeyword(String k) {
  this.keywords.add(k);
}

/** Returns the case-insensitive keyword set held by this chemical (not a copy). */
public Set<String> getCaseInsensitiveKeywords() {
  return this.caseInsensitiveKeywords;
}

/** Adds {@code k} to the case-insensitive keyword set; the value is stored as given. */
public void addCaseInsensitiveKeyword(String k) {
  this.caseInsensitiveKeywords.add(k);
}

/** Stores {@code entry} as the reference of type {@code typ}, replacing any previous one. */
public void putRef(REFS typ, JSONObject entry) {
  this.refs.put(typ, entry);
}
/**
 * Add pubchem names. Pubchem categorizes names as:
 *   Allowed
 *   Preferred
 *   Traditional
 *   Systematic
 *   CAS-like Style
 * Arbitrarily using the first "Preferred" name as our canonical.
 *
 * @param type  the pubchem name category
 * @param names the names in that category; replaces any names stored for the same type
 */
public void addNames(String type, String[]names) {
  boolean isPreferred = type.equals("Preferred");
  if (isPreferred && names.length > 0) {
    setCanon(names[0]);
  }
  this.names.put(type, names);
}
/** Adds the lower-cased form of {@code syn} to the synonym list. */
public void addSynonym(String syn) {
  synonyms.add(syn.toLowerCase());
}

/** Appends {@code name} to the brenda names. */
public void addBrendaNames(String name) {
  brendaNames.add(name);
}

/** Appends {@code id} to the substructure RO ids. */
public void addSubstructureRoId(Integer id) {
  substructureRoIds.add(id);
}

/** Returns the list of substructure RO ids held by this chemical (not a copy). */
public List<Integer> getSubstructureRoIds() {
  return substructureRoIds;
}

public void setCanon(String canon) {
  this.canon = canon;
}

public void setPubchem(Long pubchem) {
  this.pubchem_id = pubchem;
}

/** Sets the ChemSpider id; null is stored as -1. */
public void setChemSpiderID(Integer csid) {
  if (csid == null) {
    this.chemspider_id = -1;
  } else {
    this.chemspider_id = csid;
  }
}

/** Sets the number of unique ChemSpider vendors; null is stored as -1. */
public void setChemSpiderNumUniqueVendors(Integer n) {
  if (n == null) {
    this.chemspider_num_unique_vendors = -1;
  } else {
    this.chemspider_num_unique_vendors = n;
  }
}

/** Sets the ChemSpider vendor xrefs; null is stored as an empty array. */
public void setChemSpiderVendorXrefs(JSONArray v) {
  if (v == null) {
    this.chemspider_vendor_xrefs = new JSONArray();
  } else {
    this.chemspider_vendor_xrefs = v;
  }
}

public void setSmiles(String s) {
  this.smiles = s;
}

public void setInchiKey(String s) {
  this.inchiKey = s;
}
/**
 * Sets the InChI and, where possible, computes and installs the matching InChIKey.
 *
 * <p>No key is computed (and the current key is left untouched) for:
 * <ol>
 *   <li>big molecules and abstractions that have a fake inchi (from metacyc and brenda),</li>
 *   <li>corrupt inchis (from wikipedia mining),</li>
 *   <li>big molecules and abstractions with no inchi (from kegg),</li>
 *   <li>a null inchi.</li>
 * </ol>
 * A failure to compute the key is logged and otherwise ignored.
 *
 * @param s the InChI string; may be null
 */
public void setInchi(String s) {
  this.inchi = s;
  if (s == null) {
    // Nothing to derive a key from; previously this threw after the field was already assigned.
    return;
  }

  boolean keyIsComputable =
      !s.startsWith("InChI=/FAKE/METACYC") // 1.
      && !s.startsWith("InChI=/FAKE/BRENDA") // 1.
      && !s.startsWith("InChI'('") // 2.
      && !s.startsWith("InChI1'('") // 2.
      && !s.startsWith("none"); // 3.
  if (!keyIsComputable) {
    return;
  }

  try {
    this.inchiKey = new IndigoInchi(new Indigo()).getInchiKey(s);
  } catch (Exception e) {
    // Best effort: keep whatever key was set before, but report through the class logger.
    LOGGER.warn("Failed to compute InChIKey for: %s (%s)", s, e.getMessage());
  }
}
// TODO: remove this when safe to do so since we can explicitly specify a value with setIsCofactor.
/** Marks this chemical as a cofactor. */
public void setAsCofactor() {
  this.isCofactor = true;
}

/** Sets the cofactor flag to the given value. */
public void setIsCofactor(boolean isCofactor) {
  this.isCofactor = isCofactor;
}

/** Marks this chemical as native. */
public void setAsNative() {
  this.isNative = true;
}

public void setEstimatedEnergy(Double e) {
  this.estimatedEnergy = e;
}
public Long getUuid() {
  return this.uuid;
}

/**
 * Should be null if no pubchem entry.
 */
public Long getPubchemID() {
  return this.pubchem_id;
}

/**
 * Canonical name can be null.
 * Happens when no pubchem "Preferred" name or no pubchem entry.
 */
public String getCanon() {
  return this.canon;
}

/**
 * Returns null only if bad InChI.
 */
public String getSmiles() {
  return this.smiles;
}

public boolean isCofactor() {
  return isCofactor;
}

public boolean isNative() {
  return isNative;
}

public String getInChIKey() {
  return this.inchiKey;
}
/**
 * Returns a new map holding every reference converted with MongoDBToJSON.conv
 * and cast to BasicDBObject.
 */
public Map<REFS, BasicDBObject> getXrefMap() {
  Map<REFS, BasicDBObject> converted = new HashMap<>();
  for (Map.Entry<REFS, JSONObject> ref : this.refs.entrySet()) {
    JSONObject json = ref.getValue();
    converted.put(ref.getKey(), (BasicDBObject) MongoDBToJSON.conv(json));
  }
  return converted;
}

/** Returns the reference stored for {@code type}, or null if there is none. */
public JSONObject getRef(REFS type) {
  JSONObject stored = this.refs.get(type);
  return stored;
}
/**
 * Looks up a nested value inside the reference of the given type by following
 * {@code xpath} key by key.
 *
 * @param type  the reference to search
 * @param xpath the keys to follow, outermost first
 * @return the value under the last key (whatever its type), or null if there is no such
 *         reference, a key is missing, an intermediate value is not a JSON object, or
 *         {@code xpath} is null or empty
 */
public Object getRef(REFS type, String[] xpath) {
  JSONObject current = this.refs.get(type);
  if (current == null || xpath == null) {
    return null;
  }

  for (int i = 0; i < xpath.length; i++) {
    String key = xpath[i];
    if (!current.has(key)) {
      return null;
    }
    if (i == xpath.length - 1) {
      // need to return this object, irrespective of whether it is a JSONObject or not
      return current.get(key);
    }
    // optJSONObject yields null (instead of a ClassCastException) when the path
    // runs through something that is not a JSON object.
    current = current.optJSONObject(key);
    if (current == null) {
      return null;
    }
  }
  return null; // only reached for an empty xpath
}
/**
 * Returns a price-like metric taken from the "metadata" of the given reference.
 *
 * <ul>
 *   <li>SIGMA: "price" divided by "gramquant", rounded to two decimals. When there is no
 *       "gramquant" the price plus 0.000009999 is returned so such cases can be told apart.</li>
 *   <li>DRUGBANK: the largest "cost" found under "prices"/"price", which may be a single
 *       object or an array of objects.</li>
 * </ul>
 *
 * @param typ the reference type to read the metric from
 * @return the metric, or null if the type has no known metric, the reference or the needed
 *         fields are missing, or a value cannot be parsed as a number
 */
public Double getRefMetric(REFS typ) {
  if (typ != REFS.SIGMA && typ != REFS.DRUGBANK) {
    return null; // no reasonable metric known
  }

  JSONObject ref = this.refs.get(typ);
  if (ref == null) {
    return null;
  }
  JSONObject metadata = ref.optJSONObject("metadata");
  if (metadata == null) {
    return null;
  }

  if (typ == REFS.SIGMA) {
    return sigmaMetric(metadata);
  }
  return drugbankMetric(metadata);
}

/** Computes the SIGMA metric (price per gram, or tagged raw price) from its metadata. */
private static Double sigmaMetric(JSONObject metadata) {
  if (!metadata.has("price")) {
    return null;
  }
  // there is 1 entry that wierdly has "price" : "price" in the DB
  // xref.WIKIPEDIA.dbid: http://en.wikipedia.org/wiki/Castanospermine
  // InChI=1S/C8H15NO4/c10-4-1-2-9-3-5(11)7(12)8(13)6(4)9/h4-8,10-13H,1-3H2/t4-,5-,6+,7+,8+/m0/s1
  Double price = parseDoubleOrNull(metadata.get("price"));
  if (price == null) {
    return null;
  }

  if (!metadata.has("gramquant")) {
    // sometimes entries are liquid sizes, e.g., "quantity" : "1ML" , "cas" : "64-04-0" , "price" : "17.80"
    // and then they do not have the gramquant field associated with them.
    return price + 0.000009999; // tag these cases so that at least we can identify them in the UI
  }

  Double grams = parseDoubleOrNull(metadata.get("gramquant"));
  if (grams == null) {
    return null;
  }
  double perGramPrice = price / grams;
  return (double) Math.round(perGramPrice * 100) / 100;
}

/** Computes the DRUGBANK metric (largest listed cost) from its metadata. */
private static Double drugbankMetric(JSONObject metadata) {
  JSONObject prices = metadata.optJSONObject("prices");
  if (prices == null || !prices.has("price")) {
    return null;
  }

  Object price = prices.get("price");
  if (price instanceof JSONArray) {
    // more than one price entry
    JSONArray entries = (JSONArray) price;
    double max = Double.NEGATIVE_INFINITY;
    for (int i = 0; i < entries.length(); i++) {
      JSONObject priceEntry = entries.optJSONObject(i);
      if (priceEntry == null || !priceEntry.has("cost")) {
        continue;
      }
      Double cost = parseDoubleOrNull(priceEntry.get("cost"));
      if (cost != null && max < cost) {
        max = cost;
      }
    }
    if (max == Double.NEGATIVE_INFINITY) {
      return null;
    }
    return max;
  }

  if (price instanceof JSONObject && ((JSONObject) price).has("cost")) {
    // single entry and it contains cost field
    return parseDoubleOrNull(((JSONObject) price).get("cost"));
  }
  return null;
}

/** Parses the string form of {@code value} as a double; returns null if it is not a number. */
private static Double parseDoubleOrNull(Object value) {
  try {
    return Double.parseDouble(String.valueOf(value));
  } catch (NumberFormatException e) {
    return null;
  }
}
/**
 * If reading from db, this should never return null.
 */
public String getInChI() {
  return this.inchi;
}

public Integer getChemSpiderID() {
  return chemspider_id;
}

public Integer getChemSpiderNumUniqueVendors() {
  return chemspider_num_unique_vendors;
}

public JSONArray getChemSpiderVendorXrefs() {
  return chemspider_vendor_xrefs;
}

/** Returns the synonym list held by this chemical (not a copy). */
public List<String> getSynonyms() {
  return this.synonyms;
}

/** Returns the brenda name list held by this chemical (not a copy). */
public List<String> getBrendaNames() {
  return this.brendaNames;
}

/** Returns the map from pubchem name type to names held by this chemical (not a copy). */
public Map<String, String[]> getPubchemNames() {
  return this.names;
}

/** Returns the pubchem names stored for {@code type}, or null if there are none. */
public String[] getPubchemNames(String type) {
  return this.names.get(type);
}

/** Returns the pubchem name types that have names stored. */
public Set<String> getPubchemNameTypes() {
  return this.names.keySet();
}
/**
 * Returns the shortest name among the canonical name, the synonyms and the brenda names.
 * On ties the earlier candidate (in that order) is kept; null if there are no names at all.
 */
public String getShortestName() {
  String best = canon;
  for (String candidate : synonyms) {
    if (best != null && candidate.length() >= best.length()) {
      continue;
    }
    best = candidate;
  }
  for (String candidate : brendaNames) {
    if (best != null && candidate.length() >= best.length()) {
      continue;
    }
    best = candidate;
  }
  return best;
}
/**
 * Returns the shortest name among the canonical name and the brenda names; the canonical
 * name is kept on ties. Null if there is no canonical name and no brenda name.
 */
public String getShortestBRENDAName() {
  String best = canon;
  for (String candidate : brendaNames) {
    boolean shorter = best == null || candidate.length() < best.length();
    best = shorter ? candidate : best;
  }
  return best;
}
/**
 * Returns the first brenda name if there is one, otherwise the first synonym,
 * otherwise the literal "no_name".
 */
public String getFirstName() {
  if (!brendaNames.isEmpty()) {
    return brendaNames.get(0);
  }
  if (!synonyms.isEmpty()) {
    return synonyms.get(0);
  }
  return "no_name";
}
//TODO: incomplete -- no digit-stripped ("fewest alpha") comparison is implemented yet.
/**
 * Currently returns the same result as {@link #getShortestName()}: the shortest of the
 * canonical name, the synonyms and the brenda names, or null if there are none.
 *
 * <p>The previous body built a digit-stripped copy of the canonical name that was never
 * used and that failed with a NullPointerException when there was no canonical name.
 */
public String getFewestAlphaName() {
  return getShortestName();
}
public Double getEstimatedEnergy() {
  return this.estimatedEnergy;
}

/** Returns a multi-line summary of the identifying fields of this chemical. */
@Override
public String toString() {
  StringBuilder text = new StringBuilder();
  text.append("UUID: ").append(uuid);
  text.append(" \n PubchemID: ").append(pubchem_id);
  text.append(" \n Canon: ").append(canon);
  text.append(" \n Smiles: ").append(getSmiles());
  text.append(" \n InChI: ").append(inchi);
  text.append(" \n InChIKey: ").append(getInChIKey());
  return text.toString();
}
/**
 * Returns a multi-line description of this chemical including names, refs, flags and
 * estimated energy. The pubchem names are rendered as {@code {type=[name, ...], ...}}
 * so that the name arrays are readable rather than printed as array identity hashes.
 */
public String toStringDetail() {
  String namesText;
  if (names == null) {
    namesText = "null";
  } else {
    StringBuilder rendered = new StringBuilder("{");
    for (Map.Entry<String, String[]> typed : names.entrySet()) {
      if (rendered.length() > 1) {
        rendered.append(", ");
      }
      rendered.append(typed.getKey()).append('=').append(java.util.Arrays.toString(typed.getValue()));
    }
    namesText = rendered.append('}').toString();
  }

  return "ID: " + uuid +
      " \n PubchemID: " + pubchem_id +
      " \n Canon: " + canon +
      " \n Smiles: " + getSmiles() +
      " \n InChI: " + inchi +
      " \n InChIKey: " + getInChIKey() +
      " \n Names: " + namesText + "; " + synonyms + "; " + brendaNames +
      " \n Refs: " + refs +
      " \n IsCofactor, IsNative: " + isCofactor + ", " + isNative +
      " \n EstimatedEnergy: " + estimatedEnergy;
}
/**
 * Collects the uuids of the given chemicals.
 *
 * @param chemicals the chemicals to read ids from
 * @return a new set containing each chemical's uuid
 */
public static Set<Long> getChemicalIDs(Collection<Chemical> chemicals) {
  Set<Long> ids = new HashSet<>();
  chemicals.forEach(chemical -> ids.add(chemical.getUuid()));
  return ids;
}
}