/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package act.installer.pubchem;
import act.shared.Chemical;
import com.fasterxml.jackson.annotation.JsonProperty;
import org.json.JSONArray;
import org.json.JSONObject;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * A serializable container for features extracted from PC-Compound documents: names keyed by name type,
 * PubChem ids, an InChI, and SMILES strings. We can go directly to {@link Chemical} objects if this
 * intermediate representation turns out to be unnecessary.
 *
 * <p>Instances are mutable and the collection getters return the internal collections themselves, not copies.
 */
public class PubchemEntry implements Serializable {
  private static final long serialVersionUID = -6542683222963930035L;

  // TODO: use a builder for this instead of constructing and mutating.

  @JsonProperty("IUPAC_names")
  private Map<String, String> names = new HashMap<>(5); // There tend to be five name variants per chemical.

  @JsonProperty("pubchem_ids")
  private List<Long> pubchemIds = new ArrayList<>(1); // Hopefully there's only one id.

  @JsonProperty("InChI")
  private String inchi;

  /* Use a list for SMILES rather than a set to maintain order.  We could use a LinkedHashSet instead, but the
   * computational overhead of maintaining a set doesn't seem worth removing a few duplicated SMILES.
   * TODO: check that pubchem doesn't have many dupes. */
  @JsonProperty("SMILES")
  private List<String> smiles = new ArrayList<>(1); // Hopefully there's only one SMILES too!

  /**
   * Creates an entry for general use, seeded with a single PubChem id.
   *
   * @param pubchemId the first (primary) PubChem id of this entry
   */
  public PubchemEntry(Long pubchemId) {
    this.pubchemIds.add(pubchemId);
  }

  /**
   * Creates a fully populated entry; intended for deserialization.
   *
   * <p>Non-null collections are stored by reference (no copy is made).  A {@code null} collection leaves the
   * corresponding empty default in place, so the mutators and {@link #asChemical()} never operate on a
   * {@code null} collection.
   *
   * <p>NOTE(review): this constructor carries no Jackson creator annotation; confirm how deserialization
   * actually reaches it.
   *
   * @param names      names keyed by name type; may be {@code null}
   * @param pubchemIds PubChem ids, primary id first; may be {@code null}
   * @param inchi      the InChI string; stored as given
   * @param smiles     SMILES strings, primary first; may be {@code null}
   */
  public PubchemEntry(Map<String, String> names, List<Long> pubchemIds, String inchi, List<String> smiles) {
    // Only replace the pre-initialized empty collections when a real collection was supplied.
    if (names != null) {
      this.names = names;
    }
    if (pubchemIds != null) {
      this.pubchemIds = pubchemIds;
    }
    this.inchi = inchi;
    if (smiles != null) {
      this.smiles = smiles;
    }
  }

  /**
   * @return the internal map of names keyed by name type
   */
  public Map<String, String> getNames() {
    return names;
  }

  /**
   * Stores a name under the given type, replacing any name previously stored for that type.
   *
   * @param type  the name type used as the map key
   * @param value the name itself
   */
  public void setNameByType(String type, String value) {
    names.put(type, value);
  }

  /**
   * @return the internal list of PubChem ids, in insertion order
   */
  public List<Long> getPubchemIds() {
    return pubchemIds;
  }

  /**
   * Appends another PubChem id to this entry.
   *
   * @param pubchemId the id to add
   */
  public void appendPubchemId(Long pubchemId) {
    pubchemIds.add(pubchemId);
  }

  /**
   * @return the InChI string, or {@code null} if none has been set
   */
  public String getInchi() {
    return inchi;
  }

  /**
   * @param inchi the InChI string to store
   */
  public void setInchi(String inchi) {
    this.inchi = inchi;
  }

  /**
   * @return the internal list of SMILES strings, in insertion order (duplicates are not removed)
   */
  public List<String> getSmiles() {
    return this.smiles;
  }

  /**
   * Appends another SMILES string to this entry.
   *
   * @param smiles the SMILES string to add
   */
  public void appendSmiles(String smiles) {
    this.smiles.add(smiles);
  }

  /**
   * Converts this entry into a {@link Chemical}.
   *
   * <p>The chemical is built from the InChI.  The first SMILES (if any) and the first PubChem id become the
   * primary values, every name is added under its type, and the complete id and SMILES lists are attached as
   * the {@code ALT_PUBCHEM} reference under the keys {@code "ids"} and {@code "smiles"}.
   *
   * @return a new chemical populated from this entry
   * @throws IndexOutOfBoundsException if this entry has no PubChem ids; at least one id is assumed
   */
  public Chemical asChemical() {
    Chemical c = new Chemical(this.inchi);
    if (!this.smiles.isEmpty()) {
      c.setSmiles(this.smiles.get(0)); // Just use the first SMILES we find as the primary.
    }
    c.setPubchem(this.getPubchemIds().get(0)); // Assume we'll have at least one id to start with.

    for (Map.Entry<String, String> entry : names.entrySet()) {
      c.addNames(entry.getKey(), new String[] { entry.getValue() });
    }

    // The lists are converted to arrays before being handed to JSONArray, exactly as the original code did.
    JSONArray allIds = new JSONArray(pubchemIds.toArray(new Long[pubchemIds.size()]));
    JSONArray allSmiles = new JSONArray(smiles.toArray(new String[smiles.size()]));
    c.putRef(Chemical.REFS.ALT_PUBCHEM,
        new JSONObject().
            put("ids", allIds).
            put("smiles", allSmiles)
    );
    return c;
  }
}