/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package act.installer;
import act.shared.ConsistentInChI;
import act.shared.Chemical;
import act.shared.Chemical.REFS;
import act.shared.helpers.InchiMapKey;
import act.shared.helpers.MongoDBToJSON;
import com.mongodb.DBObject;
import com.mongodb.util.JSON;
import org.json.JSONObject;
import java.io.Serializable;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
class Xref implements Serializable {
private static final long serialVersionUID = 1L;
// have to use DBObject internally instead of JSONObject
// because we need it to be Serializable; and JSONObject
// is not.
HashMap<REFS, DBObject> json;
Xref() {
this.json = new HashMap<REFS, DBObject>();
}
public boolean containsType(REFS typ) {
return this.json.containsKey(typ);
}
public JSONObject get(REFS typ) {
return MongoDBToJSON.conv(this.json.get(typ));
}
public void add(REFS typ, JSONObject doc) {
this.json.put(typ, MongoDBToJSON.conv(doc));
}
}
public class ImportantChemicals {
HashMap<InchiMapKey, Xref> chems;
Set<String> all_inchis;
Set<String> done;
ImportantChemicals() {
this.done = new HashSet<String>();
this.chems = new HashMap<>();
this.all_inchis = new HashSet<String>();
}
public void parseAndAdd(String row) throws Exception {
// We assume that the format is "DB_SRC\tDB_SPECIFIC_XREF\tInChI\tJSON_METADATA"
// DB_SRC has type Chemical.REFS
String[] entry = row.split("\t", -2); // the neg limit means that it keeps the trailing empty strings
REFS typ = null;
String typ_str = entry[0], dbid = entry[1], inchi = entry[2], meta = entry[3];
inchi = ConsistentInChI.consistentInChI(inchi, "Important Chemicals"); // round trip inchi to make it consistent with the rest of the system
JSONObject doc = new JSONObject();
try {
typ = REFS.valueOf(typ_str);
} catch (Exception e) {
System.err.println("Invalid important chemicals row: " + row);
}
doc.put("dbid", dbid);
doc.put("metadata", JSON.parse(meta));
InchiMapKey large_inchi = new InchiMapKey(inchi);
if (this.chems.containsKey(large_inchi)) {
Xref xref = this.chems.get(large_inchi);
if (xref.containsType(typ)) {
// duplicate mapping... hmm... one needs to be ignored or they are redundant.
// the only thing to check are dbid and metadata as the others are identical by construction
JSONObject o = xref.get(typ);
if (o.get("dbid") != dbid || o.get("metadata") != meta) {
// conflicting entry.. error message
System.err.println("ImportantChemicals: conflicting entry! leaving the old one in there.");
} else {
// redundant entry.. do nothing
}
} else {
xref.add(typ, doc); // does not already have a mapping. add indiscrimately
}
} else {
Xref xref = new Xref();
xref.add(typ, doc);
this.chems.put(large_inchi, xref);
this.all_inchis.add(inchi);
}
}
public void setRefs(Chemical c) throws Exception {
String index = c.getInChI();
InchiMapKey inchi = new InchiMapKey(index);
if (this.chems.containsKey(inchi)) {
// we have data on this node, so add that to the chem
Xref ref = this.chems.get(inchi);
for (REFS typ : ref.json.keySet())
c.putRef(typ, MongoDBToJSON.conv(ref.json.get(typ)));
System.out.println("Added ref to " + index);
}
this.done.add(index);
return;
}
public Set<Chemical> remaining() throws Exception {
// we keep a flag about which chemicals have been called setRefs on and which are not
// For the ones that are not, we need to create a chemical using its
Set<Chemical> rem = new HashSet<Chemical>();
for (String inchi : all_inchis) {
if (done.contains(inchi))
continue;
Chemical c = new Chemical(inchi);
setRefs(c);
// the UUID here does not matter, since db.submitActChemicalToDB computes the next available uuid
rem.add(c);
System.out.println("Remaining " + inchi);
}
return rem;
}
}