/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package act.installer.metacyc.entities;
import act.installer.metacyc.BPElement;
import org.biopax.paxtools.model.level3.StructureFormatType;
import act.installer.metacyc.OrganismComposition;
import act.installer.metacyc.JsonHelper;
import org.json.JSONObject;
/**
 * A chemical structure entry: the raw structure string, the format it was
 * supplied in, and the SMILES / InChI representation derived from it when the
 * format allows.
 *
 * <p>Derivation happens once, in the constructor, via
 * {@link #handleStructureData()}. Problems with the input are reported on
 * {@code System.err}; the affected derived field is then left {@code null}.
 */
public class ChemicalStructure extends BPElement {
  StructureFormatType format; // Enum.{CML, InChI, SMILES}

  // Raw structure payload. For CML this is XML data that has < > " escapes in
  // it but is otherwise valid CML that `obabel -icml mol.cml -osmiles` converts
  // to SMILES. It does sometimes contain "R" groups, which obabel converts to
  // "*" in the SMILES output; converting those to InChI fails, so they need
  // care.
  String structureData;

  // In MetaCyc the structures are in CML format, so InChIs and SMILES are
  // computed using openbabel, but as a postprocessing operation.
  String smiles;

  // As of 20150909, ChemicalStructure entries may represent their structure
  // with InChIs directly.
  String inchi;

  /** @return the raw structure string exactly as passed to the constructor */
  public String getStructure() { return this.structureData; }

  /** @return the format the structure string was supplied in */
  public StructureFormatType getOriginalFormat() { return this.format; }

  /** @return the SMILES string, or {@code null} if none was derived */
  public String getSMILES() { return this.smiles; }

  /** @return the InChI string, or {@code null} if none was derived */
  public String getInChI() { return this.inchi; }

  /**
   * Stores the format and raw data, then derives {@code smiles} / {@code inchi}
   * from them.
   *
   * @param basics        element whose state is handed to the superclass constructor
   * @param format        format of {@code structureData}
   * @param structureData raw structure string
   */
  public ChemicalStructure(BPElement basics, StructureFormatType format, String structureData) {
    super(basics);
    this.format = format;
    this.structureData = structureData;
    // NOTE(review): this is a public, overridable method invoked from the
    // constructor; a subclass override would run before that subclass's own
    // fields are initialized.
    handleStructureData();
  }

  // Opening tag of the CML element whose text content is the SMILES string.
  static final String pre = "<string title=\"smiles\">";
  static final int prelen = pre.length();
  // Closing tag that terminates the SMILES text.
  static final String post = "</string>";

  /**
   * Populates {@code smiles} or {@code inchi} from {@code structureData}
   * according to {@code format}:
   * <ul>
   *   <li>SMILES: the data is used as the SMILES string unchanged;</li>
   *   <li>CML: the text between the opening smiles tag and the next closing
   *       string tag is used as the SMILES string;</li>
   *   <li>InChI: the data is used as the InChI if it starts with "InChI=".</li>
   * </ul>
   * Anything else (missing format or data, CML without an embedded SMILES
   * element, malformed InChI, unrecognised format) is reported on
   * {@code System.err} and leaves the derived fields untouched.
   */
  public void handleStructureData() {
    if (this.format == null) {
      // Switching on a null enum would throw; report it like any other
      // unrecognised format instead of failing construction.
      System.err.println("WARNING: Received unknown chemical structure format: " + format);
      return;
    }
    if (this.structureData == null) {
      System.err.println("ERROR: Received null chemical structure data for format: " + format);
      return;
    }

    switch (this.format) {
      case SMILES:
        this.smiles = this.structureData; // already in SMILES format
        break;
      case CML: {
        String embedded = extractSmilesFromCml(this.structureData);
        if (embedded != null) {
          this.smiles = embedded;
        } else {
          System.err.println("ERROR: Received CML without SMILES");
        }
        break;
      }
      case InChI:
        if (this.structureData.startsWith("InChI=")) {
          this.inchi = this.structureData;
        } else {
          System.err.println("ERROR: Found badly formed InChI chemical structure: " + this.structureData);
        }
        break;
      default:
        System.err.println("WARNING: Received unknown chemical structure format: " + format);
        break;
    }
  }

  /**
   * Returns the text enclosed by the first smiles opening tag and the first
   * closing string tag that follows it, or {@code null} when either tag is
   * absent.
   */
  private static String extractSmilesFromCml(String cml) {
    int tagAt = cml.indexOf(pre);
    if (tagAt == -1) {
      // Test the raw index: once prelen has been added to it, a "not found"
      // result can no longer be recognised as -1.
      return null;
    }
    int start = tagAt + prelen;
    // Search from `start` so that a closing tag belonging to an earlier
    // element cannot produce an end offset that precedes the SMILES text.
    int end = cml.indexOf(post, start);
    if (end == -1) {
      return null;
    }
    return cml.substring(start, end);
  }

  /**
   * Builds the JSON form of this structure: a {@link JsonHelper} is created
   * from {@code src}, the SMILES value is added under the key "smiles", and the
   * helper's JSON object is returned.
   *
   * <p>NOTE(review): {@code inchi} is not emitted here; confirm whether that is
   * intentional for InChI-format entries.
   *
   * @param src passed to the {@link JsonHelper} constructor
   * @return the JSON object produced by the helper
   */
  public JSONObject expandedJSON(OrganismComposition src) {
    JsonHelper o = new JsonHelper(src);
    o.add("smiles", smiles);
    return o.getJSON();
  }
}