/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package act.installer;
import java.io.PrintStream;
import java.util.Set;
import java.util.HashSet;
import java.util.HashMap;
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.FileInputStream;
import java.io.IOException;
import org.json.JSONObject;
import org.json.JSONArray;
import org.semanticweb.owlapi.apibinding.OWLManager;
import org.semanticweb.owlapi.model.IRI;
import org.semanticweb.owlapi.model.OWLAnnotation;
import org.semanticweb.owlapi.model.OWLIndividual;
import org.semanticweb.owlapi.model.OWLClass;
import org.semanticweb.owlapi.model.OWLLiteral;
import org.semanticweb.owlapi.model.OWLClassExpression;
import org.semanticweb.owlapi.model.OWLObjectProperty;
import org.semanticweb.owlapi.model.ClassExpressionType;
import org.semanticweb.owlapi.model.OWLException;
import org.semanticweb.owlapi.model.OWLOntology;
import org.semanticweb.owlapi.model.OWLOntologyManager;
import act.shared.helpers.P;
public class ChEBIParser {
private static int INDENT = 4;
private final OWLOntology ontology;
private final PrintStream out, err;
private HashMap<String, EntryTypes> entryTypes;
private ChEBIParser(OWLOntology _ontology) {
ontology = _ontology;
out = System.out;
err = System.err;
entryTypes = new HashMap<String, EntryTypes>();
entryTypes.put("http://purl.obolibrary.org/obo#Synonym", EntryTypes.Synonym );
entryTypes.put("http://purl.obolibrary.org/obo#Definition", EntryTypes.Definition );
entryTypes.put("http://purl.obolibrary.org/obo#SMILES", EntryTypes.SMILES );
entryTypes.put("http://purl.obolibrary.org/obo#xref", EntryTypes.xref );
entryTypes.put("http://purl.obolibrary.org/obo#InChI", EntryTypes.InChI );
entryTypes.put("http://purl.obolibrary.org/obo#InChIKey", EntryTypes.InChIKey );
entryTypes.put("http://purl.obolibrary.org/obo#has_role", EntryTypes.has_role );
entryTypes.put("http://purl.obolibrary.org/obo#has_part", EntryTypes.has_part );
entryTypes.put("http://purl.obolibrary.org/obo#has_functional_parent", EntryTypes.has_functional_parent );
entryTypes.put("http://purl.obolibrary.org/obo#has_parent_hydride", EntryTypes.has_parent_hydride );
entryTypes.put("http://purl.obolibrary.org/obo#is_conjugate_acid_of", EntryTypes.is_conjugate_acid_of );
entryTypes.put("http://purl.obolibrary.org/obo#is_conjugate_base_of", EntryTypes.is_conjugate_base_of );
entryTypes.put("http://purl.obolibrary.org/obo#is_enantiomer_of", EntryTypes.is_enantiomer_of );
entryTypes.put("http://purl.obolibrary.org/obo#is_substituent_group_from", EntryTypes.is_substituent_group_from );
entryTypes.put("http://purl.obolibrary.org/obo#is_tautomer_of", EntryTypes.is_tautomer_of );
}
public enum EntryTypes { Synonym, Definition, SMILES, xref, InChI, InChIKey,
has_role, has_part, has_functional_parent, has_parent_hydride,
is_conjugate_acid_of, is_conjugate_base_of, is_enantiomer_of,
is_substituent_group_from, is_tautomer_of
}
private HashMap<OWLClass, P<OWLClass, HashMap<String, EntryTypes>>>
getAllElementsWithParentsIn(Set<OWLClass> parentCategories) throws OWLException {
HashMap<OWLClass, P<OWLClass, HashMap<String, EntryTypes>>> selected;
selected = new HashMap<OWLClass, P<OWLClass, HashMap<String, EntryTypes>>>();
for (OWLClass cl : ontology.getClassesInSignature()) {
Set<OWLClass> parents = get_has_role_parents(cl);
HashMap<String, EntryTypes> data = getData(cl);
for (OWLClass p : parents) {
if (parentCategories.contains(p)) {
// this class has a parent within the set we are looking for.
// so it qualifies as a class to be returned.. add it to map
P<OWLClass, HashMap<String, EntryTypes>> p_data;
p_data = new P<OWLClass, HashMap<String, EntryTypes>>(p, data);
selected.put(cl, p_data);
}
}
}
return selected;
}
/**
* Print the class hierarchy for the given ontology from this class down,
* assuming this class is at the given level. Makes no attempt to deal
* sensibly with multiple inheritance.
*/
private void readSubtree(OWLClass clazz, HashMap<OWLClass, OWLClass> parents, boolean doPrint) throws OWLException {
parents.put(clazz, null); // install in parents map the root we are looking for
readSubtree(clazz, parents, 0, doPrint);
}
/**
* Print the class hierarchy from this class down, assuming this class is at
* the given level. Makes no attempt to deal sensibly with multiple
* inheritance.
*/
private void readSubtree(OWLClass clazz, HashMap<OWLClass, OWLClass> parents, int level, boolean doPrint) throws OWLException {
if (doPrint) {
for (int i = 0; i < level * INDENT; i++) {
err.print(" ");
}
err.println(labelFor(clazz, ontology));
}
/* Find the children and recurse */
for (OWLClassExpression c : clazz.getSubClasses(ontology)) {
OWLClass child = c.asOWLClass();
if (!child.equals(clazz)) {
parents.put(child, clazz); // install parents in map
readSubtree(child, parents, level + 1, doPrint); // recurse
}
}
}
private OWLClass recurseToSubtreeRoot(OWLClass curr_root, String subtree) throws OWLException {
String label = labelFor(curr_root, ontology);
if (subtree.equals(label))
return curr_root;
/* Else find the children and recurse */
OWLClass descendant = null;
for (OWLClassExpression c : curr_root.getSubClasses(ontology)) {
OWLClass child = c.asOWLClass();
if (!child.equals(curr_root)) {
descendant = recurseToSubtreeRoot(child, subtree);
if (descendant != null)
break;
}
}
return descendant;
}
private static String labelFor(OWLClass clazz, OWLOntology ontology) {
// LabelExtractor is not available in the version of OWLAPI
// Instead: Use the getAnnotations like we do in getData
System.out.println("This is unverified. ChEBI parser:");
System.out.println("LabelExtractor is not available in this OWLAPI v");
System.out.println("So we extract the label manually. But not sure");
System.out.println("if this code is correct. Need to check. Pause...");
System.console().readLine();
String chebiID = clazz.getIRI().getFragment();
String label = null;
for (OWLAnnotation a : clazz.getAnnotations(ontology)) {
// We got the "if code" below from
// http://grepcode.com/file/repo1.maven.org/maven2/net.sourceforge.owlapi/owlapi-contract/3.4/uk/ac/manchester/owl/owlapi/tutorial/LabelExtractor.java
if (a.getProperty().isLabel()) {
OWLLiteral c = (OWLLiteral) a.getValue();
label = c.getLiteral();
}
}
// OLD code using LabelExtractor:
// LabelExtractor le = new LabelExtractor();
// Set<OWLAnnotation> annotations = clazz.getAnnotations(ontology);
// for (OWLAnnotation anno : annotations) {
// anno.accept(le);
// }
// label = le.getResult();
/* Print out the label if there is one. Else ID */
if (label != null) {
return chebiID + "(" + label + ")";
} else {
return chebiID;
}
}
private Set<OWLClass> get_has_role_parents(OWLClass clazz) {
Set<OWLClass> roles = new HashSet<OWLClass>();
for (OWLClassExpression sup: clazz.getSuperClasses(ontology)) {
ClassExpressionType typ = sup.getClassExpressionType();
switch (typ) {
case OBJECT_SOME_VALUES_FROM:
Set<OWLObjectProperty> properties = sup.getObjectPropertiesInSignature();
Set<OWLClass> classes = sup.getClassesInSignature();
if (singletonPropertyHasRole(properties) && classes.size() == 1) {
OWLClass has_role_parent = classes.toArray(new OWLClass[0])[0];
roles.add(has_role_parent);
// err.println("\t\t Added parent: " + has_role_parent);
}
// else {
// err.println("Was expecting singleton sets for properties and classes.");
// err.println("Got more/less: " + properties + " " + classes);
// System.exit(-1);
// }
break;
default:
// err.println("\t Default (SubClassOf): " + sup);
break;
}
}
return roles;
}
private boolean singletonPropertyHasRole(Set<OWLObjectProperty> properties) {
if (properties.size() != 1)
return false;
for (OWLObjectProperty p : properties) {
EntryTypes prop = entryTypes.get(p.toStringID());
// err.println("\t\t ObjProp: " + prop);
// err.println("\t\t ObjProp: " + p);
if (prop == null) System.exit(-1);
return EntryTypes.has_role == prop;
}
return false;
}
private HashMap<String, EntryTypes> getData(OWLClass clazz) {
HashMap<String, EntryTypes> data = new HashMap<String, EntryTypes>();
for (OWLAnnotation a : clazz.getAnnotations(ontology)) {
// err.println("\t\t Annotation Property: " + a.getProperty());
String id = a.getProperty().toStringID();
if (id.equals("rdfs:label")) continue;
EntryTypes prop = entryTypes.get(id);
String val = ((OWLLiteral)a.getValue()).getLiteral();
// err.println("\t\t Annotation Property: " + prop);
// err.println("\t\t Annotation Literal : " + value);
data.put(val, prop);
}
return data;
}
private static void find(String subtreeRoot, IRI documentIRI, IRI inchiIRI) throws OWLException, IOException {
OWLOntologyManager manager = OWLManager.createOWLOntologyManager();
// Load the ontology...
OWLOntology ontology = manager.loadOntologyFromOntologyDocument(documentIRI);
// Print metadata about ontology...
System.err.println("Ontology Loaded...");
System.err.println("Document IRI: " + documentIRI);
System.err.println("Ontology : " + ontology.getOntologyID());
System.err.println("Format : " + manager.getOntologyFormat(ontology));
HashMap<String, String> inchis = readInchis(inchiIRI);
ChEBIParser chebi = new ChEBIParser(ontology);
OWLClass clazz = manager.getOWLDataFactory().getOWLThing();
System.err.println("Toplevel : " + clazz);
clazz = chebi.recurseToSubtreeRoot(clazz, subtreeRoot);
System.err.println("Requested : " + clazz);
// Print the hierarchy
HashMap<OWLClass, OWLClass> treeParents = new HashMap<OWLClass, OWLClass>();
boolean doPrintSubtree = true;
chebi.readSubtree(clazz, treeParents, doPrintSubtree);
Set<OWLClass> metabolite_categories = treeParents.keySet();
// System.err.println("Metabolite categories: " + metabolite_categories);
// The function getAllElementsWithParentsIn picks out elements from the ontology that
// have a "has_role" relationship with any member of metabolite_categories.
// E.g., chemA has_role eukaryotic_metabolite; chemB has_role metabolite
// then { chemA -> eukaryotic_metabolite, chemB -> metabolite } will be in the
// returned map.
// The fn also gathers data for each of the elems
// e.g., { Sy->Synonyms, X->xref, S->SMILES, I -> InChI, Ik -> InChIKey }
HashMap<OWLClass, P<OWLClass, HashMap<String, EntryTypes>>> elems;
elems = chebi.getAllElementsWithParentsIn(metabolite_categories);
for (OWLClass e : elems.keySet())
output(e, elems.get(e), inchis, ontology);
}
private static HashMap<String, String> readInchis(IRI documentIRI) throws IOException {
String loc = documentIRI.getNamespace() + documentIRI.getFragment();
if (loc.startsWith("file:")) {
loc = loc.substring("file:".length());
} else {
System.err.println("Expecting inchi file as file:///absolute/path");
System.exit(-1);
}
BufferedReader in = new BufferedReader(new InputStreamReader(new FileInputStream(loc)));
String line;
HashMap<String, String> inchis = new HashMap<String, String>();
while ((line = in.readLine()) != null) {
String[] id_inc = line.split("\t");
String id = "CHEBI:" + id_inc[0];
String inchi = id_inc[1];
inchis.put(id, inchi);
}
return inchis;
}
private static void output(OWLClass elem, P<OWLClass, HashMap<String, EntryTypes>> meta, HashMap<String, String> inchis, OWLOntology ontology) {
OWLClass parent = meta.fst();
HashMap<String, EntryTypes> data = meta.snd();
String id = getID(elem);
String inchi = inchis.get(id);
if (inchi == null) {
// If there is no inchi we cannot merge this chemical into Act
// so ignore...
System.err.println("no inchi for " + id);
return;
}
JSONObject metadata = new JSONObject();
metadata.put("of_type", getID(parent));
// Synonym, Definition, SMILES, xref, InChI, InChIKey,
// has_role, has_part, has_functional_parent, has_parent_hydride,
// is_conjugate_acid_of, is_conjugate_base_of, is_enantiomer_of,
// is_substituent_group_from, is_tautomer_of
JSONArray d;
if ((d = getMeta(data, EntryTypes.Definition)) != null && d.length() != 0)
metadata.put(EntryTypes.Definition.toString(), d);
if ((d = getMeta(data, EntryTypes.Synonym)) != null && d.length() != 0)
metadata.put(EntryTypes.Synonym.toString(), d);
if ((d = getMeta(data, EntryTypes.xref)) != null && d.length() != 0)
metadata.put(EntryTypes.xref.toString(), d);
// Need to output in format
// CHEBI<tab><ChebiID><tab><inchi><tab><json metadata>
String row = "CHEBI\t";
row += id + "\t";
row += inchi + "\t";
row += metadata.toString();
System.out.println(row);
// System.out.println(labelFor(elem, ontology) + "\t" + labelFor(type, ontology));
}
private static String getID(OWLClass c) {
String frag = c.getIRI().getFragment(); // return CHEBI_25212 etc.
return frag.replace('_', ':');
}
private static JSONArray getMeta(HashMap<String, EntryTypes> map, EntryTypes typToAdd) {
JSONArray vals = new JSONArray();
for (String val : map.keySet()) {
if (typToAdd.equals(map.get(val)))
vals.put(val);
}
return vals;
}
public static void main(String[] args) throws OWLException,
InstantiationException, IllegalAccessException,
ClassNotFoundException, IOException {
// We load an ontology from the URI specified
IRI documentIRI = IRI.create(args[0]);
IRI inchiIRI = IRI.create(args[1]);
String subtree = "CHEBI_25212(metabolite)";
if (args.length > 2)
subtree = args[2]; // override the default
find(subtree, documentIRI, inchiIRI);
}
}