/*************************************************************************
* *
* This file is part of the 20n/act project. *
* 20n/act enables DNA prediction for synthetic biology/bioengineering. *
* Copyright (C) 2017 20n Labs, Inc. *
* *
* Please direct all queries to act@20n.com. *
* *
* This program is free software: you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation, either version 3 of the License, or *
* (at your option) any later version. *
* *
* This program is distributed in the hope that it will be useful, *
* but WITHOUT ANY WARRANTY; without even the implied warranty of *
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
* GNU General Public License for more details. *
* *
* You should have received a copy of the GNU General Public License *
* along with this program. If not, see <http://www.gnu.org/licenses/>. *
* *
*************************************************************************/
package act.installer.brenda;
import act.server.DBIterator;
import act.server.MongoDB;
import act.shared.Chemical;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.mongodb.BasicDBList;
import com.mongodb.BasicDBObject;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.IOException;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
public class BrendaChebiOntology {
private static final Logger LOGGER = LogManager.getFormatterLogger(BrendaChebiOntology.class);
// Relationship type ids passed to SQLConnection.getChebiRelationships (bound to the 'type' column in
// ChebiRelationship.QUERY): 1 selects "is subtype of" relationships, 12 selects "has role" relationships.
private static final int IS_SUBTYPE_OF_RELATIONSHIP_TYPE = 1;
private static final int HAS_ROLE_RELATIONSHIP_TYPE = 12;
// Shared JSON serializer (used by main to pretty-print an application set). It is never reassigned, hence final.
private static final ObjectMapper mapper = new ObjectMapper();
// This ChEBI ID corresponds to the ontology 'Application', which is a top-level role.
// The method getApplications traverses the ontologies down from this ontology.
// The effect is to consider only roles that are applications, defined in the user manual as 'classifying [entities]
// on the basis of their intended use by humans'.
private static final String APPLICATION_CHEBI_ID = "CHEBI:33232";
/**
 * The ChebiOntology class holds an ontology, defined by an ID (the ChEBI ID, for example 'CHEBI:16708' for adenine),
 * a term holding a short name and a longer definition (which may be absent).
 * These are queried from 2 different tables in the Brenda database: ontology_chebi_{Definitions,Terms}
 * We use a workaround (see http://stackoverflow.com/questions/4796872/full-outer-join-in-mysql) to mimic the
 * full outer join in MySQL. That allows us to merge information from both tables irrespective of the presence of
 * an ontology in one or the other.
 *
 * Two ChebiOntology objects are equal when their ChEBI IDs are equal; term and definition are ignored.
 */
public static class ChebiOntology {

  // The following query retrieves the terms (basic string defining an ontology) and definitions
  // (when they exist) corresponding to a ChEBI id (ex: "CHEBI:46195") to create ChebiOntology objects.
  public static final String QUERY = StringUtils.join(new String[]{
      "SELECT",
      " terms.id_go,",
      " terms.term,",
      " definitions.definition",
      "FROM ontology_chebi_Terms terms",
      "LEFT OUTER JOIN ontology_chebi_Definitions definitions",
      "ON terms.id_go = definitions.id_go",
      "UNION",
      "SELECT",
      " definitions.id_go,",
      " terms.term,",
      " definitions.definition",
      "FROM ontology_chebi_Terms terms",
      "RIGHT OUTER JOIN ontology_chebi_Definitions definitions",
      "ON terms.id_go = definitions.id_go"
  }, " ");

  // The fields are final: instances are used as HashMap/HashSet keys and must not change once built.
  @JsonProperty("chebi_id")
  private final String chebiId;

  @JsonProperty("term")
  private final String term;

  @JsonProperty("definition")
  private final String definition;

  /**
   * @param chebiId the ChEBI ID of the ontology (ex: "CHEBI:46195")
   * @param term the term naming the ontology, possibly null
   * @param definition the longer definition of the ontology, possibly null
   */
  public ChebiOntology(String chebiId, String term, String definition) {
    this.chebiId = chebiId;
    this.term = term;
    this.definition = definition;
  }

  public String getChebiId() {
    return this.chebiId;
  }

  public String getTerm() {
    return this.term;
  }

  public String getDefinition() {
    return this.definition;
  }

  // We override the equals and hashCode methods to make a ChebiOntology object hashable and allow construction of
  // HashSet and HashMap of ChebiOntology objects. Both methods only consider the ChEBI ID and both accept a null ID.
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    ChebiOntology that = (ChebiOntology) o;
    return Objects.equals(chebiId, that.chebiId);
  }

  @Override
  public int hashCode() {
    // Objects.hashCode returns 0 for a null id instead of throwing, which keeps hashCode consistent with equals.
    // For a non-null id the value is chebiId.hashCode(), exactly as before.
    return Objects.hashCode(chebiId);
  }

  /**
   * Creates a ChebiOntology object from the current row of a ResultSet produced by {@link #QUERY}.
   * It reads the first 3 columns, assuming the order: ChEBI ID, term, definition.
   * @param resultSet a ResultSet positioned on the row to convert
   * @return the ChebiOntology described by the current row
   * @throws SQLException if a column cannot be read
   */
  public static ChebiOntology fromResultSet(ResultSet resultSet) throws SQLException {
    return new ChebiOntology(
        resultSet.getString(1),
        resultSet.getString(2),
        resultSet.getString(3));
  }

  /**
   * @return a BasicDBObject with the keys "chebi_id", "term" and "definition"
   */
  public BasicDBObject toBasicDBObject() {
    BasicDBObject o = new BasicDBObject();
    o.put("chebi_id", getChebiId());
    o.put("term", getTerm());
    o.put("definition", getDefinition());
    return o;
  }
}
public static class ChebiRelationship {
// The following query allows to retrieve the relations of a given type, passed as argument.
// It is restricted to ids starting with the string 'CHEBI:'
public static final String QUERY = StringUtils.join(new String[]{
"SELECT",
" id_go,",
" rel_id_go",
"FROM ontology_chebi_Relations",
"WHERE type = ?",
"AND id_go like 'CHEBI:%'",
"AND rel_id_go like 'CHEBI:%'"
}, " ");
public static void bindType(PreparedStatement stmt, Integer relationshipType) throws SQLException {
stmt.setInt(1, relationshipType);
}
private String chebiId;
private String parentChebiId;
public ChebiRelationship(String chebiId, String parentChebiId) {
this.chebiId = chebiId;
this.parentChebiId = parentChebiId;
}
public String getChebiId() {
return chebiId;
}
public String getParentChebiId() {
return parentChebiId;
}
/* This function creates a ChebiOntology object from a ResultSet resulting from a SQL query.
* It pulls the 3 first fields from the query, assuming the order:
* chebiId,
* parentChebiId
* if type = 1, chebiId refers to a subtype of the ontology parentChebiId
* if type = 12, parentChebiId refers to a role of the ontology chebiId
*/
public static ChebiRelationship fromResultSet(ResultSet resultSet) throws SQLException {
return new ChebiRelationship(
resultSet.getString(1),
resultSet.getString(2));
}
}
/**
 * A ChebiApplicationSet groups the two sets of application ontologies attached to a chemical entity:
 * its direct applications and its main applications.
 */
public static class ChebiApplicationSet {

  @JsonProperty("direct_applications")
  private Set<ChebiOntology> directApplications;

  @JsonProperty("main_applications")
  private Set<ChebiOntology> mainApplications;

  public ChebiApplicationSet(Set<ChebiOntology> directApplications,
                             Set<ChebiOntology> mainApplications) {
    this.directApplications = directApplications;
    this.mainApplications = mainApplications;
  }

  public Set<ChebiOntology> getMainApplications() {
    return this.mainApplications;
  }

  public Set<ChebiOntology> getDirectApplications() {
    return this.directApplications;
  }

  /**
   * @return a BasicDBObject with the keys "direct_applications" and "main_applications", each holding the list
   * of the corresponding ontologies converted with ChebiOntology.toBasicDBObject
   */
  public BasicDBObject toBasicDBObject() {
    BasicDBList directList = new BasicDBList();
    for (ChebiOntology application : getDirectApplications()) {
      directList.add(application.toBasicDBObject());
    }

    BasicDBList mainList = new BasicDBList();
    for (ChebiOntology application : getMainApplications()) {
      mainList.add(application.toBasicDBObject());
    }

    BasicDBObject document = new BasicDBObject();
    document.put("direct_applications", directList);
    document.put("main_applications", mainList);
    return document;
  }
}
/**
 * Reads every ChEBI ontology exposed by the BRENDA DB connection and indexes it by ChEBI ID.
 * @param brendaDB A SQLConnection object to the BRENDA DB
 * @return a map from ChebiId to ChebiOntology objects
 * @throws SQLException
 */
public static Map<String, ChebiOntology> fetchOntologyMap(SQLConnection brendaDB) throws SQLException {
  Map<String, ChebiOntology> ontologiesById = new HashMap<>();
  int seen = 0;
  for (Iterator<ChebiOntology> cursor = brendaDB.getChebiOntologies(); cursor.hasNext(); seen++) {
    ChebiOntology current = cursor.next();
    // Collisions on the ChEBI ID are not expected: the number of distinct ChEBI IDs in the DB
    // is the same as the number of rows.
    ontologiesById.put(current.getChebiId(), current);
  }
  LOGGER.debug("Done processing ontologies");
  LOGGER.debug("Found %d ontologies", seen);
  return ontologiesById;
}
/**
 * Reads all relationships of type 'isSubTypeOf' from the BRENDA DB connection and groups them by parent.
 * @param brendaDB a SQLConnection object to the BRENDA DB
 * @return a map from a ChEBI ID (String) to the set of its subtypes' ChEBI IDs.
 * @throws SQLException
 */
public static Map<String, Set<String>> fetchIsSubtypeOfRelationships(SQLConnection brendaDB) throws SQLException {
  Map<String, Set<String>> subtypesByParent = new HashMap<>();
  int seen = 0;
  // Walk over all ChEBI relationships of type "is subtype of".
  for (Iterator<ChebiRelationship> cursor = brendaDB.getChebiRelationships(IS_SUBTYPE_OF_RELATIONSHIP_TYPE);
       cursor.hasNext(); seen++) {
    ChebiRelationship edge = cursor.next();
    String parent = edge.getParentChebiId();
    String child = edge.getChebiId();
    // Register the child under its parent, creating the parent's set on first sight.
    subtypesByParent.computeIfAbsent(parent, unused -> new HashSet<>()).add(child);
  }
  LOGGER.debug("Done processing 'is subtype of' relationships");
  LOGGER.debug("Found %d 'is subtype of' relationships", seen);
  return subtypesByParent;
}
/**
 * Reads all relationships of type 'hasRole' from the BRENDA DB connection and groups the roles by the
 * ChEBI ID that holds them.
 * @param brendaDB a SQLConnection object to the BRENDA DB
 * @return a map from a ChEBI ID (String) to the set of its roles' ChEBI IDs.
 * @throws SQLException
 */
public static Map<String, Set<String>> fetchHasRoleRelationships(SQLConnection brendaDB) throws SQLException {
  Map<String, Set<String>> rolesByChebiId = new HashMap<>();
  int seen = 0;
  // Walk over all ChEBI relationships of type "has role".
  Iterator<ChebiRelationship> cursor = brendaDB.getChebiRelationships(HAS_ROLE_RELATIONSHIP_TYPE);
  while (cursor.hasNext()) {
    // In a "has role" relationship the child is the "base chebi id" and the parent is the "role chebi id".
    ChebiRelationship edge = cursor.next();
    String role = edge.getParentChebiId();
    String base = edge.getChebiId();
    // Add the role to the base's set of roles, creating that set on first sight.
    rolesByChebiId.computeIfAbsent(base, unused -> new HashSet<>()).add(role);
    seen += 1;
  }
  LOGGER.debug("Done processing 'has role' relationships");
  LOGGER.debug("Found %s 'has role' relationships", seen);
  return rolesByChebiId;
}
/**
 * This method processes relationships "is subtype of" to produce a mapping between each application and its main
 * applications, used subsequently (outside of this) to compute each ontology's main applications.
 *
 * The main applications are the direct subtypes of applicationChebiId. Every main application maps to a set
 * containing at least itself, and every (transitive) subtype of a main application maps to the union of the main
 * applications it descends from.
 *
 * @param isSubtypeOfRelationships map {chebi id -> subtype's chebi ids}
 * @param applicationChebiId main application's chebi id
 * @return a map {application's chebi id -> related main application's chebi ids}; empty if applicationChebiId
 *         has no recorded subtype
 */
public static Map<String, Set<String>> getApplicationToMainApplicationsMap(
    Map<String, Set<String>> isSubtypeOfRelationships, String applicationChebiId) {
  Map<String, Set<String>> applicationToMainApplicationsMap = new HashMap<>();

  // Compute the set of main applications. These are the ontologies that are subtypes of the ontology 'application'.
  Set<String> mainApplicationsChebiId = isSubtypeOfRelationships.get(applicationChebiId);
  if (mainApplicationsChebiId == null) {
    // The root has no known subtype: there is nothing to traverse.
    return applicationToMainApplicationsMap;
  }

  // Compute the initial list of applications to visit from the set of main applications.
  ArrayList<String> applicationsToVisit = new ArrayList<>(mainApplicationsChebiId);

  // For each main application, map it to a set containing itself. The set has to be mutable: a main application
  // can also be a subtype of another application, in which case it inherits further main applications below.
  for (String mainApplication : applicationsToVisit) {
    Set<String> itself = new HashSet<>();
    itself.add(mainApplication);
    applicationToMainApplicationsMap.put(mainApplication, itself);
  }

  // Then visit all applications in a BFS fashion, propagating/merging the set of main applications as we progress
  // down the relationship graph. The list is used as a work queue and grows while being traversed by index.
  int currentIndex = 0;
  while (currentIndex < applicationsToVisit.size()) {
    String currentApplication = applicationsToVisit.get(currentIndex);
    currentIndex++;

    Set<String> subApplications = isSubtypeOfRelationships.get(currentApplication);
    if (subApplications == null) {
      continue;
    }

    // Never null: an application is only queued after it has been given an entry in the map.
    Set<String> inheritedMainApplications = applicationToMainApplicationsMap.get(currentApplication);
    for (String subApplication : subApplications) {
      Set<String> mainApplicationsSet =
          applicationToMainApplicationsMap.computeIfAbsent(subApplication, unused -> new HashSet<>());
      // Only (re)visit a sub-application when it gained main applications: its own subtypes then need to learn
      // about them too. Sets only grow, so this terminates even if the relationships contain a cycle, and it
      // avoids walking shared sub-graphs over and over.
      if (mainApplicationsSet.addAll(inheritedMainApplications)) {
        applicationsToVisit.add(subApplication);
      }
    }
  }
  return applicationToMainApplicationsMap;
}
/**
 * This function constructs the set of main and direct applications for each ontology that has a role.
 *
 * A direct application of a chemical entity is one of its roles that is also an application, i.e. a key of the
 * map returned by getApplicationToMainApplicationsMap for the 'application' ontology. Its main applications are
 * the main applications of all its direct applications. ChEBI IDs that are absent from ontologyMap are ignored,
 * both as chemical entities and as applications. Entities left with no application at all are not in the result.
 *
 * @param ontologyMap map {chebi id -> ChebiOntology object}
 * @param isSubtypeOfRelationships map {chebi id -> set of chebi id for its subtypes}
 * @param hasRoleRelationships map {chebi id -> set of chebi id for its roles}
 * @return a map from ChebiOntology objects to a ChebiApplicationSet object
 */
public static Map<ChebiOntology, ChebiApplicationSet> getApplications(
    Map<String, ChebiOntology> ontologyMap,
    Map<String, Set<String>> isSubtypeOfRelationships,
    Map<String, Set<String>> hasRoleRelationships) {

  Map<String, Set<String>> applicationToMainApplicationsMap = getApplicationToMainApplicationsMap(
      isSubtypeOfRelationships, APPLICATION_CHEBI_ID);

  Map<ChebiOntology, ChebiApplicationSet> chemicalEntityToApplicationsMap = new HashMap<>();
  for (Map.Entry<String, Set<String>> entityAndRoles : hasRoleRelationships.entrySet()) {
    ChebiOntology chemicalEntity = ontologyMap.get(entityAndRoles.getKey());
    if (chemicalEntity == null) {
      // Without this guard all unknown entities would share the null key, and their applications would be
      // merged together and handed back to any caller looking up an unknown (null) ontology.
      LOGGER.debug("Found no ontology for %s. Ignoring its roles.", entityAndRoles.getKey());
      continue;
    }

    Set<ChebiOntology> directApplications = new HashSet<>();
    Set<ChebiOntology> mainApplications = new HashSet<>();
    for (String role : entityAndRoles.getValue()) {
      // Filter out the roles that are not applications.
      if (!applicationToMainApplicationsMap.containsKey(role)) {
        continue;
      }
      ChebiOntology directApplication = ontologyMap.get(role);
      if (directApplication != null) {
        directApplications.add(directApplication);
      }
      Set<String> mainApplicationIds = applicationToMainApplicationsMap.get(role);
      if (mainApplicationIds == null) {
        continue;
      }
      for (String mainApplicationId : mainApplicationIds) {
        ChebiOntology mainApplication = ontologyMap.get(mainApplicationId);
        if (mainApplication != null) {
          mainApplications.add(mainApplication);
        }
      }
    }

    // Only keep the chemical entities that have at least one application.
    if (!directApplications.isEmpty() || !mainApplications.isEmpty()) {
      chemicalEntityToApplicationsMap.put(
          chemicalEntity, new ChebiApplicationSet(directApplications, mainApplications));
    }
  }
  return chemicalEntityToApplicationsMap;
}
/**
 * This function contains the main logic for adding ChEBI applications to the Installer database.
 * It fetches the ontologies and the 'is subtype of' / 'has role' relationships from Brenda, computes the
 * application set of each chemical entity, then iterates over the chemicals of the Mongo database and updates
 * each chemical that has a ChEBI ID, a known ontology and at least one application.
 * @param db a MongoDB object representing the connection to the main MongoDB instance
 * @param brendaDB a SQLConnection to the Brenda database, on which to find the ChEBI ontologies and relationships
 * @throws SQLException
 * @throws IOException
 */
public void addChebiApplications(MongoDB db, SQLConnection brendaDB) throws SQLException, IOException {
  // Get the ontology map (ChebiId -> ChebiOntology object)
  Map<String, ChebiOntology> ontologyMap = fetchOntologyMap(brendaDB);
  LOGGER.info("Done fetching ontology map: ChEBI ID -> ontology object (id, term, definition)");

  // Get relationships of type 'isSubtypeOf'
  Map<String, Set<String>> isSubtypeOfRelationships = fetchIsSubtypeOfRelationships(brendaDB);
  LOGGER.info("Done fetching 'is subtype of' relationships");

  // Get relationships of type 'hasRole'
  Map<String, Set<String>> hasRoleRelationships = fetchHasRoleRelationships(brendaDB);
  LOGGER.info("Done fetching 'has role' relationships");

  // Get the applications for all chemical entities
  Map<ChebiOntology, ChebiApplicationSet> chemicalEntityToApplicationsMap = getApplications(
      ontologyMap, isSubtypeOfRelationships, hasRoleRelationships);
  LOGGER.info("Done computing applications");

  DBIterator chemicalsIterator = db.getIteratorOverChemicals();
  // Iterate over all chemicals
  while (chemicalsIterator.hasNext()) {
    Chemical chemical = db.getNextChemical(chemicalsIterator);
    String inchi = chemical.getInChI();
    String chebiId = db.getChebiIDFromInchi(inchi);
    if (chebiId == null || chebiId.isEmpty()) {
      continue;
    }
    LOGGER.info("Processing Chemical with InChI: %s and ChEBI ID: %s", inchi, chebiId);

    ChebiOntology ontology = ontologyMap.get(chebiId);
    if (ontology == null) {
      // Never look up a null ontology: the applications map is a HashMap and may hold an entry under the null
      // key, which would attach unrelated applications to this chemical.
      LOGGER.debug("Found no ontology for %s. Skipping database update for this chemical.", chebiId);
      continue;
    }

    ChebiApplicationSet applicationSet = chemicalEntityToApplicationsMap.get(ontology);
    if (applicationSet == null) {
      LOGGER.debug("Found no applications for %s. Skipping database update for this chemical.", chebiId);
      continue;
    }
    db.updateChemicalWithChebiApplications(chebiId, applicationSet);
  }
}
/**
 * Proof of concept: connects to a local BRENDA DB, computes the applications of all chemical entities and
 * prints, as pretty-printed JSON, the application set of the ontology with ChEBI ID 'CHEBI:46195'.
 * This should later be moved to a test or removed.
 * @param args unused
 * @throws Exception if the connection, the queries or the JSON serialization fail
 */
public static void main(String[] args) throws Exception {
  // Connect to the BRENDA DB
  SQLConnection brendaDB = new SQLConnection();
  brendaDB.connect("127.0.0.1", 3306, "brenda_user", "");

  try {
    // Get the ontology map (ChebiId -> ChebiOntology object)
    Map<String, ChebiOntology> ontologyMap = fetchOntologyMap(brendaDB);

    // Get "is subtype of" relationships
    Map<String, Set<String>> isSubTypeOfRelationships = fetchIsSubtypeOfRelationships(brendaDB);

    // Get "has role" relationships
    Map<String, Set<String>> hasRoleRelationships = fetchHasRoleRelationships(brendaDB);

    // Get the applications for all chemical entities
    Map<ChebiOntology, ChebiApplicationSet> chemicalEntityToApplicationsMap = getApplications(
        ontologyMap, isSubTypeOfRelationships, hasRoleRelationships);

    ChebiOntology applicationOntology = ontologyMap.get("CHEBI:46195");

    // Convert ChebiApplicationSet to JSON string and pretty print
    String chebiApplicationSetString = mapper.writerWithDefaultPrettyPrinter().writeValueAsString(
        chemicalEntityToApplicationsMap.get(applicationOntology));
    System.out.println(chebiApplicationSetString);
  } finally {
    // Disconnect from the BRENDA DB, even when one of the steps above failed.
    brendaDB.disconnect();
  }
}
}