/*
* Concept profile generation tool suite
* Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
* Rotterdam, The Netherlands
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published
* by the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>
*/
package Anni;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.erasmusmc.dataimport.GeneOntology;
import org.erasmusmc.ontology.Concept;
import org.erasmusmc.ontology.DefaultTypes;
import org.erasmusmc.ontology.Ontology;
import org.erasmusmc.ontology.OntologyManager;
import org.erasmusmc.ontology.Relation;
import org.erasmusmc.ontology.ontologyutilities.OntologyUtilities;
import org.erasmusmc.semanticnetwork.SemanticGroup;
import org.erasmusmc.semanticnetwork.SemanticNetwork;
import org.erasmusmc.semanticnetwork.SemanticType;
import org.erasmusmc.utilities.ReadTextFile;
import org.erasmusmc.utilities.StringUtilities;
public class BuildPredefinedConceptSets {
public static String server = "localhost";
public static String database = "ConceptSets_july2012perOrg";
public static String user = "root";
public static String password = "";
public static String ontologyName = "UMLS2010ABHomologeneJochemToxV1_6";
public static String geneOntologyFilename = "/home/biosemantics/GO/go_20120714-assocdb.rdf-xml";
public static String toxClassificationTreeFilename = "/home/biosemantics/AnniUpdate/tox_classification_tree.txt";
public static String toxConcept2SetFilename = "/home/biosemantics/AnniUpdate/chemConceptIDToToxClass_v3.txt";
public static int molfuncConceptSet = 3;
public static int celcompConceptSet = 4;
public static int bioprocConceptSet = 5;
public static int semanticGroupdConceptSet = 6;
public static int genesConceptSet = 7;
public static int chemicalsConceptSet = 8;
public static int toxConceptSet = 9;
public static int filtersConceptSet = 10;
public static void main(String[] args) {
OntologyManager manager = new OntologyManager(server, user, password);
Ontology ontology = manager.fetchClient(ontologyName);
insertTopSets();
doSemanticTypesandGroups(ontology);
doGenes(ontology);
doChemicals(ontology);
doExcFilter(ontology);
GeneOntology geneOntology = new GeneOntology(geneOntologyFilename);
geneOntology.dumpDatasetsInMySQL(geneOntology.buildOntology(), ontology, server, database, user, password, molfuncConceptSet, celcompConceptSet, bioprocConceptSet);
doToxTree(ontology);
System.out.println(StringUtilities.now() + "\tDone");
}
private static void doToxTree(Ontology ontology) {
System.out.println(StringUtilities.now() + "\tInserting tox tree");
Map<String, Integer> internalID2ConceptSet = new HashMap<String, Integer>();
try {
Statement stmt = initStatement();
//Add tree:
for (String line : new ReadTextFile(toxClassificationTreeFilename)){
String[] cols = line.split("\t");
String internalID = cols[0];
String name = cols[1];
stmt.execute("INSERT INTO conceptsets (name,parent) VALUES (\""+name+"\","+toxConceptSet+");");
ResultSet result = stmt.executeQuery("SELECT LAST_INSERT_ID()");
if (result.next()){
Integer conceptSet = result.getInt(1);
internalID2ConceptSet.put(internalID, conceptSet);
}
}
//Add concepts:
for (String line : new ReadTextFile(toxConcept2SetFilename)){
String[] cols = line.split("\t");
String conceptID = cols[0];
String internalID = cols[1];
Integer conceptSet = internalID2ConceptSet.get(internalID);
if (conceptSet == null)
System.err.println("Illegal concept set in tox file: " + internalID);
else {
stmt.execute("REPLACE INTO set_2_concept VALUES (" + conceptID + "," + conceptSet + ");");
}
}
} catch (SQLException e) {
e.printStackTrace();
}
}
private static void insertTopSets() {
System.out.println(StringUtilities.now() + "\tInserting top sets");
try {
Statement stmt = initStatement();
stmt.execute("REPLACE INTO conceptsets (id,name,parent) VALUES (1, \"Predefined Concept Sets\", 0);");
stmt.execute("REPLACE INTO conceptsets (id,name,parent) VALUES (2, \"GO\", 1);");
stmt.execute("REPLACE INTO conceptsets (id,name,parent) VALUES (3, \"Molecular Function\", 2);");
stmt.execute("REPLACE INTO conceptsets (id,name,parent) VALUES (4, \"Cellular Component\", 2);");
stmt.execute("REPLACE INTO conceptsets (id,name,parent) VALUES (5, \"Biological Process\", 2);");
stmt.execute("REPLACE INTO conceptsets (id,name,parent) VALUES ("+semanticGroupdConceptSet+", \"Semantic Groups\", 1);");
stmt.execute("REPLACE INTO conceptsets (id,name,parent) VALUES ("+genesConceptSet+", \"Genes\", 1);");
stmt.execute("REPLACE INTO conceptsets (id,name,parent) VALUES ("+chemicalsConceptSet+", \"Chemicals\", 1);");
stmt.execute("REPLACE INTO conceptsets (id,name,parent) VALUES ("+toxConceptSet+", \"Toxic Effect (RTECS & IARC) \", 1);");
stmt.execute("REPLACE INTO conceptsets (id,name,parent) VALUES ("+filtersConceptSet+", \"Filters\", 1);");
} catch (SQLException e) {
e.printStackTrace();
}
}
private static void doSemanticTypesandGroups(Ontology ontology) {
System.out.println(StringUtilities.now() + "\tInserting semantic types and groups");
int parent = 1;
SemanticNetwork semanticNetwork = new SemanticNetwork();
semanticNetwork.loadDefaultsFromFile();
for(SemanticGroup semanticGroup: semanticNetwork.groups.values()) {
int autoIncKeyFromFuncGroup = -1;
Statement stmt = initStatement();
ResultSet result;
try {
result = stmt.executeQuery("SELECT id FROM conceptsets WHERE name='" + semanticGroup.name + "' AND parent='" + semanticGroupdConceptSet + "'");
if(result.next())
autoIncKeyFromFuncGroup = result.getInt(1);
else {
stmt.execute("INSERT INTO conceptsets VALUES (NULL, '" + semanticGroup.name + "','" + semanticGroupdConceptSet + "')");
result = stmt.executeQuery("SELECT LAST_INSERT_ID()");
if (result.next())
autoIncKeyFromFuncGroup = result.getInt(1);
for(SemanticType semanticType: semanticNetwork.types.values()) {
int autoIncKeyFromFuncGroupType = -1;
if(semanticType.group == semanticGroup) {
result = stmt.executeQuery("SELECT id FROM conceptsets WHERE name='" + semanticType.name + "' AND parent='" + autoIncKeyFromFuncGroup +"'");
if(result.next())
autoIncKeyFromFuncGroupType = result.getInt(1);
else {
stmt.execute("INSERT INTO conceptsets VALUES (NULL, '" + semanticType.name + "','" + autoIncKeyFromFuncGroup + "')");
result = stmt.executeQuery("SELECT LAST_INSERT_ID()");
if (result.next())
autoIncKeyFromFuncGroupType = result.getInt(1);
}
try {
List<Relation> relations = ontology.getRelationsForConceptAsObject(-semanticType.ID);
ArrayList<String> sqlTypes = new ArrayList<String>();
for(Relation rel: relations) {
sqlTypes.add("('" + rel.subject +"','" + autoIncKeyFromFuncGroupType + "')");
}
if(sqlTypes.size() > 10000) {
for(int i=0;i<sqlTypes.size();i+=10000) {
int end = i + 10000;
if(end > sqlTypes.size()) end = sqlTypes.size();
stmt.execute("REPLACE INTO set_2_concept VALUES " + StringUtilities.join(sqlTypes.subList(i, end),","));
}
}
else
if(sqlTypes.size() != 0)
stmt.execute("REPLACE INTO set_2_concept VALUES " + StringUtilities.join(sqlTypes,","));
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
}
} catch (SQLException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
//getChildren(ontology, null, semanticNetwork, parent);
}
private static void getChildren(Ontology ontology, SemanticType parentType, SemanticNetwork semanticNetwork, int parentid) {
Statement stmt = initStatement();
for (SemanticType semanticType: semanticNetwork.types.values()) {
if (semanticType.parent == parentType) {
ResultSet result;
int autoIncKeyFromFunc = -1;
String parentName = "Semantic Types";
if(parentType != null)
parentName = parentType.name;
try {
result = stmt.executeQuery("SELECT id FROM conceptsets WHERE name='" + parentName + "' AND parent='" + parentid + "'");
if (result.next())
autoIncKeyFromFunc = result.getInt(1);
else {
stmt.execute("INSERT INTO conceptsets VALUES (NULL, '" + parentName + "','" + parentid + "')");
result = stmt.executeQuery("SELECT LAST_INSERT_ID()");
if (result.next())
autoIncKeyFromFunc = result.getInt(1);
}
} catch (SQLException e) {
e.printStackTrace();
System.out.println("INSERT INTO conceptsets VALUES (NULL, '" + parentName + "','" + parentid + "')");
}
try {
List<Relation> relations = ontology.getRelationsForConceptAsObject(-semanticType.ID);
ArrayList<String> sqlTypes = new ArrayList<String>();
for(Relation rel: relations) {
sqlTypes.add("('" + rel.subject +"','" + autoIncKeyFromFunc + "')");
}
if(sqlTypes.size() > 10000) {
for(int i=0;i<sqlTypes.size();i+=10000) {
int end = i + 10000;
if(end > sqlTypes.size()) end = sqlTypes.size();
stmt.execute("REPLACE INTO set_2_concept VALUES " + StringUtilities.join(sqlTypes.subList(i, end),","));
}
}
else
if(sqlTypes.size() != 0)
stmt.execute("REPLACE INTO set_2_concept VALUES " + StringUtilities.join(sqlTypes,","));
} catch (SQLException e) {
e.printStackTrace();
}
getChildren(ontology, semanticType, semanticNetwork, autoIncKeyFromFunc);
}
}
}
private static Statement initStatement() {
Statement stmt = null;
try {
Class.forName("com.mysql.jdbc.Driver");
try {
Connection con = DriverManager.getConnection("jdbc:mysql://"+server+":3306/",user, password);
stmt = con.createStatement(ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_UPDATABLE);
stmt.execute("USE " + database);
} catch (SQLException e) {
e.printStackTrace();
}
} catch (ClassNotFoundException e) {
e.printStackTrace();
}
return stmt;
}
private static void doExcFilter(Ontology ontology) {
System.out.println(StringUtilities.now() + "\tInserting exc. filter for genes");
Statement stmt = initStatement();
//Insert exclusion filter concept set:
int excFilterConceptSet = -1;
try {
stmt.execute("REPLACE INTO conceptsets (name,parent) VALUES (\"Excl. filter for genes\", "+filtersConceptSet+");");
ResultSet result = stmt.executeQuery("SELECT LAST_INSERT_ID()");
if (result.next())
excFilterConceptSet = result.getInt(1);
} catch (SQLException e) {
e.printStackTrace();
}
//Generate filter:
List<Integer> semanticTypes = new ArrayList<Integer>();
semanticTypes.add(-1);
semanticTypes.add(-2);
semanticTypes.add(-3);
semanticTypes.add(-4);
semanticTypes.add(-5);
semanticTypes.add(-6);
semanticTypes.add(-7);
semanticTypes.add(-8);
semanticTypes.add(-9);
semanticTypes.add(-10);
semanticTypes.add(-11);
semanticTypes.add(-12);
semanticTypes.add(-13);
semanticTypes.add(-14);
semanticTypes.add(-15);
semanticTypes.add(-16);
semanticTypes.add(-51);
semanticTypes.add(-52);
semanticTypes.add(-53);
semanticTypes.add(-54);
semanticTypes.add(-55);
semanticTypes.add(-56);
semanticTypes.add(-57);
semanticTypes.add(-58);
semanticTypes.add(-59);
semanticTypes.add(-60);
semanticTypes.add(-61);
semanticTypes.add(-62);
semanticTypes.add(-63);
semanticTypes.add(-64);
semanticTypes.add(-65);
semanticTypes.add(-66);
semanticTypes.add(-67);
semanticTypes.add(-68);
semanticTypes.add(-69);
semanticTypes.add(-70);
semanticTypes.add(-71);
semanticTypes.add(-72);
semanticTypes.add(-73);
semanticTypes.add(-74);
semanticTypes.add(-75);
semanticTypes.add(-77);
semanticTypes.add(-78);
semanticTypes.add(-79);
semanticTypes.add(-80);
semanticTypes.add(-81);
semanticTypes.add(-82);
semanticTypes.add(-83);
semanticTypes.add(-85);
semanticTypes.add(-86);
semanticTypes.add(-87);
semanticTypes.add(-88);
semanticTypes.add(-89);
semanticTypes.add(-90);
semanticTypes.add(-91);
semanticTypes.add(-92);
semanticTypes.add(-93);
semanticTypes.add(-94);
semanticTypes.add(-95);
semanticTypes.add(-96);
semanticTypes.add(-97);
semanticTypes.add(-98);
semanticTypes.add(-99);
semanticTypes.add(-100);
semanticTypes.add(-101);
semanticTypes.add(-102);
semanticTypes.add(-169);
semanticTypes.add(-170);
semanticTypes.add(-171);
semanticTypes.add(-185);
semanticTypes.add(-203);
List<String> cuiList = new ArrayList<String>();
for(Integer cui: semanticTypes) {
List<Relation> relationList = ontology.getRelationsForConceptAsObject(cui);
for(Relation relation: relationList) {
cuiList.add("('"+relation.subject+"', '"+excFilterConceptSet+"')");
}
}
try {
stmt.execute("REPLACE INTO set_2_concept (conceptid, conceptsetid) VALUES " + StringUtilities.join(cuiList, ","));
} catch (SQLException e) {
e.printStackTrace();
}
}
private static void doGenes(Ontology ontology) {
System.out.println(StringUtilities.now() + "\tInserting genes");
Map<String, String> voc2vocName = new HashMap<String, String>();
voc2vocName.put("HSAPIENS", "H.sapiens");
voc2vocName.put("MMUSCULUS", "M.musculus");
voc2vocName.put("RNORVEGICUS", "R.norvegicus");
voc2vocName.put("ECOLI", "E.coli");
voc2vocName.put("SCEREVISIAE", "S.cerevisiae");
voc2vocName.put("DRERIO", "D.rerio");
voc2vocName.put("CELEGANS", "C.elegans");
voc2vocName.put("GGALLUS", "G.gallus");
voc2vocName.put("DMELANOGASTER", "D.melanogaster");
Map<String,Integer> voc2conceptSetID = new HashMap<String, Integer>();
Statement stmt = initStatement();
for (Map.Entry<String, String> entry: voc2vocName.entrySet()){
try {
stmt.execute("INSERT INTO conceptsets VALUES (NULL, '" + entry.getValue() + "','" + genesConceptSet + "')");
ResultSet result = stmt.executeQuery("SELECT LAST_INSERT_ID()");
if (result.next())
voc2conceptSetID.put(entry.getKey(),result.getInt(1));
} catch (SQLException e) {
e.printStackTrace();
}
}
Map<Integer,Integer> vocID2conceptSetID = new HashMap<Integer, Integer>();
List<String> stringList = new ArrayList<String>();
//int geneVocID = 0;
for (Concept concept : ontology){
if (concept.getID() < 0){//It might be a vocabulary
Integer conceptSetID = voc2conceptSetID.get(concept.getName());
if (conceptSetID != null)
vocID2conceptSetID.put(concept.getID(), conceptSetID);
//else if (concept.getName().equals(OntologyUtilities.geneVocabulary))
// geneVocID = concept.getID();
} else {
for (Relation relation : ontology.getRelationsForConceptAsSubject(concept.getID(), DefaultTypes.fromVocabulary))
if (vocID2conceptSetID.containsKey(relation.object))
stringList.add("('" + concept.getID() + "', '" + vocID2conceptSetID.get(relation.object) +"')");
}
}
try {
stmt.execute("REPLACE INTO set_2_concept (conceptid, conceptsetid) VALUES " + StringUtilities.join(stringList, ","));
} catch (SQLException e) {
e.printStackTrace();
}
}
private static void doChemicals(Ontology ontology) {
System.out.println(StringUtilities.now() + "\tInserting chemicals");
List<String> stringList = new ArrayList<String>();
for (Concept concept : ontology)
if (OntologyUtilities.hasChemVoc(concept, ontology))
stringList.add("('" + concept.getID() + "', '" + chemicalsConceptSet +"')");
Statement stmt = initStatement();
try {
stmt.execute("REPLACE INTO set_2_concept (conceptid, conceptsetid) VALUES " + StringUtilities.join(stringList, ","));
} catch (SQLException e) {
e.printStackTrace();
}
}
}