/** * */ package conceptmapping; import java.io.File; import java.io.PrintWriter; import java.io.StringWriter; import java.sql.Connection; import java.sql.DriverManager; import java.sql.ResultSet; import java.sql.Statement; import java.util.ArrayList; import java.util.Hashtable; import java.util.Iterator; import java.util.List; import oboaccessor.OBO2DB; import org.apache.log4j.Logger; import org.semanticweb.owlapi.model.OWLClass; import owlaccessor.OWLAccessorImpl; // TODO: Auto-generated Javadoc /** * The Class TermOutputer. * * @author Hong Updates * This class output extracted terms and their mapping PATO concepts to a table, including source info. * Run this class after completing CharaParser(4Phenoscape).step2 (2.1, 2.2, 2.3, and 2.4) * 11/23: rewrite to accommodate OBO format * * * TAO: http://purl.obolibrary.org/obo/tao.owl * VAO(Vertebrate Anatomy Ontology): https://phenoscape.svn.sourceforge.net/svnroot/phenoscape/trunk/vocab/skeletal/obo/vertebrate_anatomy.obo * AAO(Amniote Anatomy Ontology): https://phenoscape.svn.sourceforge.net/svnroot/phenoscape/trunk/vocab/amniote_draft.obo */ public class TermOutputer { /** The conn. */ private Connection conn; /** The database. */ private String database; /** The username. */ private String username = "root"; /** The password. */ private String password = "root"; /** The entitytable. */ private String entitytable = "entity"; /** The qualitytable. */ private String qualitytable = "quality"; /** The entity ontos path. */ private ArrayList<String> entityOntosPath =null; /** The quality ontos path. */ private ArrayList<String> qualityOntosPath =null; /** The glosstable. */ private String glosstable = null; /** The sourceprefix. */ private String sourceprefix = null; private static final Logger LOGGER = Logger.getLogger(TermOutputer.class); /** * Instantiates a new term outputer. * * @param database the database * @param outputtableprefix the outputtableprefix * @param eOntoPaths the e onto paths * @param qOntoPaths the q onto paths * @param glosstable the glosstable * @param sourceprefix the sourceprefix */ public TermOutputer(String database, String outputtableprefix, ArrayList<String> eOntoPaths, ArrayList<String> qOntoPaths, String glosstable, String sourceprefix) { this.entitytable = outputtableprefix+"_"+entitytable; this.qualitytable = outputtableprefix+"_"+qualitytable; this.entityOntosPath = eOntoPaths; this.qualityOntosPath = qOntoPaths; this.glosstable = glosstable; this.sourceprefix = sourceprefix; this.database = database; //String PATOURL="C:/Documents and Settings/Hong Updates/Desktop/Australia/phenoscape-fish-source/pato.owl"; //String TAOURL="C:/Documents and Settings/Hong Updates/Desktop/Australia/phenoscape-fish-source/tao.owl"; //this.patoapi = new OWLAccessorImpl(new File(PATOURL)); //this.taoapi = new OWLAccessorImpl(new File(TAOURL)); try{ if(conn == null){ Class.forName("com.mysql.jdbc.Driver"); String URL = "jdbc:mysql://localhost/"+database+"?user="+username+"&password="+password; conn = DriverManager.getConnection(URL); Statement stmt = conn.createStatement(); //stmt.execute("drop table if exists "+ entitytable); TODO uncomment //stmt.execute("create table if not exists "+entitytable+" (id int(11) not null unique auto_increment primary key, term varchar(100), ontoid text, ontolabel text, characterr text, characterstate text, source text)"); TODO uncomment stmt.execute("drop table if exists "+qualitytable); stmt.execute("create table if not exists "+qualitytable+" (id int(11) not null unique auto_increment primary key, term varchar(100), ontoid text, ontolabel text, characterr text, characterstate text, source text)"); } }catch(Exception e){ LOGGER.error("", e); } } /** * Output. */ public void output(){ ArrayList<String> entities =getEterms(); ArrayList<String> qualities = getQterms(); //outputTerms(entities, entitytable); TODO, uncomment outputTerms(qualities, qualitytable); } /** * Process term before look it up in ontology and insert into database. * * @param term the original form of the term * @param type * @return the processed term */ private String processTerm(String term, String type){ String termProcessed = term; //Step 1: replace all underscores with hyphens. Universal for entities and qualities. termProcessed = termProcessed.replaceAll("_", "-"); //Step 2: transform plurals to singular. Only for entities. if(type.compareTo(this.entitytable)==0 && outputter.knowledge.TermOutputerUtilities.isPlural(termProcessed)){ termProcessed=outputter.knowledge.TermOutputerUtilities.toSingular(termProcessed); } return termProcessed; } /** * Output terms. * * @param entities the entities * @param type the type */ private void outputTerms(ArrayList<String> entities, String type) { Iterator<String> it = entities.iterator(); String outtableo = type; try{ Statement stmt = conn.createStatement(); ResultSet rs = null; while(it.hasNext()){ String term = it.next(); type = outtableo; //use the pre-processed term for ontology looking up String termProcessed = this.processTerm(term, type); String[] ontoidinfo = findID(termProcessed, type); String ontoid = ""; String ontolabel = ""; if(ontoidinfo !=null){ type = ontoidinfo[0]; ontoid = ontoidinfo[1]; ontolabel = ontoidinfo[2]; } //we are using the original form of the term to look up sources, don't make changes to the original form before this point. rs = stmt.executeQuery("select distinct source, sentence, type from "+this.sourceprefix+"_sentence where sentence rlike '(^|[^a-z])"+term+"([^a-z]|$)'" ); String sourcelist = "sourcelist|"; //this is so that the first source is not to match "()". String source = ""; String sentence = ""; String character = ""; while(rs.next()){ source=rs.getString("source"); if(!source.matches("("+sourcelist.replaceFirst("\\|$", "")+")")){ sourcelist +=rs.getString("source")+"|"; character = rs.getString("type"); sentence = rs.getString("sentence"); type = type.replaceFirst(";+$", ""); //insert the prcessed term addrecord(termProcessed, ontoid, ontolabel, source.toString(), character, sentence, type); } } /*rs = stmt.executeQuery("select distinct pdf, charnumber, characterr, sentence from "+this.sourceprefix+"_original where sentence rlike '(^|[^a-z])"+term+"([^a-z]|$)' or characterr rlike '(^|[^a-z])"+term+"([^a-z]|$)'" ); String sourcelist = "sourcelist|"; //this is so that the first source is not to match "()". String source = ""; String sentence = ""; String character = ""; while(rs.next()){ source=rs.getString("pdf")+":"+rs.getString("charnumber"); if(!source.matches("("+sourcelist.replaceFirst("\\|$", "")+")")){ sourcelist +=rs.getString("pdf")+":"+rs.getString("charnumber")+"|"; character = rs.getString("characterr"); sentence = rs.getString("sentence"); type = type.replaceFirst(";+$", ""); addrecord(term, ontoid, ontolabel, source.toString(), character, sentence, type); } }*/ } rs.close(); stmt.close(); }catch(Exception e){ LOGGER.error("", e); } } /** * Addrecord. * * @param term the term * @param ontoid the ontoid * @param ontolabel the ontolabel * @param source the source * @param character the character * @param sentence the sentence * @param outtable the outtable */ private void addrecord(String term, String ontoid, String ontolabel, String source, String character, String sentence, String outtable) { try{ Statement stmt = conn.createStatement(); sentence = sentence.replaceAll("\"", "\\\\\""); character = character.replaceAll("\"", "\\\\\""); String q = "insert into "+outtable+"(term, ontoid, ontolabel, characterr, characterstate, source) values (\""+term+"\",\""+ontoid+"\",\""+ontolabel+"\",\""+character+"\",\""+sentence+"\",\""+source+"\")"; System.out.println(q); stmt.execute(q); }catch(Exception e){ LOGGER.error("", e); } } /** * Find id. * * @param term the term * @param type the type * @return the string[] */ private String[] findID(String term, String type) { String qualityid = ""; String entityid = ""; String qualitylabel = ""; String entitylabel=""; try{ //check annotated record Statement stmt = conn.createStatement(); ResultSet rs = stmt.executeQuery("select qualityontoid from phenoscape.fish_original_1st_all where qualitylabel='"+term+"'"); if(rs.next()){ qualityid = rs.getString("qualityontoid"); qualitylabel = term; } rs = stmt.executeQuery("select entityontoid from phenoscape.fish_original_1st_all where entitylabel='"+term+"'"); if(rs.next()){ entityid = rs.getString("entityontoid"); entitylabel = term; } if((entityid+qualityid).trim().length()==0){ return searchOntologies(term, type); }else if(entityid.length()>0){ return new String[]{this.entitytable, entityid, entitylabel}; }else if(qualityid.length()>0){ return new String[]{this.qualitytable, qualityid, qualitylabel}; } }catch(Exception e){ LOGGER.error("", e); } return null; } /** * use OWL API. * * @param term the term * @param type the type * @return the string[] * @throws Exception the exception */ private String[] searchOntologies(String term, String type) throws Exception { //search quality ontologies String[] results = new String[]{"", "", ""}; boolean added = false; if(type.compareTo(this.qualitytable)==0){ for(String qonto: this.qualityOntosPath){ if(qonto.endsWith(".owl")){ OWLAccessorImpl owlapi = new OWLAccessorImpl(new File(qonto), new ArrayList<String>()); String[] result = searchOWLOntology(term, owlapi, type); if(result!=null){ added = true; results = add(results, result); } }else if(qonto.endsWith(".obo")){ String[] result = searchOBOOntology(term, qonto, type); if(result!=null){ added = true; results = add(results, result); } } } }else if(type.compareTo(this.entitytable)==0){ for(String eonto: this.entityOntosPath){ if(eonto.endsWith(".owl")){ OWLAccessorImpl owlapi = new OWLAccessorImpl(new File(eonto), new ArrayList<String>()); String[] result = searchOWLOntology(term, owlapi, type); if(result!=null){ added = true; results = add(results, result); } }else if(eonto.endsWith(".obo")){ String[] result = searchOBOOntology(term, eonto, type); if(result!=null){ added = true; results = add(results, result); } } } } if(added){ return results; }else{ return null; } /*String[] patoresult = searchOntology(term, this.qualitytable); String[] taoresult = searchOntology(term, this.entitytable); if(patoresult==null && taoresult!=null) return taoresult; if(patoresult!=null && taoresult==null) return patoresult; if(patoresult!=null && taoresult!=null) return taoresult;*/ /*{//merge String[] results = new String[patoresult.length + taoresult.length]; int i; int j; for(i=0, j=0; i<patoresult.length; i++, j++){ results[i] = patoresult[j]; } for(i=patoresult.length, j=0; i<patoresult.length + taoresult.length; i++, j++){ results[i] = taoresult[j]; } return results; }*/ } /** * Search obo ontology. * * @param term the term * @param ontofile the ontofile * @param type the type * @return the string[] * @throws Exception the exception */ private String[] searchOBOOntology(String term, String ontofile, String type) throws Exception{ String [] result = new String[3]; int i = ontofile.lastIndexOf("/"); int j = ontofile.lastIndexOf("\\"); i = i>j? i:j; String ontoname = ontofile.substring(i+1).replaceFirst("\\.obo", ""); OBO2DB o2d = new OBO2DB("obo", ontofile ,ontoname); String[] match = o2d.getID(term); if(match !=null){ result[0] = type; result[1] = match[0]; //id result[2] = match[1]; //label }else{ result = null; } return result; } /** * Adds the. * * @param results the results * @param result the result * @return the string[] */ private String[] add(String[] results, String[] result) { if(result == null) return results; int start = 1; if(results[0].length()==0 && results[1].length()==0 && results[2].length()==0 ){//initialization start =0; } for(int i = start; i < 3; i++){ results[i] += result[i]+";"; } return results; } /** * Search owl ontology. * * @param term the term * @param owlapi the owlapi * @param type the type * @return the string[] * @throws Exception the exception */ private String[] searchOWLOntology(String term, OWLAccessorImpl owlapi, String type)throws Exception { String[] result = null; Hashtable<String, ArrayList<OWLClass>> typedmatches = owlapi.retrieveConcept(term); ArrayList<OWLClass> matches = typedmatches.get("original"); matches.addAll(typedmatches.get("exact")); //matches.addAll(typedmatches.get("narrow")); //matches.addAll(typedmatches.get("related")); Iterator<OWLClass> it = matches.iterator(); //exact match first while(it.hasNext()){ OWLClass c = it.next(); String label = owlapi.getLabel(c); if(label.compareToIgnoreCase(term)==0){ result= new String[3]; result[0] = type; result[1] = c.toString().replaceFirst("http.*?(?=(PATO|TAO|VAO|AMAO|AAO|UBERON)_)", "").replaceFirst("_", ":").replaceAll("[<>]", "");//id result[2] = label; return result; } } //otherwise, append all possible matches it = matches.iterator(); result = new String[]{"", "", ""}; while(it.hasNext()){ OWLClass c = it.next(); String label = owlapi.getLabel(c); result[0] = type; result[1] += c.toString().replaceFirst(".*http.*?(?=(PATO|TAO|VAO|AMAO|AAO|UBERON)_)", "").replaceFirst("_", ":").replaceAll("[<>]", "")+";"; result[2] += label+";"; } if(result[1].length()>0){ result[1] = result[1].replaceFirst(";$", ""); result[2] = result[2].replaceFirst(";$", ""); return result; }else{ return null; } } /** * Gets the qterms. * * @return the qterms */ private ArrayList<String> getQterms() { ArrayList<String> qterms = new ArrayList<String>(); try{ String q = "SELECT distinct word FROM "+this.sourceprefix+"_unknownwords where "+ //<<<<<<< HEAD // "word in (select term from phenoscape."+this.glosstable+" where category !='structure') or "+ //======= "word in (select term from "+this.glosstable+" where category !='structure') or "+ "word in (select word from "+this.sourceprefix+"_wordroles p where semanticrole ='c') or "+ "word in (select term from "+this.sourceprefix+"_term_category where category !='structure')"; Statement stmt = conn.createStatement(); ResultSet rs = stmt.executeQuery(q); while(rs.next()){ String word = rs.getString("word"); if(!qterms.contains(word)){ qterms.add(word); } } }catch(Exception e){ LOGGER.error("", e); } return qterms; } /** * Gets the eterms. * * @return the eterms */ private ArrayList<String> getEterms() { ArrayList<String> eterms = new ArrayList<String>(); try{ String q = "SELECT distinct word FROM "+this.sourceprefix+"_unknownwords where "+ //<<<<<<< HEAD // "word in (select term from phenoscape."+this.glosstable+" where category ='structure') or "+ //======= "word in (select term from "+this.glosstable+" where category ='structure') or "+ //>>>>>>> branch 'master' of ssh://git@github.com/zilongchang/phenoscape-nlp.git "word in (select word from "+this.sourceprefix+"_wordroles p where semanticrole in ('os', 'op')) or "+ "word in (select term from "+this.sourceprefix+"_term_category where category ='structure')"; Statement stmt = conn.createStatement(); ResultSet rs = stmt.executeQuery(q); while(rs.next()){ String word = rs.getString("word"); if(!eterms.contains(word)){ eterms.add(word); } } }catch(Exception e){ LOGGER.error("", e); } return eterms; } /** * The main method. * * @param args the arguments */ public static void main(String[] args) { //need an database "obo" (may be empty) if search obo ontologies /* String database = "phenoscape"; String outputtableprefix = "pheno_amphibia"; String glosstable = "fishglossaryfixed"; //changed to amphibia (was archosaur) String sourceprefix = "pheno_amphibia"; ArrayList<String> eOntoPaths = new ArrayList<String>(); //changed eOntoPaths.add("C:\\Users\\Zilong Chang\\Documents\\WORK\\Ontology\\vertebrate_anatomy.obo"); //eOntoPaths.add("C:\\Users\\Zilong Chang\\Documents\\WORK\\Ontology\\AAO.obo"); //eOntoPaths.add("C:\\Users\\Zilong Chang\\Documents\\WORK\\Ontology\\AA.obo"); ArrayList<String> qOntoPaths = new ArrayList<String>(); qOntoPaths.add("C:\\Users\\Zilong Chang\\Documents\\WORK\\Ontology\\pato.owl"); */ //The three xml files sent by Alex on July 23, 2012 String database = "biocreative2012"; String outputtableprefix = "pheno_alex"; String glosstable = "fishglossaryfixed"; String sourceprefix = "phenoalex"; ArrayList<String> eOntoPaths = new ArrayList<String>(); //changed eOntoPaths.add("C:\\Users\\Zilong Chang\\Desktop\\onto\\phenoscape-ext.owl"); ArrayList<String> qOntoPaths = new ArrayList<String>(); qOntoPaths.add("C:\\Users\\Zilong Chang\\Desktop\\onto\\pato.owl"); qOntoPaths.add("C:\\Users\\Zilong Chang\\Desktop\\onto\\bspo.owl"); TermOutputer to = new TermOutputer(database, outputtableprefix, eOntoPaths, qOntoPaths, glosstable, sourceprefix); to.output(); } }