/* Date:        September 15, 2010
 * Template:    PluginScreenJavaTemplateGen.java.ftl
 * generator:   org.molgenis.generators.ui.PluginScreenJavaTemplateGen 3.3.2-testing
 *
 * THIS FILE IS A TEMPLATE. PLEASE EDIT :-)
 */

package plugins.LuceneIndex;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.molgenis.framework.db.Database;
import org.molgenis.framework.ui.PluginModel;
import org.molgenis.framework.ui.ScreenController;
import org.molgenis.util.Tuple;

import uk.ac.ebi.ontocat.Ontology;
import uk.ac.ebi.ontocat.OntologyService;
import uk.ac.ebi.ontocat.OntologyServiceException;
import uk.ac.ebi.ontocat.OntologyTerm;
import uk.ac.ebi.ontocat.file.FileOntologyService;

/**
 * Builds and searches a Lucene index over the ontology files listed in
 * {@link #ontologyNamesMap}.
 * <p>
 * The ontology files are looked up under the directory returned by
 * {@code LuceneConfiguration.GetLuceneConfiguration("ONTOLOGIES_DIRECTORY")}.
 * The index itself is stored in an {@code ontocatIndexdir/} directory below
 * {@code java.io.tmpdir}; the resolved path is published through the
 * {@code java.io.tmpdir.ontocatIndexdir} system property.
 * <p>
 * Each indexed document has three fields: {@code term} (lower-cased label),
 * {@code ontologyLabel} and {@code expansion} (";"-separated, double-quoted
 * synonyms and children of the term).
 */
public class OntoCatIndexPlugin extends PluginModel<org.molgenis.util.Entity>
{
    private static final long serialVersionUID = 71L;

    /** System property under which the resolved index directory is published. */
    private static final String INDEX_DIR_PROPERTY = "java.io.tmpdir.ontocatIndexdir";

    /** Name of the index directory below java.io.tmpdir (trailing "/" kept on purpose). */
    private static final String INDEX_DIR_NAME = "ontocatIndexdir/";

    private String Status = "";
    private String InputToken = "lung disease";
    String OntocatIndexDir = System.getProperty(INDEX_DIR_PROPERTY);

    /** Maps the path of each ontology file to the human-readable ontology label. */
    public static final Map<String, String> ontologyNamesMap = loadOntologyNames();

    /**
     * Builds the file-path to label map. A plain {@link HashMap} is returned
     * instead of an anonymous subclass so that the map does not drag a
     * LuceneConfiguration instance along with it.
     */
    private static Map<String, String> loadOntologyNames()
    {
        String ontologiesDir = new LuceneConfiguration().GetLuceneConfiguration("ONTOLOGIES_DIRECTORY");
        Map<String, String> names = new HashMap<String, String>();
        names.put(ontologiesDir + "human-phenotype-ontology_v1294.obo", "Human Phenotype Ontology");
        names.put(ontologiesDir + "human_disease_v1.251.obo", "Human Disease");
        names.put(ontologiesDir + "Thesaurus_10_03.owl", "NCI Thesaurus");
        names.put(ontologiesDir + "mesh.obo", "MeSH");
        return names;
    }

    /**
     * Command-line entry point: (re)builds the ontology index.
     */
    public static void main(String[] args) throws Exception
    {
        OntoCatIndexPlugin p = new OntoCatIndexPlugin("x", null);
        p.buildIndexOntocat();
    }

    /** Returns the script tags this plugin contributes to the page header. */
    public String getCustomHtmlHeaders()
    {
        return "<script src=\"res/scripts/lib.js\" language=\"javascript\"></script>\n"
                + "<script src=\"Prototype/prototype.js\" language=\"javascript\"></script>\n";
    }

    public OntoCatIndexPlugin(String name, ScreenController<?> parent)
    {
        super(name, parent);
    }

    @Override
    public String getViewName()
    {
        return "plugin_LuceneIndex_OntoCatIndexPlugin";
    }

    @Override
    public String getViewTemplate()
    {
        return "plugins/LuceneIndex/OntoCatIndexPlugin.ftl";
    }

    /**
     * Handles the {@code SearchOntocatLuceneIndex} action: searches the index
     * for the request's {@code InputToken} in all configured ontologies, or
     * reports that the index has to be created first. Other actions are
     * ignored.
     */
    @Override
    public void handleRequest(Database db, Tuple request)
    {
        resolveIndexDir();

        if (!"SearchOntocatLuceneIndex".equals(request.getAction()))
        {
            return;
        }

        String token = request.getString("InputToken");

        // NOTE: despite its name, DirectoryhasContents() returns true when the
        // directory is EMPTY, i.e. when there is no index to search.
        if (this.DirectoryhasContents(OntocatIndexDir))
        {
            this.setStatus("<h4> Cannot search for " + escapeHtml(token) + " Please create index first.</h4> ");
            return;
        }

        try
        {
            this.setInputToken(token);
            this.setStatus("<h4> Starting search for " + escapeHtml(token) + " in Ontocat index.</h4> ");
            List<String> ontologies = new ArrayList<String>(ontologyNamesMap.values());
            this.SearchIndexOntocat(this.getInputToken(), ontologies);
            this.setInputToken("");
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
    }

    /**
     * Computes the index directory below {@code java.io.tmpdir}, publishes it
     * in the {@code java.io.tmpdir.ontocatIndexdir} system property and caches
     * it in {@link #OntocatIndexDir}.
     * <p>
     * {@code java.io.tmpdir} does not end with a separator on every platform
     * (e.g. "/tmp"), so one is inserted when missing; otherwise the directory
     * name would be glued onto the last path element.
     *
     * @return the resolved index directory
     */
    private String resolveIndexDir()
    {
        String tmp = System.getProperty("java.io.tmpdir");
        if (!tmp.endsWith("/") && !tmp.endsWith(File.separator))
        {
            tmp += File.separator;
        }
        System.setProperty(INDEX_DIR_PROPERTY, tmp + INDEX_DIR_NAME);
        OntocatIndexDir = System.getProperty(INDEX_DIR_PROPERTY);
        System.out.println(">>>>>>>>>system>>>>>>>>>" + OntocatIndexDir);
        return OntocatIndexDir;
    }

    /**
     * Escapes the characters that are significant in HTML so that
     * user-supplied or ontology-supplied text cannot inject markup into the
     * status shown by the view. {@code null} is rendered as "null", matching
     * plain string concatenation.
     */
    private static String escapeHtml(String text)
    {
        String raw = String.valueOf(text);
        StringBuilder out = new StringBuilder(raw.length() + 16);
        for (int i = 0; i < raw.length(); i++)
        {
            char c = raw.charAt(i);
            switch (c)
            {
                case '&':
                    out.append("&amp;");
                    break;
                case '<':
                    out.append("&lt;");
                    break;
                case '>':
                    out.append("&gt;");
                    break;
                case '"':
                    out.append("&quot;");
                    break;
                default:
                    out.append(c);
            }
        }
        return out.toString();
    }

    /**
     * Deletes the contents of the Ontocat index directory. The "msWin"
     * setting read from LuceneConfiguration selects the directory separator
     * used while walking the directory.
     *
     * @throws IllegalArgumentException
     *             if an entry is write protected or cannot be deleted
     */
    public void DeleteOntocatIndex()
    {
        resolveIndexDir();

        LuceneConfiguration LC = new LuceneConfiguration();
        System.out.println("coming from deleteOntocatIndex" + OntocatIndexDir);
        String msWin = LC.GetLuceneConfiguration("msWin");

        this.setStatus("<h4>About to delete the contents of the Ontocat index " + OntocatIndexDir + "</h4>");
        deleteDirContents(OntocatIndexDir, 0, msWin);
        this.setStatus("<h4>Contents of index directory " + OntocatIndexDir + " deleted </h4>");
    }

    /**
     * Recursively deletes everything below {@code fname}; {@code fname}
     * itself is kept.
     *
     * @param fname
     *            directory whose contents are removed
     * @param deep
     *            unused; kept for signature compatibility
     * @param msWin
     *            {@code "\"true\""} selects "\" as separator,
     *            {@code "\"false\""} selects "/"; any other value (including
     *            {@code null}) falls back to the platform separator
     * @throws IllegalArgumentException
     *             if an entry is write protected, a directory cannot be
     *             listed or emptied, or a deletion fails
     */
    private void deleteDirContents(String fname, int deep, String msWin)
    {
        File dir = new File(fname);

        String separator;
        if ("\"false\"".equals(msWin))
        {
            separator = "/";
            System.out.println("Hi, I am a mac");
        }
        else if ("\"true\"".equals(msWin))
        {
            separator = "\\";
            System.out.println("Hi, I am a pc");
        }
        else
        {
            // Missing or unexpected configuration value: do not fail, use the
            // separator of the platform we are running on.
            separator = File.separator;
        }

        if (dir.isFile())
        {
            System.out.println("dirlist" + dir.getName());
            return;
        }
        if (!dir.isDirectory())
        {
            return;
        }

        // Log the last path element; fall back to the whole path when the
        // chosen separator does not occur in it.
        int tail = fname.lastIndexOf(separator);
        System.out.println(tail >= 0 ? fname.substring(tail) : fname);

        String[] children = dir.list();
        if (children == null)
        {
            throw new IllegalArgumentException("Delete: cannot list directory: " + fname);
        }

        for (String child : children)
        {
            String childPath = fname + separator + child;
            File childFile = new File(childPath);

            // Empty sub-directories first so that they can be deleted below.
            deleteDirContents(childPath, 0, msWin);
            System.out.println("deleting " + childPath);

            if (!childFile.canWrite())
            {
                throw new IllegalArgumentException("Delete: write protected: " + childPath);
            }
            this.setStatus("<h4>I can delete " + childPath + "</h4>");

            // If it is a directory, make sure it is empty. This should not be
            // reached: the index does not contain directories.
            if (childFile.isDirectory())
            {
                String[] leftovers = childFile.list();
                if (leftovers != null && leftovers.length > 0)
                {
                    throw new IllegalArgumentException("Delete: directory not empty: " + childPath);
                }
            }

            if (!childFile.delete())
            {
                throw new IllegalArgumentException("Delete: deletion failed");
            }
        }
    }

    /** Prints a message to standard output. */
    public void say(String whatTosay)
    {
        System.out.println(whatTosay);
    }

    /**
     * Searches the index for an exact (lower-cased) match of {@code query} in
     * the "term" field, restricted to the given ontologies, and publishes an
     * HTML table with the expansion terms as the plugin status.
     * <p>
     * No analyzer is used because the search should be exact.
     *
     * @param query
     *            the term to look up
     * @param ontologyLabels
     *            labels of the ontologies to search in
     * @return {@code "<term>:"} followed by the ";"-separated expansion
     *         entries of all hits, or an empty string when nothing was found
     *         or the search failed
     */
    public String SearchIndexOntocat(String query, List<String> ontologyLabels)
    {
        resolveIndexDir();

        String res = "";
        StringBuilder resultsTable = new StringBuilder(
                "<p><table width=\"70%\" border=\"2\" bordercolor=\"#BDCDDA\" cellspacing=\"3\" cellpadding=\"3\">"
                        + "<tr><td><b>Terms retrieved</b></td><td><b>Score</b></td></tr> ");

        this.setStatus("<h4>Search for " + escapeHtml(query) + "in OntoIndex started. </h4> ");
        System.out.println("Search for " + query + " in OntoIndex just started");

        if (query == null)
        {
            // Nothing to search for; same result as a failed search.
            return res;
        }

        IndexReader reader = null;
        try
        {
            query = query.toLowerCase();
            reader = IndexReader.open(FSDirectory.open(new File(OntocatIndexDir)), true);
            say("query=" + query);
            IndexSearcher searcher = new IndexSearcher(reader);
            TopScoreDocCollector collector = TopScoreDocCollector.create(1000, true);

            // Restrict the search to the requested ontologies.
            BooleanQuery labelQuery = new BooleanQuery();
            for (String ontologyLabel : ontologyLabels)
            {
                Query q = new TermQuery(new Term("ontologyLabel", ontologyLabel));
                labelQuery.add(q, BooleanClause.Occur.SHOULD);
            }

            // The term itself must match the "term" field exactly.
            Query termQuery = new TermQuery(new Term("term", query));

            BooleanQuery finalQuery = new BooleanQuery();
            finalQuery.add(termQuery, BooleanClause.Occur.MUST);
            finalQuery.add(labelQuery, BooleanClause.Occur.MUST);
            System.out.println("finalQuery = " + finalQuery.toString());

            searcher.search(finalQuery, collector);
            ScoreDoc[] hits = collector.topDocs().scoreDocs;

            if (hits.length > 0)
            {
                Set<String> seen = new HashSet<String>();
                for (ScoreDoc hit : hits)
                {
                    Document document = searcher.doc(hit.doc);
                    Explanation explanation = searcher.explain(finalQuery, hit.doc);
                    // The explanation text starts with "<score> = ...".
                    String score = explanation.toString().split("=")[0];

                    if (res.isEmpty())
                    {
                        res = document.getField("term").stringValue() + ":";
                        resultsTable.append("<tr><td>").append(escapeHtml(res)).append("</td></tr>");
                    }

                    for (String exp : document.getField("expansion").stringValue().split(";"))
                    {
                        if (!seen.add(exp))
                        {
                            continue;
                        }
                        // Kept for every entry (also the empty one produced by
                        // the leading ";") so the returned format is unchanged.
                        res += ";" + exp;
                        if (exp.length() > 0)
                        {
                            resultsTable.append("<tr><td>").append(escapeHtml(exp)).append("</td>");
                            resultsTable.append("<td>").append(score).append("</td></tr>");
                        }
                    }
                }
                res = res.replace(":;", ":");
                resultsTable.append("</table>");
                resultsTable.append("<p>Number of hits : ").append(hits.length);
                this.setStatus(resultsTable.toString());
                System.out.println("RES:" + res);
                System.out.println("RESULTS table:" + resultsTable);
            }
            else
            {
                setStatus("<P>No records found for " + escapeHtml(query) + "in OntoINDEX</P>");
                say("<P>No records found for " + query + "in OntoINDEX</P>");
            }
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
        finally
        {
            // Release the index files even when the search failed half-way.
            if (reader != null)
            {
                try
                {
                    reader.close();
                }
                catch (Exception e)
                {
                    e.printStackTrace();
                }
            }
        }
        return res;
    }

    /**
     * Creates the index from the data OntoCat returns for every configured
     * ontology file: ontology label, term, and the term's synonyms and
     * children. An existing index in the target directory is overwritten.
     * <p>
     * Failures are printed and reported through the status; they are not
     * propagated to the caller.
     *
     * @throws Exception
     *             declared for backward compatibility
     */
    public void buildIndexOntocat() throws Exception
    {
        resolveIndexDir();

        IndexWriter writer = null;
        boolean succeeded = false;
        try
        {
            System.out.println("Start Indexing Ontocat results");
            this.setStatus("Starting indexing Ontocat results in " + OntocatIndexDir);

            // The analyzer is required by the writer but not used: all fields
            // are stored NOT_ANALYZED (or not indexed at all).
            StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
            writer = new IndexWriter(FSDirectory.open(new File(OntocatIndexDir)), analyzer, true,
                    IndexWriter.MaxFieldLength.UNLIMITED);

            for (Map.Entry<String, String> ontology : ontologyNamesMap.entrySet())
            {
                say("now " + writer.numDocs() + " terms indexed");
                System.out.println(ontology.getKey());
                indexOntology(writer, ontology.getKey(), ontology.getValue());
            }

            System.out.println(": Optimizing Index :");
            this.setStatus("Optimizing Ontocat Index");
            writer.optimize();
            succeeded = true;
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
        finally
        {
            if (writer != null)
            {
                try
                {
                    System.out.println(writer.numDocs());
                    writer.close();
                }
                catch (Exception ex)
                {
                    succeeded = false;
                    ex.printStackTrace();
                }
            }
        }

        if (succeeded)
        {
            System.out.println("Finished indexing Ontocat");
            this.setStatus("Ontocat Indexing finished");
        }
        else
        {
            this.setStatus("Ontocat Indexing failed");
        }
    }

    /**
     * Adds one document per term of the given ontology file to the index.
     *
     * @throws Exception
     *             if the ontology cannot be loaded or a document cannot be
     *             written
     */
    private void indexOntology(IndexWriter writer, String ontologyFile, String label) throws Exception
    {
        OntologyService os = new FileOntologyService(new File(ontologyFile).toURI());
        Ontology onto = os.getOntologies().get(0);
        Set<OntologyTerm> allTerms = os.getAllTerms(onto.getOntologyAccession());

        for (OntologyTerm term : allTerms)
        {
            Document document = new Document();
            document.add(new Field("term", term.getLabel().toLowerCase(), Field.Store.YES,
                    Field.Index.NOT_ANALYZED));
            document.add(new Field("ontologyLabel", label, Field.Store.YES, Field.Index.NOT_ANALYZED));
            document.add(new Field("expansion", buildExpansion(os, term), Field.Store.YES, Field.Index.NO));
            writer.addDocument(document);
        }
    }

    /**
     * Collects the synonyms and children of a term into the stored
     * "expansion" value: every entry is lower-cased, wrapped in double quotes
     * and prefixed with ";". Synonyms equal to the term's own label and
     * duplicate entries are left out.
     */
    private static String buildExpansion(OntologyService os, OntologyTerm term) throws OntologyServiceException
    {
        String termLabel = term.getLabel().toLowerCase();
        StringBuilder expansion = new StringBuilder();

        for (String synonym : os.getSynonyms(term))
        {
            String lowered = synonym.toLowerCase();
            // Compare by value: a reference comparison never filters anything.
            if (termLabel.equals(lowered))
            {
                continue;
            }
            appendIfAbsent(expansion, "\"" + lowered + "\"");
        }

        for (OntologyTerm child : os.getChildren(term))
        {
            appendIfAbsent(expansion, "\"" + child.getLabel().toLowerCase() + "\"");
        }

        return expansion.toString().trim();
    }

    /** Appends ";" + entry unless the quoted entry is already present. */
    private static void appendIfAbsent(StringBuilder expansion, String quotedEntry)
    {
        if (expansion.indexOf(quotedEntry) < 0)
        {
            expansion.append(';').append(quotedEntry);
        }
    }

    /**
     * Checks whether the given directory is empty, creating it first when it
     * does not exist.
     * <p>
     * NOTE: the name is misleading and kept only for compatibility. The
     * method returns {@code true} when the directory exists and is
     * <em>empty</em>, and {@code false} when it has contents, is not a
     * directory, or cannot be listed.
     *
     * @param directory
     *            path of the directory to check
     * @return {@code true} if the directory exists and contains no entries
     */
    public boolean DirectoryhasContents(String directory)
    {
        File dir = new File(directory);
        if (!dir.exists())
        {
            System.out.println("The directory " + directory + "does not exists. Creating directory. ");
            if (dir.mkdir())
            {
                System.out.println("Directory: " + directory + " created");
            }
        }

        System.out.println("checking " + dir.getAbsolutePath());

        // list() returns null when the path is not a directory or cannot be read.
        String[] entries = dir.isDirectory() ? dir.list() : null;
        boolean isEmpty = entries != null && entries.length == 0;
        System.out.println("isEmpty: " + isEmpty);

        if (entries == null)
        {
            return false;
        }
        if (isEmpty)
        {
            this.setStatus("<h4> The directory is empty</h4> ");
            System.out.println("The directory is empty.");
            return true;
        }
        this.setStatus("<h4> The directory is NOT empty or does not exists .</h4> ");
        System.out.println("The directory is NOT empty or does not exists .");
        return false;
    }

    @Override
    public void reload(Database db)
    {
    }

    public void setInputToken(String inputToken)
    {
        InputToken = inputToken;
    }

    public String getInputToken()
    {
        return InputToken;
    }

    public void setStatus(String status)
    {
        Status = status;
    }

    public String getStatus()
    {
        return Status;
    }
}