/* Date: September 15, 2010 * Template: PluginScreenJavaTemplateGen.java.ftl * generator: org.molgenis.generators.ui.PluginScreenJavaTemplateGen 3.3.2-testing * * Despoina Antonakaki <D.Antonakaki@rug.nl> * Dasha Zhernakova <rokko_@mail.ru> * */ package plugins.LuceneIndex; import java.io.File; import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Document; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Searchable; import org.apache.lucene.search.TopScoreDocCollector; import org.apache.lucene.search.highlight.Formatter; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.search.highlight.TokenSources; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; import org.molgenis.core.UseCase; import org.molgenis.framework.db.Database; import org.molgenis.framework.db.DatabaseException; import org.molgenis.framework.ui.PluginModel; import org.molgenis.framework.ui.ScreenController; import org.molgenis.model.MolgenisModelException; import org.molgenis.model.elements.Entity; import org.molgenis.model.elements.Field; import org.molgenis.util.Tuple; /* * @param LUCENE_INDEX_DIRECTORY - an empty directory to store index files */ public class DBIndexPlugin extends PluginModel<org.molgenis.util.Entity> { private static final long serialVersionUID = 71L; private String Status = ""; private String InputToken = "Enter the query"; String result = ""; String useOntologies = "true"; /** * The NUM_OF_FIELDS is used in order to retrieve the number of existing * fields in the configuration file , defined by the user . This are all the * database fields that are going to be use in the creation of the index, * and later in the search. The DB_FIELD as it is retrieved from the * configuration file is build (dbfield1,dbfield2..) by the function * SearchAllDBTablesIndex() and the real database fields are retrieved from * the configuration file . */ static final String NUM_OF_FIELDS = "numberOfFields"; static final String DB_FIELD = "dbfield"; List<String> OntologiesForExpansion = null; public void setOntologiesForExpansion(List<String> ontologies) { OntologiesForExpansion = new ArrayList<String>(); OntologiesForExpansion = ontologies; } public DBIndexPlugin(String name, ScreenController<?> parent) { super(name, parent); } @Override public String getViewName() { return "plugin_LuceneIndex_DBIndexPlugin"; } @Override public String getViewTemplate() { return "plugins/LuceneIndex/DBIndexPlugin.ftl"; } @Override public void handleRequest(Database db, Tuple request) { LuceneConfiguration LC = new LuceneConfiguration(); /** * Retrieve the option of including ontologies from configuration */ String useOntologies = LC.GetLuceneConfiguration("USE_ONTOLOGIES"); if (useOntologies.compareTo("\"false\"") == 0) { System.out.println("---Not using ontologies---"); } else { /** * set the ontologies to use in query expansion */ List<String> ontologies = new ArrayList<String>(); if (request.getString("HPO") != null) ontologies.add("Human Phenotype Ontology"); if (request.getString("HD") != null) ontologies.add("Human Disease"); if (request.getString("NCI") != null) ontologies.add("NCI Thesaurus"); if (request.getString("MeSH") != null) ontologies.add("MeSH"); /* * if (request.getString("SelectAll") != null){ * System.out.println("All ontologies elected code reached"); * * ontologies.add("Human Phenotype Ontology"); * ontologies.add("Human Disease"); ontologies.add("NCI Thesaurus"); * ontologies.add("MeSH"); } */ if (ontologies.isEmpty()) { System.out.println("[Ontologies] is empty"); this.setStatus("<h4>Choose the ontologies to use for query expansion</h4>"); } setOntologiesForExpansion(ontologies); System.out.println("Ontologies : " + ontologies); } if ("SearchLuceneIndex".equals(request.getAction())) { // check if the index has been created, one way is to create a // boolean value , or check if the index directory contains an index // . String tmp = System.getProperty("java.io.tmpdir"); System.setProperty("java.io.tmpdir.indexdir", tmp + "indexdir/"); String IndexDir = System.getProperty("java.io.tmpdir.indexdir"); System.out.println(">>>>>>>>>system>>>>>>>>>" + IndexDir); // if // (!this.DirectoryhasContents(LC.GetLuceneConfiguration("LUCENE_INDEX_DIRECTORY"))) // { if (!this.DirectoryhasContents(IndexDir)) { this.setInputToken(request.getString("InputToken").trim()); this.searchIndex(db); } else { this.setStatus("<h4> Cannot search for " + request.getString("InputToken") + " Please create index first.</h4> "); } this.SaveUseCase(request.getString("InputToken").trim(), db, "Simple"); this.setInputToken(""); } if ("ExpandQuery".equals(request.getAction())) { String tmp = System.getProperty("java.io.tmpdir"); System.setProperty("java.io.tmpdir.indexdir", tmp + "indexdir/"); System.out.println(">>>>>>>>>system>>>>>>>>>" + System.getProperty("java.io.tmpdir.indexdir")); if (useOntologies.compareTo("\"false\"") == 0) { this.setStatus("<h3> You cannot use expand query option. Please adjust your configuration file to include ontologies.</h3>"); } else { this.setInputToken(request.getString("InputToken").trim()); this.ExpandQuery(db); } } } private void SaveUseCase(String request, Database db, String SearchType) { // save request in useCase try { // Database db = new app.JDBCDatabase("molgenis.properties"); // create a new entity instance for use Case and add the new search // from the user UseCase nuc = new UseCase(); // check if the use case is not too long for column 'name' : | name // | varchar(255) | NO | | NULL | if (request.length() > 255) { request = request.substring(0, 255); } nuc.setUseCaseName(request); nuc.setSearchType(SearchType); db.add(nuc); db.add(nuc); } catch (DatabaseException e) { e.printStackTrace(); } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } // List<UseCase> } /** * For all DB tables , for all entities in each field , call search to * search each field. * * @param db */ public void search(Database db, String[] ResultHeaders) { int hits = 0; result = "<p><table width=\"100%\" border=\"2\" bordercolor=\"#BDCDDA\" cellspacing=\"3\" cellpadding=\"3\"><tr>"; for (int i = 0; i < ResultHeaders.length; i++) { result += "<td><b>" + ResultHeaders[i] + "</b></td>"; } result += "</tr> "; List<String> dbfields = new ArrayList<String>(); try { for (Entity entity : db.getMetaData().getEntities()) { for (Field f : entity.getFields()) { System.out.println(">>>>>> db entities " + f.getName()); if (!dbfields.contains(f.getName())) dbfields.add(f.getName()); } } } catch (DatabaseException e) { e.printStackTrace(); } catch (MolgenisModelException e) { e.printStackTrace(); } for (int i = 0; i < dbfields.size(); i++) { hits += this.searchIndex(db, dbfields.get(i)); } result += "</table>"; result += "<p>Number of hits : " + hits; // if (hits==0) result = "<p>No records found in db index for the term " // + this.getInputToken() + "</p>"; this.setStatus(result); } /** * For all DB tables , for all entities in each field , call * SearchAllDBFiledIndex to search each field. * * @param db */ public void searchIndex(Database db) { int hits = 0; result = "<p><table width=\"100%\" border=\"2\" bordercolor=\"#BDCDDA\" cellspacing=\"3\" cellpadding=\"3\"><tr>" + // "<td><b>Entity</b></td>" + "<td><b>Biobank</b></td>" + "<td><b>Feature</b></td>" + "<td><b>Highlighted result</b></td>" + "<td><b>Other Sources</b></td><td><b>Score</b></td></tr> "; // "<td><b>Term frequency</b></td><td><b>IDF</b></td><td><b>Field weight</b></td></tr>"; // //result = "<p>Number of hits : " + hits.length + // "<p><table width=\"100%\" border=\"2\" bordercolor=\"black\" cellspacing=\"3\" cellpadding=\"3\"><tr><td><b>Investigation</b></td><td><b>Investigation Name</b></td><td><b>Description</b></td><td><b>Score</b></td></tr>"; /** * alternative way to go through all database tables and fields. * Apparently such a search adds an extra undesirable cost */ /* * try { for(Class<Entity> aClass: db.getEntityClasses()) { for(Entity * e: (List<Entity>)db.find(aClass)) { String dbtable = * aClass.getName(); * * for(String fieldName: e.getFields()) { if (e.get(fieldName) != null) * { hits += this.SearchAllDBFiledIndex(db, fieldName, result); } } } } * } catch (DatabaseException e) { e.printStackTrace(); } */ List<String> dbfields = new ArrayList<String>(); try { for (Entity entity : db.getMetaData().getEntities()) { for (Field f : entity.getFields()) { System.out.println(">>>>>> db entities " + f.getName()); if (!dbfields.contains(f.getName())) dbfields.add(f.getName()); } } } catch (DatabaseException e) { e.printStackTrace(); } catch (MolgenisModelException e) { e.printStackTrace(); } for (int i = 0; i < dbfields.size(); i++) { hits += this.searchIndex(db, dbfields.get(i)); } result += "</table>"; result += "<p>Number of hits : " + hits; // if (hits==0) result = "<p>No records found in db index for the term " // + this.getInputToken() + "</p>"; this.setStatus(result); } public static String removeChar(String s, char c) { String r = ""; if (s == null) System.out.println(s + " is empty"); else { for (int i = 0; i < s.length(); i++) { if (s.charAt(i) != c) r += s.charAt(i); } } return r; } /** * The main function where the search in index is performed. The DB table * field is passed as argument. PorterStemAnalyzer is used. * * @param db * @param fieldName * @param result * @return */ public int searchIndex(Database db, String fieldName) { // LuceneConfiguration LC = new LuceneConfiguration(); String tmp = System.getProperty("java.io.tmpdir"); System.setProperty("java.io.tmpdir.indexdir", tmp + "indexdir/"); String IndexDir = System.getProperty("java.io.tmpdir.indexdir"); System.out.println(">>>>>>>>>system>>>>>>>>>" + IndexDir); String userQuery = this.getInputToken(); // this.setStatus("Starting search for " + userQuery + " (all)index in " // + LC.GetLuceneConfiguration("LUCENE_INDEX_DIRECTORY")); // System.out.println("Starting search for " + userQuery + // " (all)index in " + // LC.GetLuceneConfiguration("LUCENE_INDEX_DIRECTORY")); this.setStatus("Starting search for " + userQuery + " (all)index in " + IndexDir); System.out.println("Starting search for " + userQuery + " (all)index in " + IndexDir); IndexReader reader = null; // StandardAnalyzer analyzer = null; PorterStemAnalyzer analyzer = null; IndexSearcher searcher = null; TopScoreDocCollector collector = null; QueryParser parser = null; Query query = null; ScoreDoc[] hits = null; List<String> fieldnames = new ArrayList<String>(); System.out.println("the fieldname in which we are searching " + fieldName); try { analyzer = new PorterStemAnalyzer(); // Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30); File file = new File(IndexDir); // File file = new // File(LC.GetLuceneConfiguration("LUCENE_INDEX_DIRECTORY")); reader = IndexReader.open(FSDirectory.open(file), true); searcher = new IndexSearcher(reader); Searchable[] indexes = new IndexSearcher[1]; indexes[0] = searcher; collector = TopScoreDocCollector.create(1000, false); parser = new QueryParser(Version.LUCENE_30, fieldName, analyzer); query = parser.parse(userQuery); // Search the query searcher.search(query, collector); hits = collector.topDocs().scoreDocs; System.out.println("Number of hits : " + hits.length); if (hits.length > 0) { System.out.println("Number of hits: " + hits.length); for (int i = 0; i < hits.length; i++) { int scoreId = hits[i].doc; Explanation explanation = searcher.explain(query, scoreId); Document document = searcher.doc(scoreId); String text = document.getField(fieldName).stringValue(); // Highlighting the hits: String highlighted = text; TokenStream tokenStream = TokenSources.getTokenStream(fieldName, text, analyzer); Formatter f = new SimpleHTMLFormatter("<font style=\"background-color: yellow;\">", "</font>"); Highlighter highlighter = new Highlighter(f, new QueryScorer(query, fieldName)); try { highlighted = highlighter.getBestFragments(tokenStream, text, 5, "..."); } catch (InvalidTokenOffsetsException e) { e.printStackTrace(); } // adding db table field and Lucene score to the result // if (i==0) result += // " <tr><td><b class=\"link\" id=\"anElement\" onclick=\"Javascript:toggleElement('line');\"> <img src=\"res/img/Orange_plus.png\" width=\"12\" height=\"12\" alt=\"plus\" /><td>"; // The field Entity is not interesting for the simple user // result += "<tr id='line'><td>" + // document.getField("className").stringValue() +"</td>"; result += "<tr id='line'>"; if (document.getField("investigationNameField") != null) result += "<td>" + document.getField("investigationNameField").stringValue() + "</td>"; else result += "<td> </td>"; // Retrieve Id if (document.getField("id") != null) { String id = document.getField("id").stringValue(); // TODO Danny Use or Loose /* int aInt = */Integer.parseInt(id); // result += // "<td> <a href=http://localhost:8080/pheno/molgenis.do?__target=main&select=Biobank&__action=filter_set&__filter_attribute=id&__filter_operator=EQUALS&__filter_value="+id+">" // + fieldName + "</a><br/></td>"; if (!fieldnames.contains(fieldName)) fieldnames.add(fieldName); result += "<td> <a href=molgenis.do?__target=Biobanks&__action=filter_add&__filter_attribute=id&__filter_operator=EQUALS&__filter_value=" + id + ">" + fieldName + "</a><br/></td>"; // result += // "<td> <a href=http://localhost:8080/gcc/molgenis.do?__target=DataViews&__action=filter_set&__filter_attribute=id&__filter_operator=EQUALS&__filter_value="+id // +">" + fieldName + "</a><br/></td>"; } // working // http://localhost:8080/pheno/molgenis.do?__target=Biobanks&__action=filter_set&__filter_attribute=id&__filter_operator=EQUALS&__filter_value=3 // result += "<td>" + // "<a href=http://localhost:8080/pheno/molgenis.do?__target=main&select=Biobanks&__action=filter_set&__filter_attribute=id&__filter_operator=EQUALS&__filter_value="+id+">" // + id + "</a><br/></td>" ; // result += "<td>" + // "<a href=http://localhost:8080/pheno/molgenis.do?__target=Biobank&__action=filter_set&__filter_attribute=id&__filter_operator=EQUALS&__filter_value="+id+">" // + id + "</a><br/></td>" ; // http://localhost:8080/pheno/molgenis.do?__target=main&select=Biobanks&__action=filter_set&__filter_attribute=id&__filter_operator=EQUALS&__filter_value=3 // http://gbicserver1.biol.rug.nl:8080/xgap4exampledatasets/molgenis.do?__target=molgenis_main_Investigations&__action=filter_set&__filter_attribute=id&__filter_operator=EQUALS&__filter_value=3 // http://localhost:8080/pheno/molgenis.do?__target=Biobank&__action=filter_set&__filter_attribute=id&__filter_operator=equals&__filter_value=83 // "molgenis.do?__target=Biobanks&__action=filter_add&__filter_attribute=id&__filter_operator=EQUALS&__filter_value="+id" // Retrieve term accession from OntologyTerm Entity from DB // . This option requires the import of all concept wiki // terms in DB . // Query<OntologyTerm> q = db.query(OntologyTerm.class); /* * org.molgenis.framework.db.Query<OntologyTerm> q = * db.query(OntologyTerm.class); q.addRules(new * QueryRule("term", Operator.EQUALS, fieldName)); * * List<OntologyTerm> valueList = q.find(); if (valueList != * null) { //todo : check the size * System.out.println("****Ontology term retrieved for " * +fieldName +valueList); } else { * System.out.println("***No Ontology term for "+fieldName * ); * * } */ result += "<td>" + highlighted + "</td>"; // Search the user term in concept wiki // Ask Christina : the user term that will be searched in // concept wiki : // 1. Needs to be clean up ,Should this be done by hand or // need some specialist ? // 2. Some terms can be found capitaliazed e.g HIV , but not // in hiv ..: currently capitalizing the terms, // One of the below is correct: can concept wiki return some // kind of 505/404/...error so I can check that? result += "<td><a href=http://conceptwiki.org/index.php/Term:" + userQuery.toUpperCase() + ">ConceptWiki</a> " + "OR..." + "<a href=http://conceptwiki.org/index.php/Term:" + userQuery + ">ConceptWiki</a> </td>"; // Score added result += "<td>" + explanation.toString().split("=")[0] + "</td>"; /** * There are these are more Lucene technical variables that * can be added: Term frequency, IDF, Field weight; **/ /* * result += "<td>" + explanation.toString().split("=")[1] + * "</td>"; result += "<td>" + * explanation.toString().split("=")[3] + "</td>"; result += * "<td>" + explanation.toString().split("=")[4] + "</td>"; * result += "<td>" + explanation.toString().split("=")[6] + * "</td>"; result += "<td>" + * explanation.toString().split("=")[7] + "</td>"; result += * "<td>" + explanation.toString().split("=")[8] + "</td>"; * result += "<td>" + explanation.toString().split("=")[9] + * "</td></tr>"; result += "<td>" + explanation.toString() + * "</td></tr>"; */ /* * result += "<td>" + explanation.toHtml() + "</td>"; * System. * out.println("---------- EXPLANATION -------------"); * System.out.println("DESCRIPTION = " + * document.getField("description").stringValue()); * System.out.println("explanation: " + * explanation.toHtml()); */ System.out.println(explanation.toString()); System.out.println("explanation: " + explanation.toHtml()); } // this.setStatus(result); System.out.println(result); } // IndexReader.close(); System.out.println("Try closing the index reader "); reader.close(); } catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (ParseException e) { e.printStackTrace(); } System.out.println("Fieldnames: " + fieldnames); if (hits != null) return hits.length; return 0; } public boolean DirectoryhasContents(String directory) { File dir = new File(directory); boolean exists = dir.exists(); if (exists == false) { System.out.println("The directory " + directory + "does not exists. Creating directory. "); boolean success = (new File(directory)).mkdir(); if (success) { System.out.println("Directory: " + directory + " created"); } } boolean isEmpty = false; System.out.println("checking " + dir.getAbsolutePath()); System.out.println("isEmpty: " + isEmpty); if (dir.exists() && dir.isDirectory()) { if (dir.list().length == 0) { this.setStatus("<h4> The directory is empty</h4> "); System.out.println("The directory is empty."); return true; } else { // File[] files = dir.listFiles(); this.setStatus("<h4> The directory is NOT empty or does not exists .</h4> "); System.out.println("The directory is NOT empty or does not exists ."); } } else { return false; } return false; } @Override public void reload(Database db) { } public void setStatus(String status) { Status = status; } public String getStatus() { return Status; } public void setInputToken(String inputToken) { InputToken = inputToken; } public String getInputToken() { return InputToken; } public String getuseOntologies() { LuceneConfiguration LC = new LuceneConfiguration(); String useOntologies = LC.GetLuceneConfiguration("USE_ONTOLOGIES"); if (useOntologies.compareTo("\"false\"") == 0) { System.out.println("---From getuseOntologies() : Not using ontologies---"); return "false"; } return "true"; } /** * The function for query expansion. Creates a new (empty) instance of * OntocatQueryExpansion_lucene class * * @param db */ public void ExpandQuery(Database db) { OntocatQueryExpansion_lucene q = new OntocatQueryExpansion_lucene(); List<String> parsed = q.parseQuery(getInputToken()); if (!OntologiesForExpansion.isEmpty()) { System.out.println("Expanding the query..."); q.expand(OntologiesForExpansion); System.out.println("\nThe expanded query: "); for (String s : q.init_query) System.out.println(s); String res = q.output(parsed); System.out.println(res); System.out.println("Finished expanding... "); this.setInputToken(res); } this.searchIndex(db); this.SaveUseCase(getInputToken(), db, "Expanded"); this.setInputToken(""); System.out.println("Finished serching... "); } }