/* Date: November 30, 2010 * * * Despoina Antonakaki <D.Antonakaki@rug.nl> */ package plugins.LuceneIndex; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.ListIterator; import java.util.Map; import java.util.Set; import java.util.Vector; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.util.Version; import org.molgenis.framework.db.Database; import org.molgenis.framework.ui.PluginModel; import org.molgenis.framework.ui.ScreenController; import org.molgenis.organization.InvestigationElement; import org.molgenis.util.Entity; import org.molgenis.util.Tuple; import uk.ac.ebi.ontocat.Ontology; import uk.ac.ebi.ontocat.OntologyService; import uk.ac.ebi.ontocat.OntologyServiceException; import uk.ac.ebi.ontocat.OntologyTerm; import uk.ac.ebi.ontocat.file.FileOntologyService; public class AdminIndexes extends PluginModel<org.molgenis.util.Entity> { /** * */ private static final long serialVersionUID = 1L; private String Status = ""; LuceneConfiguration LC = new LuceneConfiguration(); private String InputToken = "lung disease"; public static final Map<String, String> ontologyNamesMap = new HashMap<String, String>() { /** * */ private static final long serialVersionUID = 1L; LuceneConfiguration LC = new LuceneConfiguration(); { put(LC.GetLuceneConfiguration("ONTOLOGIES_DIRECTORY") + "human-phenotype-ontology_v1294.obo", "Human Phenotype Ontology"); put(LC.GetLuceneConfiguration("ONTOLOGIES_DIRECTORY") + "human_disease_v1.251.obo", "Human Disease"); put(LC.GetLuceneConfiguration("ONTOLOGIES_DIRECTORY") + "Thesaurus_10_03.owl", "NCI Thesaurus"); put(LC.GetLuceneConfiguration("ONTOLOGIES_DIRECTORY") + "mesh.obo", "MeSH"); } }; public static void main(String[] args) throws Exception { OntoCatIndexPlugin p = new OntoCatIndexPlugin("x", null); p.buildIndexOntocat(); List<String> ontologies = new ArrayList<String>(); // ontologies.add("Human Phenotype Ontology"); ontologies.add("Human Disease"); ontologies.add("NCI Thesaurus"); // ontologies.add("MeSH"); p.setInputToken("cystic lung disease"); p.setStatus("x"); // p.SearchIndexOntocat("asthma", ontologies); } public AdminIndexes(String name, ScreenController<?> parent) { super(name, parent); } @Override public String getViewName() { return "plugin_LuceneIndex_AdminIndexes"; } @Override public String getViewTemplate() { return "plugins/LuceneIndex/AdminIndexes.ftl"; } @Override public void handleRequest(Database db, Tuple request) { // LuceneConfiguration LC = new LuceneConfiguration(); if ("CreateLuceneIndex".equals(request.getAction())) { String tmp = System.getProperty("java.io.tmpdir"); System.setProperty("java.io.tmpdir.indexdir", tmp + "indexdir/"); String IndexDir = System.getProperty("java.io.tmpdir.indexdir"); System.out.println(">>>>>>>>>Index created in tmp directory >>>>>>>>" + IndexDir); // if // (!this.DirectoryhasContents(LC.GetLuceneConfiguration("LUCENE_INDEX_DIRECTORY"))) // { if (!this.DirectoryhasContents(IndexDir)) { // this.setStatus("<h4> Index already exists in " + // LC.GetLuceneConfiguration("LUCENE_INDEX_DIRECTORY") + "</h4>" // ) ; this.setStatus("<h4> Index already exists in " + IndexDir + "</h4>"); } else { this.createIndex(db); } } if ("DeleteLuceneIndex".equals(request.getAction())) { this.deleteIndex(); } /** * Unfortunately most of the times this option is not successful through * the UI In order to build the index check (Properties) the run * Configurations of this file- set -Xms1024M -Xmx1024M and run in * server. Pleasy also check the terms on which the search in the * ontologies is build. By default two ontologies are inserted * ontologies.add("Human Disease"); and ontologies.add("NCI Thesaurus"); * and the input is p.setInputToken("cystic lung disease"); * */ if ("CreateOntocatLuceneIndex".equals(request.getAction())) { String tmp = System.getProperty("java.io.tmpdir"); System.setProperty("java.io.tmpdir.OntocatIndexdir", tmp + "OntocatIndexdir/"); String OntocatIndexdir = System.getProperty("java.io.tmpdir.OntocatIndexdir"); System.out.println(">>>>>>>>>Index created in tmp directory >>>>>>>>" + OntocatIndexdir); // if // (!this.DirectoryhasContents(LC.GetLuceneConfiguration("LUCENE_ONTOINDEX_DIRECTORY"))) // { if (!this.DirectoryhasContents(OntocatIndexdir)) { // this.setStatus("<h4> Index on Ontocat already created in directory " // + LC.GetLuceneConfiguration("LUCENE_ONTOINDEX_DIRECTORY") + // "</h4>"); this.setStatus("<h4> Index on Ontocat already created in directory " + OntocatIndexdir + "</h4>"); } else { try { this.buildIndexOntocat(); } catch (Exception e) { e.printStackTrace(); } } } if ("DeleteOntocatIndex".equals(request.getAction())) { this.DeleteOntocatIndex(); } } public void buildIndex(Database db) throws Exception { IndexWriter writer = null; // StandardAnalyzer analyzer = null; PorterStemAnalyzer analyzer = null; File file = null; String tmp = System.getProperty("java.io.tmpdir"); System.setProperty("java.io.tmpdir.Indexdir", tmp + "Indexdir/"); String Indexdir = System.getProperty("java.io.tmpdir.Indexdir"); System.out.println(">>>>>>>>>Index created in tmp directory >>>>>>>>" + Indexdir); try { System.out.println("Start indexing ... "); /** * get a reference to index directory file */ // LuceneConfiguration LC = new LuceneConfiguration(); // file = new // File(LC.GetLuceneConfiguration("LUCENE_INDEX_DIRECTORY")); file = new File(Indexdir); analyzer = new PorterStemAnalyzer(); // analyzer = new StandardAnalyzer(Version.LUCENE_30); writer = new IndexWriter(FSDirectory.open(file), analyzer, true, IndexWriter.MaxFieldLength.LIMITED); String fullText = null; List<Class<? extends Entity>> classList = db.getEntityClasses(); for (Class<? extends Entity> aClass : classList) { for (Entity e : (List<? extends Entity>) db.find(aClass)) { Document document1 = null; document1 = new Document(); fullText = aClass.getName(); // System.out.println("[DEBUG]"+fullText); for (String fieldName : e.getFields()) { Field ClassName = new Field("className", aClass.getName().toString(), Field.Store.YES, Field.Index.ANALYZED); document1.add(ClassName); System.out.println("the classes that are included in the index " + aClass.getName().toString()); if (e.get(fieldName) != null) { Field InsertFieldValue = new Field(fieldName, e.get(fieldName).toString(), Field.Store.YES, Field.Index.ANALYZED); if (e instanceof InvestigationElement) { if (((InvestigationElement) e).getInvestigation_Name() == null) { System.out.println("Investigation Element is null"); } else { Field investigationNameField = new Field("investigationNameField", ((InvestigationElement) e).getInvestigation_Name(), Field.Store.YES, Field.Index.NO); document1.add(investigationNameField); } } document1.add(InsertFieldValue); System.out.println("All : 1st (" + fieldName + ")as InsertFieldValue inserted in Index" + InsertFieldValue.toString()); // this is the same as InsertFieldValue. Though if // you remove it , the field is not included and // search does not work. fullText = fullText + " " + e.get(fieldName).toString(); Field fullTextField = new Field("fulltext", fullText, Field.Store.NO, Field.Index.ANALYZED); document1.add(fullTextField); System.out.println("All : FULLTEXT ( fulltext as anotherField inserted in Index" + fullTextField.toString()); } // writer.addDocument(document1); //this produces // multiple entries in the index } writer.addDocument(document1); } } // optimize the index System.out.println("Optimizing index"); writer.optimize(); } catch (Exception e) { e.printStackTrace(); } finally { try { if (writer != null) { writer.close(); System.out.println("Closed writer"); } System.out.println("Finished indexing"); } catch (Exception ex) { ex.printStackTrace(); } } } /** * Add one document to the Lucene index * * @param <E> */ public static <E extends org.molgenis.bbmri.Biobank> void updateIndex(List<E> entities) { /** * reopen the index in order to add new db record */ IndexWriter writer = null; PorterStemAnalyzer analyzer = null; File file = null; String tmp = System.getProperty("java.io.tmpdir"); System.setProperty("java.io.tmpdir.Indexdir", tmp + "Indexdir/"); String Indexdir = System.getProperty("java.io.tmpdir.Indexdir"); System.out.println(">>>>>>>>>Index created in tmp directory >>>>>>>>" + Indexdir); System.out.println("Start updating index ... "); /** * get a reference to index directory file */ // LuceneConfiguration LC = new LuceneConfiguration(); // file = new File(LC.GetLuceneConfiguration("LUCENE_INDEX_DIRECTORY")); file = new File(Indexdir); // Either public StandardAnalyzer(Version matchVersion, File stopwords) // can be used in order to add a STOP WORD file // analyzer = new StandardAnalyzer(Version.LUCENE_30); analyzer = new PorterStemAnalyzer(); try { writer = new IndexWriter(FSDirectory.open(file), analyzer, true, IndexWriter.MaxFieldLength.LIMITED); String fullText = null; Document document1 = null; document1 = new Document(); Class<? extends Entity> aClass = (Class<? extends Entity>) entities.getClass(); System.out.println("AddDBIndexRecors" + aClass); fullText = aClass.getName(); ListIterator<E> l = entities.listIterator(); while (l.hasNext()) { } // The entities are in the form : [elementData][0][_gwaPlatform] // for (i=0; i< entities[elementData].length() ) // for(String fieldName: ((Biobank) entities).getFields()) { // //produces exception : java.util.ArrayList? cannot be cast to // org.molgenis.bbmri.Biobank[edit] while (l.hasNext()) { Vector<String> VfieldName = l.next().getFields(); System.out.println("@Entities" + l.next().get__Type().toString()); for (String fieldName : VfieldName) { System.out.println("@VfieldName" + fieldName); Field ClassName = new Field("className", aClass.getName().toString(), Field.Store.YES, Field.Index.ANALYZED); document1.add(ClassName); System.out.println("The new classes that are included in the index by the decorator " + aClass.getName().toString()); if (fieldName != null) { Field InsertFieldValue = new Field(fieldName, fieldName.toString(), Field.Store.YES, Field.Index.ANALYZED); if (entities instanceof InvestigationElement) { if (((InvestigationElement) entities).getInvestigation_Name() == null) { System.out.println("Investigation Element is null"); } else { Field investigationNameField = new Field("investigationNameField", ((InvestigationElement) entities).getInvestigation_Name(), Field.Store.YES, Field.Index.NO); document1.add(investigationNameField); } } document1.add(InsertFieldValue); System.out.println("from DBUpdateDecorator All : 1st (" + fieldName + ")as InsertFieldValue inserted in Index" + InsertFieldValue.toString()); // this is the same as InsertFieldValue. Though if you // remove it , the field is not included and search does // not work. fullText = fullText + " " + fieldName.toString(); Field fullTextField = new Field("fulltext", fullText, Field.Store.NO, Field.Index.ANALYZED); document1.add(fullTextField); System.out.println("All : FULLTEXT ( fulltext as anotherField inserted in Index" + fullTextField.toString()); } } writer.addDocument(document1); System.out.println("Optimizing index"); writer.optimize(); } } catch (CorruptIndexException e1) { e1.printStackTrace(); } catch (LockObtainFailedException e1) { e1.printStackTrace(); } catch (IOException e1) { e1.printStackTrace(); } } public void createIndex(Database db) { this.setStatus("Start indexing "); try { this.buildIndex(db); } catch (Exception e) { e.printStackTrace(); } // LuceneConfiguration LC = new LuceneConfiguration(); String tmp = System.getProperty("java.io.tmpdir"); System.setProperty("java.io.tmpdir.Indexdir", tmp + "Indexdir/"); String Indexdir = System.getProperty("java.io.tmpdir.Indexdir"); System.out.println(">>>>>>>>>Index created in tmp directory >>>>>>>>" + Indexdir); // this.setStatus("<h4>Finished indexing. Index created in </h4>" + // LC.GetLuceneConfiguration("LUCENE_INDEX_DIRECTORY")); this.setStatus("<h4>Finished indexing. Index created in </h4>" + Indexdir); } @Override public void reload(Database db) { this.setStatus(""); } public void setStatus(String status) { Status = status; } public String getStatus() { return Status; } public void setInputToken(String inputToken) { InputToken = inputToken; } public String getInputToken() { return InputToken; } /** * The function deletes the DB index . The path is retrieved through * LuceneConfiguration index . The variable in LuceneIndexConfiguration * files defines if the program runs at mac or pc, in order to use the * proper directory separators. */ public void deleteIndex() { String msWin; // String indexDir; String tmp = System.getProperty("java.io.tmpdir"); System.setProperty("java.io.tmpdir.Indexdir", tmp + "Indexdir/"); String Indexdir = System.getProperty("java.io.tmpdir.Indexdir"); System.out.println(">>>>>>>>>Index created in tmp directory >>>>>>>>" + Indexdir); LuceneConfiguration LC = new LuceneConfiguration(); // System.out.println("coming from deleteLuceneIndex" + // (LC.GetLuceneConfiguration("LUCENE_INDEX_DIRECTORY"))); System.out.println("coming from deleteLuceneIndex" + Indexdir); msWin = LC.GetLuceneConfiguration("msWin"); // indexDir = LC.GetLuceneConfiguration("LUCENE_INDEX_DIRECTORY"); // this.setStatus("<h4>About to delete the contents of the DB index "+ // indexDir +"</h4>"); this.setStatus("<h4>About to delete the contents of the DB index " + Indexdir + "</h4>"); // browse to the index directory // deleteDirContents( indexDir, 0, msWin); deleteDirContents(Indexdir, 0, msWin); // this.setStatus("<h4>Contents of index directory "+ indexDir + // " deleted </h4>"); this.setStatus("<h4>Contents of index directory " + Indexdir + " deleted </h4>"); } private void deleteDirContents(String fname, int deep, String msWin) { String DirSeparator = null; String FileName = null; File dir = new File(fname); String[] chld = dir.list(); if (msWin.compareTo("\"false\"") == 0) { DirSeparator = "/"; System.out.println("Hi, I am a mac"); } else if (msWin.compareTo("\"true\"") == 0) { DirSeparator = "\\"; System.out.println("Hi, I am a pc"); } if (dir.isFile()) { System.out.println("dirlist" + dir.getName()); return; } else if (dir.isDirectory()) { System.out.println(fname.substring(fname.lastIndexOf(DirSeparator))); for (int i = 0; i < chld.length; i++) { FileName = fname + DirSeparator + chld[i]; File subFile = new File(FileName); deleteDirContents(FileName, 0, msWin); System.out.println("deleting " + fname + DirSeparator + chld[i]); // deleting every file if (!subFile.canWrite()) throw new IllegalArgumentException("Delete: write protected: " + FileName); else this.setStatus("<h4>I can delete " + FileName + "</h4>"); // If it is a directory, make sure it is empty - This shouldn't // be reached : index does not contains directories. if (subFile.isDirectory()) { String[] files = subFile.list(); if (files.length > 0) throw new IllegalArgumentException("Delete: directory not empty: " + FileName); } // Attempt to delete it boolean success = subFile.delete(); if (!success) throw new IllegalArgumentException("Delete: deletion failed"); } } } public boolean DirectoryhasContents(String directory) { File dir = new File(directory); boolean exists = dir.exists(); if (exists == false) { System.out.println("The directory " + directory + "does not exists. Creating directory. "); boolean success = (new File(directory)).mkdir(); if (success) { System.out.println("Directory: " + directory + " created"); } } boolean isEmpty = false; System.out.println("checking " + dir.getAbsolutePath()); System.out.println("isEmpty: " + isEmpty); if (dir.exists() && dir.isDirectory()) { if (dir.list().length == 0) { this.setStatus("<h4> The directory is empty</h4> "); System.out.println("The directory is empty."); return true; } else { // File[] files = dir.listFiles(); this.setStatus("<h4> The directory is NOT empty or does not exists .</h4> "); System.out.println("The directory is NOT empty or does not exists ."); } } else { return false; } return false; } /** * The function deletes the DB index . The path is retrieved through * LuceneConfiguration index . The variable in LuceneIndexConfiguration * files defines if the program runs at mac or pc, in order to use the * proper directory separators. */ public void DeleteOntocatIndex() { String msWin; // String OntoIndexDir; String tmp = System.getProperty("java.io.tmpdir"); System.setProperty("java.io.tmpdir.OntocatIndexdir", tmp + "OntocatIndexdir/"); String OntocatIndexdir = System.getProperty("java.io.tmpdir.OntocatIndexdir"); System.out.println(">>>>>>>>>Index created in tmp directory >>>>>>>>" + OntocatIndexdir); LuceneConfiguration LC = new LuceneConfiguration(); // System.out.println("coming from deleteOntocatIndex" + // (LC.GetLuceneConfiguration("LUCENE_ONTOINDEX_DIRECTORY"))); System.out.println("coming from deleteOntocatIndex" + OntocatIndexdir); msWin = LC.GetLuceneConfiguration("msWin"); // OntoIndexDir = // LC.GetLuceneConfiguration("LUCENE_ONTOINDEX_DIRECTORY"); // this.setStatus("<h4>About to delete the contents of the Ontocat index "+ // OntoIndexDir +"</h4>"); this.setStatus("<h4>About to delete the contents of the Ontocat index " + OntocatIndexdir + "</h4>"); // browse to the index directory // deleteDirContents( OntoIndexDir, 0, msWin); deleteDirContents(OntocatIndexdir, 0, msWin); // this.setStatus("<h4>Contents of index directory "+ OntoIndexDir + // " deleted </h4>"); this.setStatus("<h4>Contents of index directory " + OntocatIndexdir + " deleted </h4>"); } /** * The function creates an index on ontocat returned data. The data is * ontology name, term, synonyms + children * * @throws OntologyServiceException * @throws IllegalAccessException * @throws InstantiationException */ public void buildIndexOntocat() throws Exception { try { /** * An IndexWriter creates and maintains an index. analyzer isn't * used */ LuceneConfiguration LC = new LuceneConfiguration(); IndexWriter writer = null; StandardAnalyzer analyzer = null; File file = null; String tmp = System.getProperty("java.io.tmpdir"); System.setProperty("java.io.tmpdir.OntocatIndexdir", tmp + "OntocatIndexdir/"); String OntocatIndexdir = System.getProperty("java.io.tmpdir.OntocatIndexdir"); System.out.println(">>>>>>>>>Index created in tmp directory >>>>>>>>" + OntocatIndexdir); try { System.out.println("Start Indexing Ontocat results"); // this.setStatus("Starting indexing Ontocat results in " + // LC.GetLuceneConfiguration("LUCENE_ONTOINDEX_DIRECTORY")); this.setStatus("Starting indexing Ontocat results in " + OntocatIndexdir); // file = new // File(LC.GetLuceneConfiguration("LUCENE_ONTOINDEX_DIRECTORY")); file = new File(OntocatIndexdir); analyzer = new StandardAnalyzer(Version.LUCENE_30); writer = new IndexWriter(FSDirectory.open(file), analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED); for (String ontology_file : ontologyNamesMap.keySet()) { say("now " + writer.getReader().numDocs() + " terms indexed"); File file1 = new File(ontology_file); OntologyService os = new FileOntologyService(file1.toURI()); Ontology onto = os.getOntologies().get(0); String label = ontologyNamesMap.get(ontology_file); Set<OntologyTerm> all_terms = new HashSet<OntologyTerm>(); all_terms = os.getAllTerms(onto.getOntologyAccession()); for (OntologyTerm term : all_terms) { /** * getting the term with ontology label inside the index * for each term we use a separate Document */ Document document = new Document(); Field termField = new Field("term", term.getLabel().toLowerCase(), Field.Store.YES, Field.Index.NOT_ANALYZED); document.add(termField); // Field ontologyAccessionField = new // Field("ontologyAccession", accss, Field.Store.YES, // Field.Index.NOT_ANALYZED); // document.add(ontologyAccessionField); Field ontologyLabelField = new Field("ontologyLabel", label, Field.Store.YES, Field.Index.NOT_ANALYZED); document.add(ontologyLabelField); /** * searching for synonyms and children in ontology, * writing them to "expansion" with delimiters ";" */ List<OntologyTerm> children = new ArrayList<OntologyTerm>(); List<String> syns = new ArrayList<String>(); String expansion = ""; syns = os.getSynonyms(term); for (String s : syns) { if (term.getLabel().toLowerCase() != s) { // if it doesn't already exists s = "\"" + s.toLowerCase() + "\""; if (!expansion.contains(s)) expansion += ";" + s; } } children = os.getChildren(term); for (OntologyTerm t : children) { String t_str = "\"" + t.getLabel().toLowerCase() + "\""; if (!expansion.contains(t_str)) expansion += ";" + t_str; } Field expansionField = new Field("expansion", expansion.trim(), Field.Store.YES, Field.Index.NO); document.add(expansionField); // adding a Document to a IndexWriter writer.addDocument(document); } } /** * optimize the index */ System.out.println(": Optimizing Index :"); this.setStatus("Optimizing Ontocat Index"); writer.optimize(); } catch (Exception e) { e.printStackTrace(); } finally { try { if (writer != null) System.out.println(writer.getReader().numDocs()); writer.close(); System.out.println("Finished indexing Ontocat"); this.setStatus("Ontocat Indexing finished"); } catch (Exception ex) { ex.printStackTrace(); } } } catch (Exception e) { e.printStackTrace(); } } public void say(String whatTosay) { System.out.println(whatTosay); } }