/* * DrakkarKeel - An Enterprise Collaborative Search Platform * * The contents of this file are subject under the terms described in the * DRAKKARKEEL_LICENSE file included in this distribution; you may not use this * file except in compliance with the License. * * 2013-2014 DrakkarKeel Platform. */ package drakkar.mast.retrieval; import drakkar.oar.DocumentMetaData; import drakkar.oar.facade.event.FacadeDesktopListener; import static drakkar.oar.util.KeyMessage.*; import drakkar.oar.util.KeySearchable; import drakkar.oar.util.OutputMonitor; import drakkar.mast.IndexException; import drakkar.mast.SearchException; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Date; import java.util.List; import lemurproject.indri.IndexEnvironment; import lemurproject.indri.IndexStatus; import lemurproject.indri.QueryEnvironment; import lemurproject.indri.QueryRequest; import lemurproject.indri.QueryResult; import lemurproject.indri.QueryResults; import lemurproject.indri.Specification; import lemurproject.lemur.Index; import lemurproject.lemur.IndexManager; /** * Clase que implementa el motor de búsqueda Indri-Lemur, versión 4.10 * * */ public class IndriContext extends EngineContext { private Index theIndex; private boolean appendIndex; private File idxCS; //saving file for case sensitive index private File idxCI; //saving file for case insensitive index private IndexEnvironment envCS; private IndexEnvironment envCI; private IndexStatus status; private Specification spec; private String[] retval; private QueryEnvironment queryEnv; private QueryRequest qrequest; private QueryResults qresults; private File indexPathCI;//for case insensitive search private QueryResult[] queryResults; private QueryResult queryResultObj; /** * constructor por defecto */ public IndriContext() { super(); this.defaultIndexPath = "./index/indri/"; } /** * constructor * * @param listener --oyente de los procesos realizados por este motor */ public IndriContext(FacadeDesktopListener listener) { super(listener); this.defaultIndexPath = "./index/indri/"; } /** * {@inheritDoc} */ public ArrayList<DocumentMetaData> search(String query, boolean caseSensitive) throws SearchException { ArrayList<DocumentMetaData> finalResultsList = new ArrayList<DocumentMetaData>(); this.queryEnv = new QueryEnvironment(); this.qrequest = new QueryRequest(); this.qresults = new QueryResults(); try { setStartTimeOfSearch(new Date()); if (checkIndexPath(this.indexPath.getPath())) { if (caseSensitive) { this.queryEnv.addIndex(this.indexPath.getPath()); this.theIndex = lemurproject.lemur.IndexManager.openIndex(this.indexPath.getPath()); this.qrequest.query = query; this.qrequest.resultsRequested = this.theIndex.docCount(); this.qresults = this.queryEnv.runQuery(this.qrequest); } else { this.queryEnv.addIndex(this.indexPathCI.getPath()); this.theIndex = lemurproject.lemur.IndexManager.openIndex(this.indexPathCI.getPath()); this.qrequest.query = query; this.qrequest.resultsRequested = this.theIndex.docCount(); this.qresults = this.queryEnv.runQuery(this.qrequest); } finalResultsList = saveResults(this.qresults, query, this.queryEnv); if (finalResultsList.size() > 1) { deleteRepeated(finalResultsList); } this.finalMetaResult = finalResultsList; setEndTimeOfSearch(new Date()); String message = "Indri retrieved " + finalResultsList.size() + " document(s) (in " + getSearchTime() + " milliseconds) that matched query '" + query + "'. for both "; OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE); this.notifyTaskProgress(INFORMATION_MESSAGE, message); } else { this.notifyTaskProgress(ERROR_MESSAGE, "Index path incorrect"); } } catch (Exception ex) { throw new SearchException(ex.getMessage()); } this.retrievedDocsCount += finalResultsList.size(); return finalResultsList; } /** * {@inheritDoc} */ public ArrayList<DocumentMetaData> search(String query, String docType, boolean caseSensitive) throws SearchException { ArrayList<DocumentMetaData> finalResultsList = new ArrayList<DocumentMetaData>(); ArrayList<DocumentMetaData> tempList = new ArrayList<DocumentMetaData>(); setStartTimeOfSearch(new Date()); tempList = search(query, caseSensitive); //search in all collections of documents finalResultsList = this.filterMetaDocuments(docType, tempList); this.finalMetaResult = finalResultsList; setEndTimeOfSearch(new Date()); String message = "Indri retrieved " + this.finalMetaResult.size() + " document(s) (in " + getSearchTime() + " milliseconds) that matched query '" + query + "'. for doctype " + docType; OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE); this.notifyTaskProgress(INFORMATION_MESSAGE, message); this.retrievedDocsCount += finalResultsList.size(); return finalResultsList; } /** * {@inheritDoc} */ public ArrayList<DocumentMetaData> search(String query, String[] docTypes, boolean caseSensitive) throws SearchException { ArrayList<DocumentMetaData> finalResultsList = new ArrayList<DocumentMetaData>(); ArrayList<DocumentMetaData> tempList = new ArrayList<DocumentMetaData>(); setStartTimeOfSearch(new Date()); for (int i = 0; i < docTypes.length; i++) { String doc = docTypes[i]; tempList = search(query, doc, caseSensitive); finalResultsList.addAll(tempList); } if (finalResultsList.size() > 1) { deleteRepeated(finalMetaResult); } this.finalMetaResult = finalResultsList; setEndTimeOfSearch(new Date()); String message = "Indri retrieved " + this.finalMetaResult.size() + " document(s) (in " + getSearchTime() + " milliseconds) that matched query '" + query; OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE); this.notifyTaskProgress(INFORMATION_MESSAGE, message); this.retrievedDocsCount += finalResultsList.size(); return finalResultsList; } /** * {@inheritDoc} */ public long makeIndex() throws IndexException { this.collectionPath = new File(this.defaultCollectionPath); this.indexPath = new File(this.defaultIndexPath); long indexedFiles = 0; if (!this.collectionPath.exists() || this.collectionPath.listFiles().length == 0) { String message = collectionPath + "does not exist or is empty"; this.notifyTaskProgress(ERROR_MESSAGE, message); throw new IndexException(message); } else if (this.indexPath != null) { indexedFiles = this.build(MAKE_INDEX); } this.indexedDocsCount += indexedFiles; return indexedFiles; } /** * {@inheritDoc} */ public long makeIndex(File collectionPath) throws IndexException { this.indexPath = new File(this.defaultIndexPath); this.collectionPath = collectionPath; long indexedFiles = 0; if (!this.collectionPath.exists() || this.collectionPath.listFiles().length == 0) { String message = collectionPath + "does not exist or is empty"; this.notifyTaskProgress(ERROR_MESSAGE, message); throw new IndexException(message); } else if (this.indexPath != null) { indexedFiles = this.build(MAKE_INDEX); } this.indexedDocsCount += indexedFiles; return indexedFiles; } /** * {@inheritDoc} */ public long makeIndex(List<File> collectionPath) throws IndexException { this.indexPath = new File(this.defaultIndexPath); long indexedFiles = 0; if (collectionPath.isEmpty()) { this.notifyTaskProgress(ERROR_MESSAGE, "The collection does not have files"); throw new IndexException("The collection does not have files"); } else if (this.indexPath != null) { indexedFiles = this.build(collectionPath, MAKE_INDEX); } this.indexedDocsCount += indexedFiles; return indexedFiles; } /** * {@inheritDoc} */ public long makeIndex(File collectionPath, File indexPath) throws IndexException { this.indexPath = indexPath; this.collectionPath = collectionPath; long indexedFiles = 0; if (!this.collectionPath.exists() || this.collectionPath.listFiles().length == 0) { String message = collectionPath + "does not exist or is empty"; this.notifyTaskProgress(ERROR_MESSAGE, message); throw new IndexException(message); } else if (indexPath != null) { indexedFiles = this.build(MAKE_INDEX); } else { String message = "indexPath is null"; this.notifyTaskProgress(ERROR_MESSAGE, message); throw new IndexException(message); } this.indexedDocsCount += indexedFiles; return indexedFiles; } /** * {@inheritDoc} */ public long makeIndex(List<File> collectionPath, File indexPath) throws IndexException { this.indexPath = indexPath; long indexedFiles = 0; if (collectionPath.isEmpty()) { this.notifyTaskProgress(ERROR_MESSAGE, "The collection does not have files"); throw new IndexException("The collection does not have files"); } else if (this.indexPath != null) { indexedFiles = this.build(collectionPath, MAKE_INDEX); } else { String message = "indexPath is null"; this.notifyTaskProgress(ERROR_MESSAGE, message); throw new IndexException(message); } this.indexedDocsCount += indexedFiles; return indexedFiles; } /** * {@inheritDoc} */ public long updateIndex(File collectionPath) throws IndexException { this.indexPath = new File(this.defaultIndexPath); this.collectionPath = collectionPath; long indexedFiles = 0; if (!this.collectionPath.exists() || this.collectionPath.listFiles().length == 0) { String message = collectionPath + "does not exist or is empty"; this.notifyTaskProgress(ERROR_MESSAGE, message); throw new IndexException(message); } else if (this.indexPath != null) { indexedFiles = this.build(ADD_INDEX); } this.indexedDocsCount += indexedFiles; return indexedFiles; } /** * {@inheritDoc} */ public long updateIndex(List<File> collectionPath) throws IndexException { this.indexPath = new File(this.defaultIndexPath); long indexedFiles = 0; if (collectionPath.isEmpty()) { this.notifyTaskProgress(ERROR_MESSAGE, "The collection does not have files"); throw new IndexException("The collection does not have files"); } else if (this.indexPath != null) { indexedFiles = this.build(collectionPath, ADD_INDEX); } this.indexedDocsCount += indexedFiles; return indexedFiles; } /** * {@inheritDoc} */ public long updateIndex(File collectionPath, File indexPath) throws IndexException { this.indexPath = indexPath; this.collectionPath = collectionPath; long indexedFiles = 0; if (!this.collectionPath.exists() || this.collectionPath.listFiles().length == 0) { String message = collectionPath + "does not exist or is empty"; this.notifyTaskProgress(ERROR_MESSAGE, message); throw new IndexException(message); } else if (indexPath != null) { indexedFiles = this.build(ADD_INDEX); } else { String message = "indexPath is null"; this.notifyTaskProgress(ERROR_MESSAGE, message); throw new IndexException(message); } this.indexedDocsCount += indexedFiles; return indexedFiles; } /** * {@inheritDoc} */ public long updateIndex(List<File> collectionPath, File indexPath) throws IndexException { this.indexPath = indexPath; long indexedFiles = 0; if (collectionPath.size() == 0) { this.notifyTaskProgress(ERROR_MESSAGE, "The collection does not have files"); throw new IndexException("The collection does not have files"); } else if (this.indexPath != null) { indexedFiles = this.build(collectionPath, ADD_INDEX); } else { String message = "indexPath is null"; this.notifyTaskProgress(ERROR_MESSAGE, message); throw new IndexException(message); } this.indexedDocsCount += indexedFiles; return indexedFiles; } /** * {@inheritDoc} */ public boolean loadIndex() throws IndexException { /*revisa solo uno de los directorios*/ File defaultfileCI = new File(this.defaultIndexPath.concat("/caseinsensitive")); File defaultfile = new File(this.defaultIndexPath.concat("/casesensitive")); if (defaultfileCI.exists() && defaultfile.exists()) { OutputMonitor.printLine("Loading Indri... "); try { //verify that an index exists //verify that an index exists File manifest = new File(defaultfileCI.getPath(), "manifest"); File manifest2 = new File(defaultfile.getPath(), "manifest"); if (manifest.exists() && manifest2.exists()) { // open the index this.theIndex = lemurproject.lemur.IndexManager.openIndex(defaultfileCI.getPath()); // get the count of documents int numDocuments = this.theIndex.docCount(); // get the average document length (in words) float avgDocLength = this.theIndex.docLengthAvg(); // get the count of total terms int totalTermCount = this.theIndex.termCount(); // get the count of _unique_ terms int uniqueTermCount = this.theIndex.termCountUnique(); // print out our statistics this.notifyTaskProgress(INFORMATION_MESSAGE, "Loading Indri..."); this.notifyTaskProgress(INFORMATION_MESSAGE, "# documents: " + numDocuments + "\n"); this.notifyTaskProgress(INFORMATION_MESSAGE, "Avg. Document Length: " + avgDocLength + "\n"); this.notifyTaskProgress(INFORMATION_MESSAGE, "# terms: " + totalTermCount + "\n"); this.notifyTaskProgress(INFORMATION_MESSAGE, "# unique terms: " + uniqueTermCount + "\n"); this.notifyLoadedDocument(numDocuments); OutputMonitor.printLine("# documents: " + numDocuments); //set path for search this.indexPath = defaultfile; this.indexPathCI = defaultfileCI; return true; } else { throw new IndexException("Not found index in this directory: " + this.defaultIndexPath); } } catch (Exception e) { throw new IndexException(e.getMessage()); } } return false; } /** * {@inheritDoc} */ public boolean loadIndex(File indexPath) throws IndexException { File newfileCI = new File(indexPath.getPath().concat("/casesensitive")); File newfile = new File(indexPath.getPath().concat("/caseinsensitive")); if (newfileCI.exists() && newfile.exists()) { OutputMonitor.printLine("Loading Indri... "); try { //verify that an index exists File manifest = new File(newfileCI.getPath(), "manifest"); File manifest2 = new File(newfile.getPath(), "manifest"); if (manifest.exists() && manifest2.exists()) { // open the index this.theIndex = lemurproject.lemur.IndexManager.openIndex(newfileCI.getPath()); // get the count of documents int numDocuments = this.theIndex.docCount(); // get the average document length (in words) float avgDocLength = this.theIndex.docLengthAvg(); // get the count of total terms int totalTermCount = this.theIndex.termCount(); // get the count of _unique_ terms int uniqueTermCount = this.theIndex.termCountUnique(); // print out our statistics this.notifyTaskProgress(INFORMATION_MESSAGE, "Loading Indri..."); this.notifyTaskProgress(INFORMATION_MESSAGE, "# documents: " + numDocuments + "\n"); this.notifyTaskProgress(INFORMATION_MESSAGE, "Avg. Document Length: " + avgDocLength + "\n"); this.notifyTaskProgress(INFORMATION_MESSAGE, "# terms: " + totalTermCount + "\n"); this.notifyTaskProgress(INFORMATION_MESSAGE, "# unique terms: " + uniqueTermCount + "\n"); this.notifyLoadedDocument(numDocuments); //set path for search this.indexPath = newfile; this.indexPathCI = newfileCI; OutputMonitor.printLine("# documents: " + numDocuments); return true; } else { throw new IndexException("Not found index in this directory: " + newfileCI.getPath()); } } catch (Exception e) { throw new IndexException(e.getMessage()); } } return false; } /** * Método para construir el índice con la colección por defecto * * @param operación a realizar: MAKE o ADD */ private long build(int operation) throws IndexException { long indexedFiles = 0; setStartTimeOfIndexation(new Date()); String t1 = "Indexing to directory '" + this.indexPath + "'..." + "\n"; this.notifyTaskProgress(INFORMATION_MESSAGE, t1); if (safeToBuildIndex(this.indexPath, operation)) { indexedFiles = indexDocs(this.collectionPath, operation); setEndTimeOfIndexation(new Date()); String t3 = getIndexationTime() + " total milliseconds" + "\n"; this.notifyTaskProgress(INFORMATION_MESSAGE, t3); } else { this.notifyTaskProgress(ERROR_MESSAGE, "Unable to build the index"); throw new IndexException("Unable to build the index"); } return indexedFiles; } /** * Método para construir el índice a partir de una colección de files * * @param operation ----- operación a realizar: MAKE o ADD * @param collectionPath ----- lista de ficheros que representan la colección */ private long build(List<File> collectionPath, int operation) throws IndexException { long indexedFiles = 0; String m1 = "Indexing to directory '" + this.indexPath + "'..." + "\n"; this.notifyTaskProgress(INFORMATION_MESSAGE, m1); if (safeToBuildIndex(this.indexPath, operation)) { //inicia la indexacion setStartTimeOfIndexation(new Date()); indexedFiles = indexDocs(collectionPath, operation); this.notifyTaskProgress(INFORMATION_MESSAGE, "Finalizó la indexación"); setEndTimeOfIndexation(new Date()); String m2 = getIndexationTime() + " total milliseconds" + "\n"; this.notifyTaskProgress(INFORMATION_MESSAGE, m2); } else { this.notifyTaskProgress(ERROR_MESSAGE, "Unable to build the index"); throw new IndexException("Unable to build the index"); } return indexedFiles; } /** * Indexa una colección dado una lista de files */ private int indexDocs(List<File> data, int operation) throws IndexException { int totalDocumentsIndexed = 0; this.envCS = new IndexEnvironment(); this.envCI = new IndexEnvironment(); this.status = new UIIndexStatus(); this.spec = null; try { // memory this.envCS.setMemory(encodeMem()); this.envCI.setMemory(encodeMem()); //case sensitive index this.envCS.setNormalization(false); this.envCI.setNormalization(true); //stopwords this.envCS.setStopwords(ENGLISH_STOP_WORDS); this.envCI.setStopwords(ENGLISH_STOP_WORDS); //get all the filespaths String[] datafiles = dataFilesList(data, new ArrayList<String>()); // create a new empty index (after parameters have been set). if (this.appendIndex) { this.envCS.open(this.idxCS.getPath(), this.status); this.envCI.open(this.idxCI.getPath(), this.status); } else { this.envCS.create(this.idxCS.getPath(), this.status); this.envCI.create(this.idxCI.getPath(), this.status); } // do the building String fname, fileClass; for (int i = 0; i < datafiles.length; i++) { fname = datafiles[i]; fileClass = getFileExtension(fname); if (fileClass.equalsIgnoreCase("java") || fileClass.equalsIgnoreCase("pdf") || fileClass.equalsIgnoreCase("txt")) { if (fileClass.equalsIgnoreCase("txt") || fileClass.equalsIgnoreCase("pdf")) { this.spec = this.envCS.getFileClassSpec(fileClass); this.envCS.addFileClass(this.spec); this.envCS.addFile(fname, fileClass); ///////////////////// this.spec = this.envCI.getFileClassSpec(fileClass); this.envCI.addFileClass(this.spec); this.envCI.addFile(fname, fileClass); //this.notifyIndexedDocument(); } else if (fileClass.equalsIgnoreCase("java")) { this.spec = this.envCS.getFileClassSpec("txt"); this.envCS.addFileClass(this.spec); this.envCS.addFile(fname, "txt"); ////////////////// this.spec = this.envCI.getFileClassSpec("txt"); this.envCI.addFileClass(this.spec); this.envCI.addFile(fname, "txt"); // this.notifyIndexedDocument(); } if (operation == ADD_INDEX) { this.notifyAddedDocument(); } else if (operation == MAKE_INDEX) { this.notifyIndexedDocument(); } ///stadistics totalDocumentsIndexed = this.envCS.documentsIndexed(); this.indexedDocsCount = totalDocumentsIndexed; } else { String message = "There are files in the collection that are not: .java, pdf o txt documents" + "\n" + "so, they could not be indexed."; OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE); this.notifyTaskProgress(INFORMATION_MESSAGE, message); } } this.envCS.close(); this.envCI.close(); } catch (Exception e) { // a lemur exception was tossed this.notifyTaskProgress(ERROR_MESSAGE, this.idxCS.getPath() + "\n" + e + "\n"); this.notifyTaskProgress(ERROR_MESSAGE, this.idxCI.getPath() + "\n" + e + "\n"); e.printStackTrace(); } this.notifyTaskProgress(INFORMATION_MESSAGE, "Finished building " + this.idxCS.getPath() + "\n"); this.notifyTaskProgress(totalDocumentsIndexed, "Total documents indexed: " + totalDocumentsIndexed + "\n\n"); return totalDocumentsIndexed; } /** * Indexa una colección dado un file */ private int indexDocs(File data, int operation) throws IndexException { int totalDocumentsIndexed = 0; this.envCS = new IndexEnvironment(); this.envCI = new IndexEnvironment(); this.status = new UIIndexStatus(); this.spec = null; try { // memory this.envCS.setMemory(encodeMem()); this.envCI.setMemory(encodeMem()); //indice case sensitive this.envCS.setNormalization(false); this.envCI.setNormalization(true); //stopwords this.envCS.setStopwords(ENGLISH_STOP_WORDS); this.envCI.setStopwords(ENGLISH_STOP_WORDS); //get all the filespaths String[] datafiles = dataFilesList(data, new ArrayList<String>()); // create a new empty index (after parameters have been set). if (this.appendIndex) { this.envCS.open(this.idxCS.getPath(), this.status); this.envCI.open(this.idxCI.getPath(), this.status); } else { this.envCS.create(this.idxCS.getPath(), this.status); this.envCI.create(this.idxCI.getPath(), this.status); } // do the building String fname, fileClass; for (int i = 0; i < datafiles.length; i++) { fname = datafiles[i]; fileClass = getFileExtension(fname); if (fileClass.equalsIgnoreCase("java") || fileClass.equalsIgnoreCase("pdf") || fileClass.equalsIgnoreCase("txt")) { if (fileClass.equalsIgnoreCase("txt") || fileClass.equalsIgnoreCase("pdf")) { this.spec = this.envCS.getFileClassSpec(fileClass); this.envCS.addFileClass(this.spec); this.envCS.addFile(fname, fileClass); ///////////////////// this.spec = this.envCI.getFileClassSpec(fileClass); this.envCI.addFileClass(this.spec); this.envCI.addFile(fname, fileClass); //this.notifyIndexedDocument(); } else if (fileClass.equalsIgnoreCase("java")) { this.spec = this.envCS.getFileClassSpec("txt"); this.envCS.addFileClass(this.spec); this.envCS.addFile(fname, "txt"); ////////////////// this.spec = this.envCI.getFileClassSpec("txt"); this.envCI.addFileClass(this.spec); this.envCI.addFile(fname, "txt"); //this.notifyIndexedDocument(); } if (operation == ADD_INDEX) { this.notifyAddedDocument(); } else if (operation == MAKE_INDEX) { this.notifyIndexedDocument(); } ///stadistics totalDocumentsIndexed = this.envCS.documentsIndexed(); this.indexedDocsCount = totalDocumentsIndexed; } else { String message = "There are files in the collection that are not: .java, pdf o txt documents" + "\n" + "so, they could not be indexed."; OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE); this.notifyTaskProgress(INFORMATION_MESSAGE, message); } } this.envCS.close(); this.envCI.close(); } catch (Exception e) { // a lemur exception was tossed this.notifyTaskProgress(ERROR_MESSAGE, this.idxCS.getPath() + "\n" + e + "\n"); this.notifyTaskProgress(ERROR_MESSAGE, this.idxCI.getPath() + "\n" + e + "\n"); e.printStackTrace(); } this.notifyTaskProgress(INFORMATION_MESSAGE, "Finished building " + this.idxCS.getPath() + "\n"); this.notifyTaskProgress(totalDocumentsIndexed, "Total documents indexed: " + totalDocumentsIndexed + "\n\n"); return totalDocumentsIndexed; } /** * codifica la memoria a utilizar */ private long encodeMem() { String s = "512000000"; long localRetval = 0; try { localRetval = Long.parseLong(s); } catch (Exception e) { } return localRetval; } private String[] dataFilesList(File dirPath, List<String> list) { // do not try to index files that cannot be read if (dirPath.canRead()) { if (dirPath.isDirectory()) { File[] files = dirPath.listFiles(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { dataFilesList(files[i], list); } } } else { list.add(dirPath.getAbsolutePath()); } } this.retval = new String[0]; this.retval = list.toArray(this.retval); return this.retval; } private String[] dataFilesList(List<File> dirPath, List<String> list) { File file; for (int i = 0; i < dirPath.size(); i++) { file = dirPath.get(i); if (file.canRead()) { if (file.isDirectory()) { File[] files = file.listFiles(); if (files != null) { for (int j = 0; j < files.length; j++) { dataFilesList(files[j], list); } } } else { list.add(file.getAbsolutePath()); } } } this.retval = new String[0]; this.retval = list.toArray(this.retval); return this.retval; } /** * Guarda los resultados de búsqueda * * */ private ArrayList<DocumentMetaData> saveResults(QueryResults sd, String queryT, QueryEnvironment q) throws IOException { ArrayList<DocumentMetaData> arraylist = new ArrayList<DocumentMetaData>(); DocumentMetaData metaDoc = null; String pathdoc = null; String summary = null; this.queryResults = sd.results; try { this.theIndex = lemurproject.lemur.IndexManager.openIndex(this.indexPath.getPath()); } catch (Exception ex) { OutputMonitor.printStream("", ex); } for (int i = 0; i < this.queryResults.length; i++) { metaDoc = new DocumentMetaData(); try { this.queryResultObj = this.queryResults[i]; int iddoc = this.queryResultObj.docid; double score = this.queryResultObj.score; summary = this.queryResultObj.snippet; if (summary == null) { summary = " "; } pathdoc = this.theIndex.document(iddoc); File file = new File(pathdoc); metaDoc.setIndex(iddoc); metaDoc.setScore(score); metaDoc.setPath(pathdoc); metaDoc.setName(file.getName()); metaDoc.setSize(file.length()); metaDoc.setSynthesis(summary); metaDoc.setType(getFileExtension(pathdoc)); metaDoc.setSearcher(KeySearchable.INDRI_SEARCH_ENGINE); arraylist.add(metaDoc); } catch (Exception ex) { OutputMonitor.printStream("", ex); } } return arraylist; } /** * {@inheritDoc} * * */ public boolean safeToBuildIndex(File indexP, int operation) throws IndexException { this.appendIndex = false; String idxstring = indexP.getPath(); String message = null; boolean flag = true; if (indexP.exists()) { //verifica si ya esta creado el indice if (indexP.listFiles().length == 2) { this.idxCS = new File(idxstring.concat("/casesensitive")); this.idxCI = new File(idxstring.concat("/caseinsensitive")); } else { //create folders this.idxCS = new File(idxstring.concat("/casesensitive")); this.idxCI = new File(idxstring.concat("/caseinsensitive")); if (!this.idxCS.mkdirs() && !this.idxCI.mkdirs()) { //ensure that the index folder exists message = "ERROR: Could not create the index folders at: " + this.idxCS.getPath() + ".\n" + this.idxCI.getPath() + "Aborting indexing process."; this.notifyTaskProgress(ERROR_MESSAGE, message); flag = false; throw new IndexException(message); } } } else { //create folders this.idxCS = new File(idxstring.concat("/casesensitive")); this.idxCI = new File(idxstring.concat("/caseinsensitive")); if (!this.idxCS.mkdirs() && !this.idxCI.mkdirs()) { //ensure that the index folder exists message = "ERROR: Could not create the index folders at: " + this.idxCS.getPath() + ".\n" + this.idxCI.getPath() + "Aborting indexing process."; this.notifyTaskProgress(ERROR_MESSAGE, message); flag = false; throw new IndexException(message); } } /* if (!this.idxCS.exists() && !this.idxCI.exists()) { this.idxCS = new File(idxstring.concat("/casesensitive")); this.idxCI = new File(idxstring.concat("/caseinsensitive")); if (!this.idxCS.mkdirs() && !this.idxCI.mkdirs()) { //ensure that the index folder exists message = "ERROR: Could not create the index folders at: " + this.idxCS.getPath() + ".\n" + this.idxCI.getPath() + "Aborting indexing process."; this.notifyTaskProgress(Assignable.ERROR_MESSAGE, message); return false; } }*/ File manifest = new File(this.idxCS.getPath(), "manifest"); File manifest2 = new File(this.idxCI.getPath(), "manifest"); if (manifest.exists() && manifest2.exists()) { switch (operation) { case MAKE_INDEX: message = "Overwriting index " + idxstring + "\n"; this.notifyTaskProgress(INFORMATION_MESSAGE, message); deleteFiles(this.idxCS); deleteFiles(this.idxCI); flag = true; break; case ADD_INDEX: message = "Appending new files to index " + idxstring + "\n"; this.notifyTaskProgress(INFORMATION_MESSAGE, message); this.appendIndex = true; flag = true; break; default: message = "Not building index " + idxstring + "\n"; this.notifyTaskProgress(ERROR_MESSAGE, message); flag = false; throw new IndexException(message); } } else if (operation == ADD_INDEX) { message = "ERROR: No Indri index exist in this address" + idxstring; this.notifyTaskProgress(ERROR_MESSAGE, message); flag = false; throw new IndexException(message); } return flag; } /** * Verifica que en un path dado exista un índice indri * * @param path * @return * @throws IndexException */ public boolean checkIndexPath(String path) throws IndexException { if (!path.contains("casesensitive") && !path.contains("caseinsensitive")) { this.indexPath = new File(path.concat("/casesensitive")); this.indexPathCI = new File(path.concat("/caseinsensitive")); } else { this.indexPath = new File(path); this.indexPathCI = new File(path); } Index theNewIndex = null; try { if (this.indexPath.exists()) { theNewIndex = IndexManager.openIndex(this.indexPath.getPath()); } else { this.notifyTaskProgress(ERROR_MESSAGE, "Index does not exist"); throw new IndexException("Index does not exist"); } } catch (Exception ex) { OutputMonitor.printStream("", ex); } if (theNewIndex != null) { return true; } else { return false; } } class UIIndexStatus extends IndexStatus { public void status(int code, String documentFile, String error, int documentsIndexed, int documentsSeen) { if (code == action_code.FileOpen.swigValue()) { notifyTaskProgress(INFORMATION_MESSAGE, "Documents: " + documentsIndexed + "\n"); notifyTaskProgress(INFORMATION_MESSAGE, "Opened " + documentFile + "\n"); } else if (code == action_code.FileSkip.swigValue()) { notifyTaskProgress(INFORMATION_MESSAGE, "Skipped " + documentFile + "\n"); } else if (code == action_code.FileError.swigValue()) { notifyTaskProgress(INFORMATION_MESSAGE, "Error in " + documentFile + " : " + error + "\n"); } else if (code == action_code.DocumentCount.swigValue()) { if ((documentsIndexed % 500) == 0) { notifyTaskProgress(INFORMATION_MESSAGE, "Documents: " + documentsIndexed + "\n"); } } } } private static final String[] ENGLISH_STOP_WORDS = { "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is", "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there", "these", "they", "this", "to", "was", "will", "with" }; }