/*
* DrakkarKeel - An Enterprise Collaborative Search Platform
*
* The contents of this file are subject under the terms described in the
* DRAKKARKEEL_LICENSE file included in this distribution; you may not use this
* file except in compliance with the License.
*
* 2013-2014 DrakkarKeel Platform.
*/
package drakkar.mast.retrieval;
import drakkar.oar.DocumentMetaData;
import drakkar.oar.facade.event.FacadeDesktopListener;
import static drakkar.oar.util.KeyField.*;
import static drakkar.oar.util.KeyMessage.*;
import drakkar.oar.util.KeySearchable;
import drakkar.oar.util.OutputMonitor;
import drakkar.mast.IndexException;
import drakkar.mast.SearchException;
import drakkar.mast.retrieval.parser.JavaParser;
import drakkar.mast.retrieval.parser.PdfParser;
import com.sun.labs.minion.FieldInfo;
import com.sun.labs.minion.Passage;
import com.sun.labs.minion.PassageBuilder;
import com.sun.labs.minion.Result;
import com.sun.labs.minion.ResultSet;
import com.sun.labs.minion.SearchEngineException;
import com.sun.labs.minion.SearchEngineFactory;
import com.sun.labs.minion.SimpleIndexer;
import com.sun.labs.minion.TextHighlighter;
import com.sun.labs.minion.query.Element;
import com.sun.labs.minion.query.Or;
import com.sun.labs.minion.query.Term;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
/**
* Clase que implementa el motor de búsqueda Minion, versión 1.0
*
*
*/
public class MinionContext extends AdvEngineContext {
private com.sun.labs.minion.SearchEngine engine; //clase que controla el motor de minion
private ResultSet resultSet; //estructura donde se almacenan los resultados de búsqueda
private List<Element> collectionElements; //tiene los elementos de la consulta
private List<Result> allResults;
private PassageBuilder passageBuilder;
private TextHighlighter tHighlighter;
/**
*
*/
public MinionContext() {
defaultIndexPath = "./index/minion";
}
/**
*
* @param listener
*/
public MinionContext(FacadeDesktopListener listener) {
super(listener);
defaultIndexPath = "./index/minion";
}
/**
* {@inheritDoc}
*/
@Override
public ArrayList<DocumentMetaData> search(String query, boolean caseSensitive) throws SearchException {
ArrayList<DocumentMetaData> finalResultsList = new ArrayList<DocumentMetaData>();
this.finalMetaResult = new ArrayList<DocumentMetaData>();
this.resultSet = null;
String[] words = null;
this.collectionElements = null;
String[] codeAndBooks = new String[4];
codeAndBooks[0] = getDocumentField(FIELD_CODE_ALL_SOURCE);
codeAndBooks[1] = getDocumentField(FIELD_DOC_TEXT);
codeAndBooks[2] = getDocumentField(FIELD_NAME);
codeAndBooks[3] = getDocumentField(FIELD_DOC_BOOK);
try {
setStartTimeOfSearch(new Date());
if (isMinionIndex(this.indexPath)) {
this.engine = SearchEngineFactory.getSearchEngine(this.indexPath.getPath());
//para analizar termino a termino de la consulta
if (query.contains(" ")) {
words = query.split(" ");
} else {
words = new String[1];
words[0] = query;
}
if (caseSensitive) {
this.collectionElements = new ArrayList<Element>();
String termonly, fieldtoprocess;
Element elem;
for (int i = 0; i < words.length; i++) {
termonly = words[i];
for (int j = 0; j < codeAndBooks.length; j++) {
fieldtoprocess = codeAndBooks[j];
elem = new Term(termonly, EnumSet.of(Term.Modifier.CASE, Term.Modifier.WILDCARD));
elem.addField(fieldtoprocess);
this.collectionElements.add(elem);
}
}
this.resultSet = this.engine.search(new Or(this.collectionElements));
} else if (caseSensitive == false) {
this.collectionElements = new ArrayList<Element>();
String termonly, fieldToProcess;
Element elem;
for (int i = 0; i < words.length; i++) {
termonly = words[i];
for (int j = 0; j < codeAndBooks.length; j++) {
fieldToProcess = codeAndBooks[j];
elem = new Term(termonly, EnumSet.of(Term.Modifier.WILDCARD, Term.Modifier.STEM));
elem.addField(fieldToProcess);
this.collectionElements.add(elem);
}
}
this.resultSet = this.engine.search(new Or(this.collectionElements));
}
this.allResults = this.resultSet.getAllResults(true);
//guardar resultados
this.finalMetaResult = saveResults(this.allResults);
//eliminar repetidos
if (this.finalMetaResult.size() > 1) {
deleteRepeated(this.finalMetaResult);
}
this.engine.close();
finalResultsList = this.finalMetaResult;
setEndTimeOfSearch(new Date());
String message = "Minion retrieved " + this.finalMetaResult.size() + " document(s) (in " + getSearchTime() + " milliseconds) that matched query '" + query + "'.";
OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(INFORMATION_MESSAGE, message);
} else {
OutputMonitor.printLine("Index path incorrect", OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(ERROR_MESSAGE, "Index path incorrect");
}
} catch (com.sun.labs.minion.SearchEngineException ex) {
throw new SearchException(ex.getMessage());
}
this.retrievedDocsCount += finalResultsList.size();
return finalResultsList;
}
/**
* {@inheritDoc}
*
*/
@Override
public ArrayList<DocumentMetaData> search(String query, String docType, boolean caseSensitive) throws SearchException {
ArrayList<DocumentMetaData> tempList = new ArrayList<DocumentMetaData>();
ArrayList<DocumentMetaData> finalResultsList = new ArrayList<DocumentMetaData>();
setStartTimeOfSearch(new Date());
//busca en toda la colección de documentos
tempList = search(query, caseSensitive);
//filtra los resultados por tipo de documento
finalResultsList = this.filterMetaDocuments(docType, tempList);
this.finalMetaResult = finalResultsList;
setEndTimeOfSearch(new Date());
String message = "Minion retrieved " + finalResultsList.size() + " document(s) (in " + getSearchTime() + " milliseconds) that matched query '" + query + "'. for doctype " + docType;
OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(INFORMATION_MESSAGE, message);
this.retrievedDocsCount += finalResultsList.size();
return finalResultsList;
}
/**
* {@inheritDoc}
*/
@Override
public ArrayList<DocumentMetaData> search(String query, int field, boolean caseSensitive) throws SearchException {
ArrayList<DocumentMetaData> finalResultsList = new ArrayList<DocumentMetaData>();
this.finalMetaResult = new ArrayList<DocumentMetaData>();
String[] words = null;
try {
setStartTimeOfSearch(new Date());
if (isMinionIndex(this.indexPath)) {
this.engine = SearchEngineFactory.getSearchEngine(this.indexPath.getPath());
if (query.contains(" ")) {
words = query.split(" ");
} else {
words = new String[1];
words[0] = query;
}
if (caseSensitive) {
this.collectionElements = new ArrayList<Element>();
String termonly;
Element elem;
for (int i = 0; i < words.length; i++) {
termonly = words[i];
elem = new Term(termonly, EnumSet.of(Term.Modifier.CASE, Term.Modifier.WILDCARD));
elem.addField(getDocumentField(field));
this.collectionElements.add(elem);
}
this.resultSet = this.engine.search(new Or(this.collectionElements));
} else if (caseSensitive == false) {
String termonly;
Element elem;
this.collectionElements = new ArrayList<Element>();
for (int i = 0; i < words.length; i++) {
termonly = words[i];
elem = new Term(termonly, EnumSet.of(Term.Modifier.WILDCARD, Term.Modifier.STEM));
elem.addField(getDocumentField(field));
this.collectionElements.add(elem);
}
this.resultSet = this.engine.search(new Or(this.collectionElements));
}
this.allResults = this.resultSet.getAllResults(true);
//guardar resultados
this.finalMetaResult = saveResults(this.allResults);
//eliminar repetidos
if (this.finalMetaResult.size() > 1) {
deleteRepeated(this.finalMetaResult);
}
finalResultsList = this.finalMetaResult;
setEndTimeOfSearch(new Date());
String message = "Minion retrieved " + this.resultSet.size() + " document(s) (in " + getSearchTime() + " milliseconds) that matched query '" + query + "'for" + field;
OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(INFORMATION_MESSAGE, message);
this.engine.close();
} else {
OutputMonitor.printLine("Index path incorrect", OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(ERROR_MESSAGE, "Index path incorrect");
}
} catch (com.sun.labs.minion.SearchEngineException ex) {
throw new SearchException(ex.getMessage());
}
this.retrievedDocsCount += finalResultsList.size();
return finalResultsList;
}
/**
* {@inheritDoc}
*/
@Override
public ArrayList<DocumentMetaData> search(String query, String docType, int field, boolean caseSensitive) throws SearchException {
ArrayList<DocumentMetaData> tempList = new ArrayList<DocumentMetaData>();
ArrayList<DocumentMetaData> finalResultsList = new ArrayList<DocumentMetaData>();
setStartTimeOfSearch(new Date());
tempList = search(query, field, caseSensitive);
finalResultsList = this.filterMetaDocuments(docType, tempList);
this.finalMetaResult = finalResultsList;
setEndTimeOfSearch(new Date());
String message = "Minion retrieved " + finalResultsList.size() + " document(s) (in " + getSearchTime() + " milliseconds) that matched query '" + query + "'for field and doctype.";
OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(INFORMATION_MESSAGE, message);
this.retrievedDocsCount += finalResultsList.size();
return finalResultsList;
}
/**
* {@inheritDoc}
*/
@Override
public ArrayList<DocumentMetaData> search(String query, String docType, int[] fields, boolean caseSensitive) throws SearchException {
ArrayList<DocumentMetaData> tempList = null, finalResult = new ArrayList<DocumentMetaData>();
String docSource;
for (int i = 0; i < this.documentalSource.size(); i++) {
docSource = this.documentalSource.get(i);
if (docSource.equalsIgnoreCase(docType)) {
if (fields != null && fields.length > 0) {
for (Integer field : fields) {
if (field != 0) {
tempList = search(query, docType, field, caseSensitive);
if (tempList != null) {
finalResult.addAll(tempList);
}
}
this.deleteRepeated(finalResult);
}
} else {
tempList = search(query, docType, caseSensitive);
this.retrievedDocsCount += tempList.size();
return tempList;
}
} else if (docType == null) {
tempList = search(query, caseSensitive);
this.retrievedDocsCount += tempList.size();
return tempList;
}
}
this.retrievedDocsCount += finalResult.size();
return finalResult;
}
@Override
public ArrayList<DocumentMetaData> search(String query, String[] docType, int[] fields, boolean caseSensitive) throws SearchException {
ArrayList<DocumentMetaData> tempList = null;
ArrayList<DocumentMetaData> documents = new ArrayList<DocumentMetaData>();
String doc;
for (int i = 0; i < docType.length; i++) {
doc = docType[i];
if (doc.equals("documents")) {
tempList = search(query, doc, caseSensitive);
} else {
tempList = search(query, doc, fields, caseSensitive);
}
documents.addAll(tempList);
}
this.deleteRepeated(documents);
this.retrievedDocsCount += documents.size();
return documents;
}
/**
* {@inheritDoc}
*/
@Override
public ArrayList<DocumentMetaData> search(String query, int[] fields, boolean caseSensitive) throws SearchException {
ArrayList<DocumentMetaData> tempList = new ArrayList<DocumentMetaData>();
ArrayList<DocumentMetaData> finalResultsList = new ArrayList<DocumentMetaData>();
setStartTimeOfSearch(new Date());
int fieldAnalize;
for (int i = 0; i < fields.length; i++) {
fieldAnalize = fields[i];
tempList = search(query, fieldAnalize, caseSensitive);
finalResultsList.addAll(tempList);
}
if (finalResultsList.size() > 1) {
deleteRepeated(finalMetaResult);
}
this.finalMetaResult = finalResultsList;
setEndTimeOfSearch(new Date());
String message = "Minion retrieved " + finalResultsList.size() + " document(s) (in " + getSearchTime() + " milliseconds) that matched query '" + query + "'for field and doctype.";
OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(INFORMATION_MESSAGE, message);
this.retrievedDocsCount += finalResultsList.size();
return finalResultsList;
}
/**
* {@inheritDoc}
*/
@Override
public ArrayList<DocumentMetaData> search(String query, String[] docTypes, int field, boolean caseSensitive) throws SearchException {
ArrayList<DocumentMetaData> tempList = new ArrayList<DocumentMetaData>();
ArrayList<DocumentMetaData> finalResultsList = new ArrayList<DocumentMetaData>();
setStartTimeOfSearch(new Date());
for (int i = 0; i < docTypes.length; i++) {
String doc = docTypes[i];
if (doc.equals("documents")) {
tempList = search(query, doc, caseSensitive);
} else {
tempList = search(query, doc, field, caseSensitive);
}
finalResultsList.addAll(tempList);
}
if (finalResultsList.size() > 1) {
deleteRepeated(finalMetaResult);
}
this.finalMetaResult = finalResultsList;
setEndTimeOfSearch(new Date());
String message = "Minion retrieved " + finalResultsList.size() + " document(s) (in " + getSearchTime() + " milliseconds) that matched query '" + query + "'for field and doctype.";
OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(INFORMATION_MESSAGE, message);
this.retrievedDocsCount += finalResultsList.size();
return finalResultsList;
}
/**
* {@inheritDoc}
*/
public ArrayList<DocumentMetaData> search(String query, String[] docTypes, boolean caseSensitive) throws SearchException {
ArrayList<DocumentMetaData> tempList = new ArrayList<DocumentMetaData>();
ArrayList<DocumentMetaData> finalResultsList = new ArrayList<DocumentMetaData>();
setStartTimeOfSearch(new Date());
for (int i = 0; i < docTypes.length; i++) {
String doc = docTypes[i];
tempList = search(query, doc, caseSensitive);
finalResultsList.addAll(tempList);
}
if (finalResultsList.size() > 1) {
deleteRepeated(finalMetaResult);
}
this.finalMetaResult = finalResultsList;
setEndTimeOfSearch(new Date());
String message = "Minion retrieved " + finalResultsList.size() + " document(s) (in " + getSearchTime() + " milliseconds) that matched query '" + query + "'. for doctypes ";
OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(INFORMATION_MESSAGE, message);
this.retrievedDocsCount += finalResultsList.size();
return finalResultsList;
}
/**
* {@inheritDoc}
*/
@Override
public long makeIndex() throws IndexException {
this.indexPath = new File(this.defaultIndexPath);
this.collectionPath = new File(this.defaultCollectionPath);
long indexedFiles = 0;
if (!this.collectionPath.exists() || this.collectionPath.listFiles().length == 0) {
String message = collectionPath + "does not exist or is empty";
OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(ERROR_MESSAGE, message);
throw new IndexException(message);
} else if (this.indexPath != null) {
indexedFiles = this.build(MAKE_INDEX);
}
this.indexedDocsCount += indexedFiles;
return indexedFiles;
}
/**
* {@inheritDoc}
*/
@Override
public long makeIndex(File collectionPath) throws IndexException {
this.indexPath = new File(this.defaultIndexPath);
this.collectionPath = collectionPath;
long indexedFiles = 0;
if (!this.collectionPath.exists() || this.collectionPath.listFiles().length == 0) {
String message = collectionPath + "does not exist or is empty";
OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(ERROR_MESSAGE, message);
throw new IndexException(message);
} else if (this.indexPath != null) {
indexedFiles = this.build(MAKE_INDEX);
}
this.indexedDocsCount += indexedFiles;
return indexedFiles;
}
/**
* {@inheritDoc}
*/
@Override
public long makeIndex(List<File> collectionPath) throws IndexException {
this.indexPath = new File(this.defaultIndexPath);
long indexedFiles = 0;
if (collectionPath.isEmpty()) {
OutputMonitor.printLine("The collection does not have files", OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(ERROR_MESSAGE, "The collection does not have files");
throw new IndexException("The collection does not have files");
} else if (this.indexPath != null) {
indexedFiles = this.build(collectionPath, MAKE_INDEX);
}
this.indexedDocsCount += indexedFiles;
return indexedFiles;
}
/**
* {@inheritDoc}
*/
@Override
public long makeIndex(File collectionPath, File indexPath) throws IndexException {
this.indexPath = indexPath;
this.collectionPath = collectionPath;
long indexedFiles = 0;
if (!this.collectionPath.exists() || this.collectionPath.listFiles().length == 0) {
String message = collectionPath + "does not exist or is empty";
OutputMonitor.printLine(message, OutputMonitor.ERROR_MESSAGE);
this.notifyTaskProgress(ERROR_MESSAGE, message);
throw new IndexException(message);
} else if (indexPath != null) {
indexedFiles = this.build(MAKE_INDEX);
} else {
String message = "indexPath is null";
OutputMonitor.printLine(message, OutputMonitor.ERROR_MESSAGE);
this.notifyTaskProgress(ERROR_MESSAGE, message);
throw new IndexException(message);
}
this.indexedDocsCount += indexedFiles;
return indexedFiles;
}
/**
* {@inheritDoc}
*/
@Override
public long makeIndex(List<File> collectionPath, File indexPath) throws IndexException {
this.indexPath = indexPath;
long indexedFiles = 0;
if (collectionPath.isEmpty()) {
OutputMonitor.printLine("The collection does not have files", OutputMonitor.ERROR_MESSAGE);
this.notifyTaskProgress(ERROR_MESSAGE, "The collection does not have files");
throw new IndexException("The collection does not have files");
} else if (this.indexPath != null) {
indexedFiles = this.build(collectionPath, MAKE_INDEX);
} else {
String message = "indexPath is null";
OutputMonitor.printLine(message, OutputMonitor.ERROR_MESSAGE);
this.notifyTaskProgress(ERROR_MESSAGE, message);
throw new IndexException(message);
}
return indexedFiles;
}
/**
* {@inheritDoc}
*/
@Override
public long updateIndex(File collectionPath) throws IndexException {
this.indexPath = new File(this.defaultIndexPath);
this.collectionPath = collectionPath;
long indexedFiles = 0;
if (!this.collectionPath.exists() || this.collectionPath.listFiles().length == 0) {
String message = collectionPath + "does not exist or is empty";
OutputMonitor.printLine(message, OutputMonitor.ERROR_MESSAGE);
this.notifyTaskProgress(ERROR_MESSAGE, message);
throw new IndexException(message);
} else if (this.indexPath != null) {
indexedFiles = this.build(ADD_INDEX);
}
this.indexedDocsCount += indexedFiles;
return indexedFiles;
}
/**
* {@inheritDoc}
*/
@Override
public long updateIndex(List<File> collectionPath) throws IndexException {
this.indexPath = new File(this.defaultIndexPath);
long indexedFiles = 0;
if (collectionPath.isEmpty()) {
this.notifyTaskProgress(ERROR_MESSAGE, "The collection does not have files");
throw new IndexException("The collection does not have files");
} else if (this.indexPath != null) {
indexedFiles = this.build(collectionPath, ADD_INDEX);
}
this.indexedDocsCount += indexedFiles;
return indexedFiles;
}
/**
* {@inheritDoc}
*/
@Override
public long updateIndex(File collectionPath, File indexPath) throws IndexException {
this.indexPath = indexPath;
this.collectionPath = collectionPath;
long indexedFiles = 0;
if (!this.collectionPath.exists() || this.collectionPath.listFiles().length == 0) {
String message = collectionPath + "does not exist or is empty";
OutputMonitor.printLine(message, OutputMonitor.ERROR_MESSAGE);
this.notifyTaskProgress(ERROR_MESSAGE, message);
throw new IndexException(message);
} else if (indexPath != null) {
indexedFiles = this.build(ADD_INDEX);
} else {
String message = "indexPath is null";
this.notifyTaskProgress(ERROR_MESSAGE, message);
throw new IndexException(message);
}
this.indexedDocsCount += indexedFiles;
return indexedFiles;
}
/**
* {@inheritDoc}
*/
@Override
public long updateIndex(List<File> collectionPath, File indexPath) throws IndexException {
this.indexPath = indexPath;
long indexedFiles = 0;
if (collectionPath.isEmpty()) {
this.notifyTaskProgress(ERROR_MESSAGE, "The collection does not have files");
throw new IndexException("The collection does not have files");
} else if (this.indexPath != null) {
indexedFiles = this.build(collectionPath, ADD_INDEX);
} else {
String message = "indexPath is null";
OutputMonitor.printLine(message, OutputMonitor.ERROR_MESSAGE);
this.notifyTaskProgress(ERROR_MESSAGE, message);
throw new IndexException(message);
}
this.indexedDocsCount += indexedFiles;
return indexedFiles;
}
/**
* {@inheritDoc}
*/
@Override
public boolean loadIndex(File indexPath) throws IndexException {
if (!indexPath.isDirectory() || !indexPath.exists() || indexPath == null || isMinionIndex(indexPath) == false) {
throw new IndexException("Not found index in this path");
} else {
try {
OutputMonitor.printLine("Loading Minion... ");
this.engine = SearchEngineFactory.getSearchEngine(indexPath.getPath());
int cant = this.engine.getNDocs();
this.notifyTaskProgress(INFORMATION_MESSAGE, "Loading Minion...");
try {
Thread.sleep(2000);
} catch (InterruptedException ex) {
ex.printStackTrace();
}
this.notifyLoadedDocument(cant);
this.notifyTaskProgress(INFORMATION_MESSAGE, "Total of documents of the index: " + cant);
//set path for search
this.indexPath = indexPath;
OutputMonitor.printLine("Total of documents of the index: " + cant, OutputMonitor.INFORMATION_MESSAGE);
return true;
} catch (com.sun.labs.minion.SearchEngineException ex) {
this.notifyTaskProgress(ERROR_MESSAGE, ex.getMessage());
throw new IndexException(ex.getMessage());
}
}
}
/**
* {@inheritDoc}
*/
@Override
public boolean loadIndex() throws IndexException {
File defaultFile = new File(this.defaultIndexPath);
try {
if (!defaultFile.isDirectory() || !defaultFile.exists()) {
throw new IndexException("Not found index");
} else if (isMinionIndex(defaultFile)) {
this.engine = SearchEngineFactory.getSearchEngine(this.defaultIndexPath);
int cant = this.engine.getNDocs();
this.notifyTaskProgress(INFORMATION_MESSAGE, "Loading Minion...");
try {
Thread.sleep(2000);
} catch (InterruptedException ex) {
ex.printStackTrace();
}
this.notifyLoadedDocument(cant);
this.notifyTaskProgress(INFORMATION_MESSAGE, "Total of documents of the index: " + cant);
//set path for search
this.indexPath = defaultFile;
return true;
} else {
return false;
}
} catch (com.sun.labs.minion.SearchEngineException ex) {
this.notifyTaskProgress(ERROR_MESSAGE, ex.getMessage());
throw new IndexException(ex.getMessage());
}
}
/**
* {@inheritDoc}
* @throws IndexException
*/
@Override
public boolean safeToBuildIndex(File indexPath, int operation) throws IndexException {
File idx = indexPath;
String idxpath = idx.getPath();
boolean flag = true;
if (!idx.exists()) {
if (!idx.mkdirs()) {
String message = "ERROR: Could not create the index folders at: " + idx.getPath() + ".\n" + "Aborting indexing process.";
OutputMonitor.printLine(message, OutputMonitor.ERROR_MESSAGE);
this.notifyTaskProgress(ERROR_MESSAGE, message);
flag = false;
throw new IndexException(message);
}
}
if (idx.exists() && isMinionIndex(idx)) {
/*Proceso de indexación en Minion por default:
si el directorio ya tiene un indice dentro este lo añade,
si el doc que va a añadir tiene el mismo(key) que tiene uno previamente indexado
entonces lo que hace es reemplazarlo.*/
switch (operation) {
case MAKE_INDEX:
// sobreescribir
String message = "Overwriting index " + idxpath + "\n";
OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(INFORMATION_MESSAGE, message);
deleteFiles(idx);
flag = true;
break;
case ADD_INDEX:
//añadir
message = "Appending new files to index " + idxpath + "\n";
OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(INFORMATION_MESSAGE, message);
flag = true;
break;
default:
message = "Not building index " + idxpath + "\n";
OutputMonitor.printLine(message, OutputMonitor.ERROR_MESSAGE);
this.notifyTaskProgress(ERROR_MESSAGE, message);
flag = false;
throw new IndexException(message);
}
} else if (operation == ADD_INDEX) {
String message = "ERROR: No Minion index exist in this address" + idx;
OutputMonitor.printLine(message, OutputMonitor.ERROR_MESSAGE);
this.notifyTaskProgress(ERROR_MESSAGE, message);
flag = false;
throw new IndexException(message);
}
return flag;
}
/**
* {@inheritDoc}
*/
@Override
public String getDocumentField(int field) {
switch (field) {
case FIELD_FILEPATH:
return "filepath";
case FIELD_NAME:
return "name";
case FIELD_CODE_PACKAGE:
return "package";
case FIELD_CODE_CLASSES_NAMES:
return "classesnames";
case FIELD_CODE_METHODS_NAMES:
return "methodsnames";
case FIELD_CODE_ALL_COMMENTS:
return "allcomments";
case FIELD_CODE_ALL_SOURCE:
return "allsource";//todo el contenido del codigo
case FIELD_CODE_VARIABLES_NAMES:
return "classesvariables";
case FIELD_CODE_JAVADOCS:
return "javadocs";
case FIELD_DOC_TEXT:
return "content"; //todo el contenido del pdf
case FIELD_DOC_BOOK:
return "book";
default:
return null;
}
}
/**
* Método para construir el índice con la colección por defecto
*
* @param operation --tipo de operacion a realizar con el índice: MAKE o ADD
*/
private long build(int operation) throws IndexException {
long indexedFiles = 0;
setStartTimeOfIndexation(new Date());
String message = "Minion index will be created at [" + this.indexPath + "]";
OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(INFORMATION_MESSAGE, message);
//inicia la indexacion
try {
if (safeToBuildIndex(this.indexPath, operation)) {
this.engine = SearchEngineFactory.getSearchEngine(this.indexPath.getPath()); //Gets the default configuration for an index in the given directory.
defineFields(this.engine);
SimpleIndexer si = this.engine.getSimpleIndexer();
indexedFiles = indexDocs(si, this.collectionPath, operation);
message = "Optimizing...";
OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(INFORMATION_MESSAGE, message);
setEndTimeOfIndexation(new Date());
message = "Indexation Time " + this.getIndexationTime() + " milliseconds.";
OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(INFORMATION_MESSAGE, message);
si.finish();
this.engine.close();
}
} catch (Exception e) {
message = " caught a " + e.getClass() + "\n with message: " + e.getMessage() + ".";
OutputMonitor.printStream(message, e);
this.notifyTaskProgress(ERROR_MESSAGE, message);
throw new IndexException(message);
}
return indexedFiles;
}
/**
* Método para construir el índice a partir de una colección dada
*
* @param collectionPath -- colección de files a indexar
* @param operation -- tipo de operacion a realizar con el índice: MAKE o ADD
*/
private long build(List<File> collectionPath, int operation) throws IndexException {
long indexedFiles = 0;
setStartTimeOfIndexation(new Date());
String message = "Minion index will be created at [" + this.indexPath + "]";
OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(INFORMATION_MESSAGE, message);
//inicia la indexacion
try {
if (safeToBuildIndex(this.indexPath, operation)) {
//obtiene la configuración por defecto para un índice en el directorio dado
this.engine = SearchEngineFactory.getSearchEngine(this.indexPath.getPath());
defineFields(this.engine);
SimpleIndexer si = this.engine.getSimpleIndexer();
indexedFiles = indexDocs(si, collectionPath, operation);
message = "Optimizing...";
OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(INFORMATION_MESSAGE, message);
setEndTimeOfIndexation(new Date());
message = "Indexation Time " + this.getIndexationTime() + " milliseconds.";
OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(INFORMATION_MESSAGE, message);
si.finish();
this.engine.close();
}
} catch (Exception e) {
message = " caught a " + e.getClass() + "\n with message: " + e.getMessage() + ".";
OutputMonitor.printStream(message, e);
this.notifyTaskProgress(ERROR_MESSAGE, message);
throw new IndexException(message);
}
return indexedFiles;
}
/**
* Establecer el tipo de campo y su nombre que se guardará en el índice
* @param engine
* @throws com.sun.labs.minion.SearchEngineException
*/
private void defineFields(com.sun.labs.minion.SearchEngine engine) throws com.sun.labs.minion.SearchEngineException {
this.engine = engine;
EnumSet<FieldInfo.Attribute> ia = FieldInfo.getIndexedAttributes();
EnumSet<FieldInfo.Attribute> enums = ia.clone();
enums.add(FieldInfo.Attribute.CASE_SENSITIVE);
enums.add(FieldInfo.Attribute.SAVED);
enums.add(FieldInfo.Attribute.INDEXED);
enums.add(FieldInfo.Attribute.TOKENIZED);//da error
enums.add(FieldInfo.Attribute.TRIMMED);
//Para todos
this.engine.defineField(new FieldInfo(getDocumentField(FIELD_FILEPATH), enums, FieldInfo.Type.STRING));
this.engine.defineField(new FieldInfo(getDocumentField(FIELD_NAME), enums, FieldInfo.Type.STRING));
//Para código fuente
this.engine.defineField(new FieldInfo(getDocumentField(FIELD_CODE_PACKAGE), enums, FieldInfo.Type.STRING));
this.engine.defineField(new FieldInfo(getDocumentField(FIELD_CODE_CLASSES_NAMES), enums, FieldInfo.Type.STRING));
this.engine.defineField(new FieldInfo(getDocumentField(FIELD_CODE_VARIABLES_NAMES), enums, FieldInfo.Type.STRING));
this.engine.defineField(new FieldInfo(getDocumentField(FIELD_CODE_METHODS_NAMES), enums, FieldInfo.Type.STRING));
this.engine.defineField(new FieldInfo(getDocumentField(FIELD_CODE_ALL_COMMENTS), enums, FieldInfo.Type.STRING));
this.engine.defineField(new FieldInfo(getDocumentField(FIELD_CODE_ALL_SOURCE), enums, FieldInfo.Type.STRING));
this.engine.defineField(new FieldInfo(getDocumentField(FIELD_CODE_JAVADOCS), enums, FieldInfo.Type.STRING));
//Para libros y otros docs
this.engine.defineField(new FieldInfo(getDocumentField(FIELD_DOC_TEXT), enums, FieldInfo.Type.STRING));
this.engine.defineField(new FieldInfo(getDocumentField(FIELD_DOC_BOOK), enums, FieldInfo.Type.STRING));
}
/**
* Verifica si en el directorio hay un indice minion
* @param dir
* @return
*/
private boolean isMinionIndex(File dir) {
String[] content = dir.list();
String string;
for (int i = 0; i < content.length; i++) {
string = content[i];
if (string.equalsIgnoreCase("config.xml")) {
return true;
}
}
return false;
}
/**
* Indexa los documentos de diferentes ficheros
* @param sind
* @param fileDir
* @throws IOException
*/
private int indexDocs(SimpleIndexer simpleInd, List<File> fileDir, int operation) throws IndexException {
int docCount = 0;
File file;
String message;
for (int i = 0; i < fileDir.size(); i++) {
file = fileDir.get(i);
if (file.getName().endsWith(".java") || file.getName().endsWith(".pdf") || file.getName().endsWith(".txt")) {
{
indexFile(simpleInd, file, operation);
message = "Adding: " + file;
docCount++;
OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(INFORMATION_MESSAGE, message);
}
} else {
message = "There are files in the collection that are not: .java, pdf o txt documents" + "\n" + "so, they could not be indexed.";
OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(INFORMATION_MESSAGE, message);
}
}
return docCount;
}
/**
* Indexa los documentos que se encuentran en un fichero
* @param sind
* @param fileDir
* @throws IOException
*/
private int indexDocs(SimpleIndexer simpleInd, File fileDir, int operation) throws IndexException {
int docCount = 0;
if (fileDir.canRead()) {
if (fileDir.isDirectory()) {
String[] files = fileDir.list();
this.indexedDocsCount = files.length;
if (files != null) {
for (int i = 0; i < files.length; i++) {
indexDocs(simpleInd, new File(fileDir, files[i]), operation);
}
}
} else if (fileDir.getName().endsWith(".java") || fileDir.getName().endsWith(".pdf") || fileDir.getName().endsWith(".txt")) {
{
indexFile(simpleInd, fileDir, operation);
String temp = "Adding: " + fileDir;
docCount++;
OutputMonitor.printLine(temp, OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(INFORMATION_MESSAGE, temp);
}
} else {
String message = "There are files in the collection that are not: .java, pdf o txt documents" + "\n" + "so, they could not be indexed.";
OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(INFORMATION_MESSAGE, message);
}
}
return docCount;
}
/**
* Indexacion por campo de cada documento del repositorio
* @param simpleInd
* @param f
* @throws IOException
*/
private void indexFile(SimpleIndexer simpleInd, File f, int operation) throws IndexException {
if (f.isHidden() || !f.exists() || !f.canRead()) {
return;
}
DocumentMinion docm = null;
try {
if (f.getPath().endsWith(".pdf")) {
PdfParser pdfp = new PdfParser();
pdfp.divideTextforMinion(f, simpleInd);
} else if (f.getPath().endsWith(".java")) {
ArrayList<String> comment = new ArrayList<String>();
ArrayList<String> javadocs = new ArrayList<String>();
JavaParser jp = new JavaParser();
jp.AnalyzeDocument(f);
// Start a new document, using the path as a key
docm = new DocumentMinion(simpleInd, f.getPath());
docm.addField(getDocumentField(FIELD_FILEPATH), f.getCanonicalPath());
docm.addField(getDocumentField(FIELD_NAME), f.getName());
docm.addField(getDocumentField(FIELD_CODE_ALL_SOURCE), jp.getAllSource()); //en este campo se guarda todo el codigo del documento
if (jp.getClassPackage() != null) {
docm.addField(getDocumentField(FIELD_CODE_PACKAGE), jp.getClassPackage());
}
for (int i = 0; i < jp.getClassNumber(); i++) {
docm.addField(getDocumentField(FIELD_CODE_CLASSES_NAMES), jp.getClassesNames(i));
if (jp.getClassesComments(i) != null) {
comment.add(jp.getClassesComments(i));
}
if (jp.getClassesJDocs(i) != null) {
javadocs.add(jp.getClassesJDocs(i));
}
for (int l = 0; l < jp.getClassVariableNumber(i); l++) {
docm.addField(getDocumentField(FIELD_CODE_VARIABLES_NAMES), jp.getClassesVarName(i, l));
if (jp.getClassesCommentVariables(i, l) != null) {
comment.add(jp.getClassesCommentVariables(i, l));
}
if (jp.getVariablesJDocs(i, l) != null) {
javadocs.add(jp.getVariablesJDocs(i, l));
}
}
for (int j = 0; j < jp.getClassesMethods(i); j++) {
docm.addField(getDocumentField(FIELD_CODE_METHODS_NAMES), jp.getClassesMethodsName(i, j));
if (jp.getClassesMethodComment(i, j) != null) {
comment.add(jp.getClassesMethodComment(i, j));
}
if (jp.getClassesMethodJDocs(i, j) != null) {
javadocs.add(jp.getClassesMethodJDocs(i, j));
}
}
//unir los comentarios
docm.addField(getDocumentField(FIELD_CODE_ALL_COMMENTS), joinData(comment));
docm.addField(getDocumentField(FIELD_CODE_JAVADOCS), joinData(javadocs));
}
docm.closeDocument();
} else if (f.getPath().endsWith(".txt")) {
docm = new DocumentMinion(simpleInd, f.getPath());
docm.addField(getDocumentField(FIELD_FILEPATH), f.getCanonicalPath());
docm.addField(getDocumentField(FIELD_NAME), f.getName());
docm.addField(getDocumentField(FIELD_DOC_TEXT), readFile(f));
docm.closeDocument();
}
if (operation == ADD_INDEX) {
this.notifyAddedDocument();
} else if (operation == MAKE_INDEX) {
this.notifyIndexedDocument();
}
} catch (IOException ex) {
OutputMonitor.printStream("", ex);
this.notifyTaskProgress(ERROR_MESSAGE, ex.getMessage());
throw new IndexException(ex.getMessage());
}
}
/**
* lee el contenido de un file
* @param f
* @return
*/
private String readFile(File f) {
String result = " ";
char c;
FileInputStream in = null;
try {
in = new FileInputStream(f);
} catch (FileNotFoundException ex) {
OutputMonitor.printStream("", ex);
}
int buffer;
try {
while ((buffer = in.read()) != -1) {
c = (char) buffer;
result = result.concat(String.valueOf(c));
}
in.close();
} catch (IOException e) {
e.printStackTrace();
}
return result;
}
/**
* Une los comentarios de una clase de codigo fuente:
* comentarios de las variables, metodos, y de la clase
* y une los javadocs también
* @param aa
* @return
*/
private String joinData(ArrayList<String> aa) {
String result = " ";
if (aa.size() != 0) {
for (int i = 0; i < aa.size(); i++) {
if (aa.get(i) != null) {
result = result.concat(" " + aa.get(i));
}
}
} else {
result = " "; //empty
}
return result;
}
/**
* Guardar resultados de busqueda en forma de Metadocument
* @param sd
* @param queryT
* @return
*/
private ArrayList<DocumentMetaData> saveResults(List<Result> sd) {
ArrayList<DocumentMetaData> docsFound = new ArrayList<DocumentMetaData>();
Result result;
List fpath, namef;
String filePath, name, fileType, summary;
Float scor;
File fil;
long fsize;
for (int i = 0; i < sd.size(); i++) {
result = sd.get(i);
fpath = result.getField(getDocumentField(FIELD_FILEPATH));
filePath = (String) fpath.get(0); //hasta ahora solo se guarda un solo valor por campo
scor = result.getScore();
namef = result.getField(getDocumentField(FIELD_NAME));
name = (String) namef.get(0); //hasta ahora solo se guarda un solo valor por campo
fileType = getFileExtension(filePath);
fil = new File(filePath);
fsize = fil.length();
summary = getPassage2(result, fileType);
if (summary == null) {
summary = " ";
}
DocumentMetaData metaDoc = new DocumentMetaData();
metaDoc.setPath(filePath);
metaDoc.setName(name);
metaDoc.setScore(Double.valueOf(String.valueOf(scor)));
metaDoc.setSize(fsize);
metaDoc.setIndex(result.getKey().hashCode()); //numero que representa ese doc en los resultados de busqueda
metaDoc.setType(fileType);
metaDoc.setSynthesis(summary);
metaDoc.setSearcher(KeySearchable.MINION_SEARCH_ENGINE);
docsFound.add(metaDoc);
}
this.retrievedDocsCount = docsFound.size();
return docsFound;
}
/**
* Para la sumarización del documento
* @param r
* @param fileType
* @return
*/
private String getPassage(Result r, String fileType) {
String highlighted = " ";
String field = null;
if (fileType.equalsIgnoreCase("pdf")) {
field = getDocumentField(FIELD_DOC_BOOK);
} else if (fileType.equalsIgnoreCase("java")) {
field = getDocumentField(FIELD_CODE_ALL_SOURCE);
} else if (fileType.equalsIgnoreCase("txt")) {
field = getDocumentField(FIELD_DOC_TEXT);
}
this.passageBuilder = r.getPassageBuilder();
this.passageBuilder.addPassageField(field, Passage.Type.JOIN, -1, 256, true);
@SuppressWarnings("unchecked")
Map<String, Object> docMap = getDocumentMap(r.getKey());
//Gets the highlighted passages that were specified using addPassageField.
Map<String, List<Passage>> pmap = this.passageBuilder.getPassages(docMap, -1, -1, false);
if (pmap.get(field) != null) {
if (!pmap.get(field).isEmpty()) {
Passage sp = pmap.get(field).get(0); //get the passage of that field to highlight
if (sp != null) {
// this.simpleHighlighter = new SimpleHighlighter("<font color=\"#00ff00\">","</font>", "<b>", "</b>");
tHighlighter = new TextHighlighter();
highlighted = sp.highlight(tHighlighter, false);
}
}
}
return highlighted;
}
private String getPassage2(Result r, String fileType) {
String field = null;
String summary = null;
if (fileType.equalsIgnoreCase("pdf")) {
field = getDocumentField(FIELD_DOC_BOOK);
} else if (fileType.equalsIgnoreCase("java")) {
field = getDocumentField(FIELD_CODE_ALL_SOURCE);
} else if (fileType.equalsIgnoreCase("txt")) {
field = getDocumentField(FIELD_DOC_TEXT);
}
if (field != null) {
String single = (String) r.getSingleFieldValue(field);
if (single.length() > 200) {
summary = single.substring(0, 200);
} else {
summary = single.substring(0);
}
}
return summary;
}
/**
* Dado un key de un doc obtiene la relacion <field,value> para todos
* los campos de este doc
* @param keyDoc
* @return
*/
private Map getDocumentMap(String keyDoc) {
LinkedHashMap<String, String> list2 = new LinkedHashMap<String, String>();
String keyField, value;
List valueField;
if (this.engine != null) {
for (Iterator<Entry<String, List>> list = this.engine.getDocument(keyDoc).getSavedFields(); list.hasNext();) {
Entry<String, List> entry = list.next();
keyField = entry.getKey();
valueField = entry.getValue();
for (int i = 0; i < valueField.size(); i++) {
value = (String) valueField.get(i);
list2.put(keyField, value);
}
}
}
// this.engine.getSimpleIndexer().endDocument();
return list2;
}
}