/*
* DrakkarKeel - An Enterprise Collaborative Search Platform
*
* The contents of this file are subject under the terms described in the
* DRAKKARKEEL_LICENSE file included in this distribution; you may not use this
* file except in compliance with the License.
*
* 2013-2014 DrakkarKeel Platform.
*/
package drakkar.mast.retrieval;
import drakkar.oar.DocumentMetaData;
import drakkar.oar.facade.event.FacadeDesktopListener;
import static drakkar.oar.util.KeyMessage.*;
import drakkar.oar.util.KeySearchable;
import drakkar.oar.util.OutputMonitor;
import drakkar.mast.IndexException;
import drakkar.mast.SearchException;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import uk.ac.gla.terrier.indexing.BasicIndexer;
import uk.ac.gla.terrier.indexing.BasicSinglePassIndexer;
import uk.ac.gla.terrier.indexing.BlockIndexer;
import uk.ac.gla.terrier.indexing.BlockSinglePassIndexer;
import uk.ac.gla.terrier.indexing.Collection;
import uk.ac.gla.terrier.indexing.Indexer;
import uk.ac.gla.terrier.indexing.SimpleFileCollection;
import uk.ac.gla.terrier.matching.ResultSet;
import uk.ac.gla.terrier.querying.Manager;
import uk.ac.gla.terrier.querying.SearchRequest;
import uk.ac.gla.terrier.querying.parser.Query;
import uk.ac.gla.terrier.querying.parser.QueryParser;
import uk.ac.gla.terrier.structures.Index;
import uk.ac.gla.terrier.utility.ApplicationSetup;
import uk.ac.gla.terrier.utility.Files;
/**
* Clase que instancia el motor de búsqueda Terrier versión 2.1
*
*
*/
public final class TerrierContext extends EngineContextAdapter {
public Indexer indexer;
public Manager queryingManager;
public String mModel = ApplicationSetup.getProperty("desktop.matching", "Matching");
public String wModel = ApplicationSetup.getProperty("desktop.model", "PL2");
public Index diskIndex;
public SimpleFileCollection sfc; //to build the index
public Query queryTerrier;
public SearchRequest searchRequest;
public ResultSet result;
private boolean indexing;
String message;
/**
* default constructor
*/
public TerrierContext() {
this.defaultIndexPath = "./index/terrier/";
setIndexing(false);
}
/**
* constructor
*
* @param listener --oyente de los procesos de este motor
*/
public TerrierContext(FacadeDesktopListener listener) {
super(listener);
this.defaultIndexPath = "./index/terrier/";
setIndexing(false);
}
/**
* {@inheritDoc}
*/
@Override
public ArrayList<DocumentMetaData> search(String query, boolean caseSensitive) throws SearchException {
setStartTimeOfSearch(new Date());
boolean flag = false;
ArrayList<DocumentMetaData> finalResultsList = new ArrayList<DocumentMetaData>();
ArrayList<DocumentMetaData> docindexed = new ArrayList<DocumentMetaData>();
ApplicationSetup.TERRIER_INDEX_PATH = this.indexPath.getAbsolutePath();
//Verificar el directorio donde se encuentra el indice
if (this.indexPath == null || this.indexPath.listFiles().length == 0) {
this.notifyTaskProgress(ERROR_MESSAGE, "Index path incorrect");
} else {
flag = true;
}
if (flag) {
try {
this.queryTerrier = null;
try {
this.queryTerrier = QueryParser.parseQuery(query);
} catch (Exception e) {
//remove everything except character and spaces, and retry
// this.queryTerrier = QueryParser.parseQuery(query.replaceAll("[^a-zA-Z0-9 ]", ""));
OutputMonitor.printLine(message, OutputMonitor.ERROR_MESSAGE);
this.notifyTaskProgress(ERROR_MESSAGE, message);
throw new SearchException(e.getMessage());
}
if (setManager()) {
this.searchRequest = this.queryingManager.newSearchRequest();
this.searchRequest.setQuery(this.queryTerrier);
this.searchRequest.addMatchingModel(this.mModel, this.wModel);
this.searchRequest.setControl("c", "1.0d"); //nombre y valor del controlador
this.queryingManager.runPreProcessing(this.searchRequest);
this.queryingManager.runMatching(this.searchRequest);
this.queryingManager.runPostProcessing(this.searchRequest);
this.queryingManager.runPostFilters(this.searchRequest);
this.result = this.searchRequest.getResultSet();
docindexed = saveResults(this.result);
//eliminar repetidos
deleteRepeated(docindexed);
finalResultsList = docindexed;
this.finalMetaResult = docindexed;
setEndTimeOfSearch(new Date());
message = "Terrier retrieved " + this.result.getResultSize() + " document(s) (in " + getSearchTime() + " milliseconds) that matched query '" + query + "'.";
OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(INFORMATION_MESSAGE, message);
} else {
this.notifyTaskProgress(ERROR_MESSAGE, "The manager wasn't charged");
}
} catch (Exception e) {
message = "An exception when running the query: '" + query + "', Error: " + e.getMessage();
OutputMonitor.printLine(message, OutputMonitor.ERROR_MESSAGE);
this.notifyTaskProgress(ERROR_MESSAGE, message);
throw new SearchException(message);
}
}
this.retrievedDocsCount += finalResultsList.size();
return finalResultsList;
}
/**
* {@inheritDoc}
*/
@Override
public ArrayList<DocumentMetaData> search(String query, String docType, boolean caseSensitive) throws SearchException {
ArrayList<DocumentMetaData> finalResultsList = new ArrayList<DocumentMetaData>();
setStartTimeOfSearch(new Date());
ArrayList<DocumentMetaData> searchResults = search(query, caseSensitive);
finalResultsList = this.filterMetaDocuments(docType, searchResults);
this.finalMetaResult = finalResultsList;
setEndTimeOfSearch(new Date());
message = "Terrier retrieved " + finalResultsList.size() + " document(s) (in " + getSearchTime() + " milliseconds) that matched query '" + query + "'for " + docType;
OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(INFORMATION_MESSAGE, message);
this.retrievedDocsCount += finalResultsList.size();
return finalResultsList;
}
/**
* {@inheritDoc}
*/
@Override
public ArrayList<DocumentMetaData> search(String query, String[] docTypes, boolean caseSensitive) throws SearchException {
ArrayList<DocumentMetaData> finalResultsList = new ArrayList<DocumentMetaData>();
ArrayList<DocumentMetaData> tempList = new ArrayList<DocumentMetaData>();
setStartTimeOfSearch(new Date());
for (int i = 0; i < docTypes.length; i++) {
String doc = docTypes[i];
tempList = search(query, doc, caseSensitive);
finalResultsList.addAll(tempList);
}
if (finalResultsList.size() > 1) {
deleteRepeated(finalMetaResult);
}
this.finalMetaResult = finalResultsList;
setEndTimeOfSearch(new Date());
message = "Terrier retrieved " + this.finalMetaResult.size() + " document(s) (in " + getSearchTime() + " milliseconds) that matched query '" + query + "for doctypes";
OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(INFORMATION_MESSAGE, message);
this.retrievedDocsCount += finalResultsList.size();
return finalResultsList;
}
/**
* {@inheritDoc}
*/
@Override
public long makeIndex() throws IndexException {
this.indexPath = new File(this.defaultIndexPath);
this.collectionPath = new File(this.defaultCollectionPath);
long indexedFiles = 0;
if (!this.collectionPath.exists() || this.collectionPath.listFiles().length == 0) {
message = collectionPath + "does not exist or is empty";
this.notifyTaskProgress(ERROR_MESSAGE, message);
throw new IndexException(message);
} else if (this.indexPath != null) {
ApplicationSetup.TERRIER_INDEX_PATH = this.indexPath.getAbsolutePath();
indexedFiles = this.build();
}
this.indexedDocsCount += indexedFiles;
return indexedFiles;
}
/**
* {@inheritDoc}
*/
@Override
public long makeIndex(File collectionPath) throws IndexException {
this.indexPath = new File(this.defaultIndexPath);
this.collectionPath = collectionPath;
long indexedFiles = 0;
if (!this.collectionPath.exists() || this.collectionPath.listFiles().length == 0) {
message = collectionPath + "does not exist or is empty";
this.notifyTaskProgress(ERROR_MESSAGE, message);
throw new IndexException(message);
} else if (this.indexPath != null) {
ApplicationSetup.TERRIER_INDEX_PATH = this.indexPath.getAbsolutePath();
indexedFiles = this.build();
}
this.indexedDocsCount += indexedFiles;
return indexedFiles;
}
/**
* {@inheritDoc}
*/
@Override
public long makeIndex(List<File> collectionPath) throws IndexException {
this.indexPath = new File(this.defaultIndexPath);
long indexedFiles = 0;
if (collectionPath.isEmpty()) {
this.notifyTaskProgress(ERROR_MESSAGE, "The collection does not have files");
throw new IndexException("The collection does not have files");
} else if (this.indexPath != null) {
ApplicationSetup.TERRIER_INDEX_PATH = this.indexPath.getAbsolutePath();
indexedFiles = this.build(collectionPath);
}
this.indexedDocsCount += indexedFiles;
return indexedFiles;
}
/**
* {@inheritDoc}
*/
@Override
public long makeIndex(File collectionPath, File indexPath) throws IndexException {
long indexedFiles = 0;
this.collectionPath = collectionPath;
this.indexPath = indexPath;
if (!this.collectionPath.exists() || this.collectionPath.listFiles().length == 0) {
message = collectionPath + "does not exist or is empty";
this.notifyTaskProgress(ERROR_MESSAGE, message);
throw new IndexException(message);
} else if (indexPath != null) {
ApplicationSetup.TERRIER_INDEX_PATH = this.indexPath.getAbsolutePath();
indexedFiles = this.build();
} else {
message = "indexPath is null";
this.notifyTaskProgress(ERROR_MESSAGE, message);
throw new IndexException(message);
}
this.indexedDocsCount += indexedFiles;
return indexedFiles;
}
/**
* {@inheritDoc}
*/
@Override
public long makeIndex(List<File> collectionPath, File indexPath) throws IndexException {
long indexedFiles = 0;
this.indexPath = indexPath;
if (collectionPath.isEmpty()) {
this.notifyTaskProgress(ERROR_MESSAGE, "The collection does not have files");
throw new IndexException("The collection does not have files");
} else if (this.indexPath != null) {
ApplicationSetup.TERRIER_INDEX_PATH = this.indexPath.getAbsolutePath();
indexedFiles = this.build(collectionPath);
} else {
message = "indexPath is null";
this.notifyTaskProgress(ERROR_MESSAGE, message);
throw new IndexException(message);
}
this.indexedDocsCount += indexedFiles;
return indexedFiles;
}
/**
* {@inheritDoc}
*/
@Override
public boolean loadIndex() throws IndexException {
File defaultFile = new File(this.defaultIndexPath);
ApplicationSetup.TERRIER_INDEX_PATH = defaultFile.getAbsolutePath();
if (defaultFile.exists() && Index.existsIndex(ApplicationSetup.TERRIER_INDEX_PATH, "data")) {
OutputMonitor.printLine("Loading Terrier... ");
if (this.diskIndex == null) {
this.diskIndex = Index.createIndex();
}
File[] list = defaultFile.listFiles();
if (this.diskIndex == null || list.length == 0) {
if (this.diskIndex != null) {
this.diskIndex.close();
}
this.diskIndex = null;
return false;
} else if (this.diskIndex != null && list.length != 0) {
this.notifyTaskProgress(INFORMATION_MESSAGE, "Loading Terrier...");
this.notifyTaskProgress(INFORMATION_MESSAGE, "Number of Documents: " + this.diskIndex.getCollectionStatistics().getNumberOfDocuments());
this.notifyTaskProgress(INFORMATION_MESSAGE, "Number of Tokens: " + this.diskIndex.getCollectionStatistics().getNumberOfTokens());
this.notifyTaskProgress(INFORMATION_MESSAGE, "Number of Unique Terms: " + this.diskIndex.getCollectionStatistics().getNumberOfUniqueTerms());
this.notifyTaskProgress(INFORMATION_MESSAGE, "Number of Pointers: " + this.diskIndex.getCollectionStatistics().getNumberOfPointers());
if (isIndexing()) {
this.notifyIndexedDocumentTerrier(this.diskIndex.getCollectionStatistics().getNumberOfDocuments());
} else {
this.notifyLoadedDocument(this.diskIndex.getCollectionStatistics().getNumberOfDocuments());
message = "Number of docs loaded: " + this.diskIndex.getCollectionStatistics().getNumberOfDocuments();
OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
//set path for search
this.indexPath = new File(ApplicationSetup.TERRIER_INDEX_PATH);
}
}
} else {
this.notifyTaskProgress(ERROR_MESSAGE, "The index has not been created");
throw new IndexException("The index has not been created");
}
return true;
}
/**
* {@inheritDoc}
*/
@Override
public boolean loadIndex(File indexPath) throws IndexException {
boolean flag = false;
// verificar que sea un indice terrier
if (Index.existsIndex(indexPath.getPath(), "data")) {
OutputMonitor.printLine("Loading Terrier... ");
ApplicationSetup.TERRIER_INDEX_PATH = indexPath.getPath();
if (this.diskIndex == null) {
this.diskIndex = Index.createIndex();
}
File[] list = indexPath.listFiles();
if (this.diskIndex != null && list.length != 0) {
this.notifyTaskProgress(INFORMATION_MESSAGE, "Loading Terrier...");
this.notifyTaskProgress(INFORMATION_MESSAGE, "Number of Documents: " + this.diskIndex.getCollectionStatistics().getNumberOfDocuments() + "\n");
this.notifyTaskProgress(INFORMATION_MESSAGE, "Number of Tokens: " + this.diskIndex.getCollectionStatistics().getNumberOfTokens() + "\n");
this.notifyTaskProgress(INFORMATION_MESSAGE, "Number of Unique Terms: " + this.diskIndex.getCollectionStatistics().getNumberOfUniqueTerms() + "\n");
this.notifyTaskProgress(INFORMATION_MESSAGE, "Number of Pointers: " + this.diskIndex.getCollectionStatistics().getNumberOfPointers() + "\n");
if (isIndexing()) {
this.notifyIndexedDocumentTerrier(this.diskIndex.getCollectionStatistics().getNumberOfDocuments());
} else if (isIndexing() == false) {
this.notifyLoadedDocument(this.diskIndex.getCollectionStatistics().getNumberOfDocuments());
message = "Number of docs loaded: " + this.diskIndex.getCollectionStatistics().getNumberOfDocuments();
OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
//set path for search
this.indexPath = new File(ApplicationSetup.TERRIER_INDEX_PATH);
}
flag = true;
} else {
this.notifyTaskProgress(ERROR_MESSAGE, "no se puede cargar el índice");
}
} else {
throw new IndexException("No existe un índice en este directorio: " + indexPath.getPath());
}
return flag;
}
/**
* {@inheritDoc}
*
*/
public boolean safeToBuildIndex(File idx) throws IndexException {
boolean flag = true;
if (!idx.exists()) {
if (!idx.mkdirs()) {
//ensure that the index folder exists
String msg = "ERROR: Could not create the index folders at: " + idx.getPath() + ".\n" + "Aborting indexing process.";
this.notifyTaskProgress(ERROR_MESSAGE, msg);
flag = false;
throw new IndexException(msg);
}
} else if (idx.exists() && idx.listFiles().length != 0) {
message = "Overwriting index " + idx + "\n";
this.notifyTaskProgress(INFORMATION_MESSAGE, message);
deleteFiles(idx);
flag = true;
}
return flag;
}
/**
* Método para construir el índice con la colección por defecto
*
*/
private long build() throws IndexException {
long indexedFiles = 0;
setStartTimeOfIndexation(new Date());
message = "Indexing directory '" + this.collectionPath + "'...";
this.notifyTaskProgress(INFORMATION_MESSAGE, message);
try {
this.diskIndex = Index.createIndex();
//deleting existing files
if (this.diskIndex != null) {
this.diskIndex.close();
this.diskIndex = null;
}
if (safeToBuildIndex(this.indexPath)) {
//determinar indexer
final boolean useSinglePass = Boolean.parseBoolean(ApplicationSetup.getProperty("desktop.indexing.singlepass", "false"));
this.indexer = ApplicationSetup.BLOCK_INDEXING
? useSinglePass
? new BlockSinglePassIndexer(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX)
: new BlockIndexer(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX)
: useSinglePass
? new BasicSinglePassIndexer(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX)
: new BasicIndexer(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX);
//get all the filespaths to index
List<String> foldersList = dataFilesList(this.collectionPath, new ArrayList<String>());
List<String> newList = verifyDocumentType(foldersList);
if (!newList.isEmpty()) {
this.indexedDocsCount = foldersList.size();
this.sfc = new SimpleFileCollection(foldersList, true);
this.indexer.index(new Collection[]{this.sfc}); //crea el indice
System.gc();
//abrir la lista de files de SimpleFileCollection
List<String> fileList = this.sfc.getFileList();
savePathsList(new File(ApplicationSetup.makeAbsolute(ApplicationSetup.getProperty("desktop.directories.filelist", "data.filelist"),
ApplicationSetup.TERRIER_INDEX_PATH)), fileList);
//verificar que se creo el indice
setIndexing(true);
if (loadIndex()) {
message = "El proceso de indexación ha terminado correctamente.\n"
+ "--------- Indexed files ---------";
this.notifyTaskProgress(INFORMATION_MESSAGE, message);
indexedFiles = fileList.size();
String file;
for (int i = 0; i < indexedFiles; i++) {
file = fileList.get(i);
this.notifyTaskProgress(INFORMATION_MESSAGE, " - " + file);
}
} else {
this.notifyTaskProgress(ERROR_MESSAGE, "Problems when charging the index");
throw new IndexException("Problems when charging the index");
}
setIndexing(false);
setEndTimeOfIndexation(new Date());
String time = "IndexationTime " + getIndexationTime() + " milliseconds";
this.notifyTaskProgress(INFORMATION_MESSAGE, time);
} else {
this.notifyTaskProgress(ERROR_MESSAGE, "There are not files for indexing.");
throw new IndexException("There are not files for indexing.");
}
}
} catch (Exception e) {
message = "Class: SearchEngineTerrier\n"
+ " Message: An unexpected exception occured while indexing. Indexing has been aborted.\n";
this.notifyTaskProgress(INFORMATION_MESSAGE, message + " Error: " + e.getMessage());
throw new IndexException(message);
}
return indexedFiles;
}
/**
* Método para construir el índice a partir de una colección de files
*
*/
private long build(List<File> collectionPath) throws IndexException {
long indexedFiles = 0;
setStartTimeOfIndexation(new Date());
message = "Indexing to directory '" + this.indexPath + "'...";
this.notifyTaskProgress(INFORMATION_MESSAGE, message);
try {
this.diskIndex = Index.createIndex();
//deleting existing files
if (this.diskIndex != null) {
this.diskIndex.close();
this.diskIndex = null;
}
if (safeToBuildIndex(this.indexPath)) {
//determinar indexer
final boolean useSinglePass = Boolean.parseBoolean(ApplicationSetup.getProperty("desktop.indexing.singlepass", "false"));
this.indexer = ApplicationSetup.BLOCK_INDEXING
? useSinglePass
? new BlockSinglePassIndexer(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX)
: new BlockIndexer(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX)
: useSinglePass
? new BasicSinglePassIndexer(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX)
: new BasicIndexer(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX);
//get all the filespaths to index
List<String> foldersList = dataFilesList(collectionPath, new ArrayList<String>());
List<String> newList = verifyDocumentType(foldersList);
if (!newList.isEmpty()) {
this.indexedDocsCount = foldersList.size();
this.sfc = new SimpleFileCollection(foldersList, true);
this.indexer.index(new Collection[]{this.sfc}); //crea el indice
System.gc();
//abrir la lista de files de SimpleFileCollection
List<String> fileList = this.sfc.getFileList();
savePathsList(new File(ApplicationSetup.makeAbsolute(ApplicationSetup.getProperty("desktop.directories.filelist", "data.filelist"),
ApplicationSetup.TERRIER_INDEX_PATH)), fileList);
//verificar que se creo el indice
setIndexing(true);
if (loadIndex()) {
message = "El proceso de indexación ha terminado correctamente.\n"
+ "--------- Indexed files ---------";
this.notifyTaskProgress(INFORMATION_MESSAGE, message);
String file;
for (int i = 0; i < fileList.size(); i++) {
file = fileList.get(i);
this.notifyTaskProgress(INFORMATION_MESSAGE, " - " + file);
}
} else {
this.notifyTaskProgress(ERROR_MESSAGE, "Problems when charging the index");
throw new IndexException("There are not files for indexing.");
}
setIndexing(false);
setEndTimeOfIndexation(new Date());
String time = "IndexationTime " + getIndexationTime() + " milliseconds";
this.notifyTaskProgress(INFORMATION_MESSAGE, time);
} else {
this.notifyTaskProgress(ERROR_MESSAGE, "There are not files for indexing.");
throw new IndexException("There are not files for indexing.");
}
}
} catch (Exception e) {
message = "Class: SearchEngineTerrier\n"
+ " Message: An unexpected exception occured while indexing. Indexing has been aborted.\n";
this.notifyTaskProgress(INFORMATION_MESSAGE, message + " Error: " + e.getMessage());
throw new IndexException(message +e.getMessage());
}
indexedFiles = this.indexedDocsCount;
return indexedFiles;
}
/**
* Devuelve en un a lista de String los paths de todos los files de una lista de directorios
*
* @param dirPath --lista de los files
* @param list --lista vacía
*
* @return --lista con los path
*/
private List<String> dataFilesList(List<File> dirPath, List<String> list) {
File file;
for (int i = 0; i < dirPath.size(); i++) {
file = dirPath.get(i);
if (file.canRead()) {
if (file.isDirectory()) {
File[] files = file.listFiles();
if (files != null) {
for (int j = 0; j < files.length; j++) {
dataFilesList(files[j], list);
}
}
} else {
list.add(file.getPath());
}
}
}
return list;
}
/**
* Devuelve en un a lista de String los paths de todos los files de un directorio
*
* @param dirPath --directorio de files
* @param list --lista vacía
*
* @return --lista con los path
*/
private List<String> dataFilesList(File dirPath, List<String> list) {
// do not try to index files that cannot be read
if (dirPath.canRead()) {
if (dirPath.isDirectory()) {
File[] files = dirPath.listFiles();
// an IO error could occur
if (files != null) {
for (int i = 0; i < files.length; i++) {
dataFilesList(files[i], list);
}
}
} else {
list.add(dirPath.getPath());
}
}
return list;
}
/**
* Guarda una lista de paths en un file
* @param file
* @param list
*/
private void savePathsList(File file, List<String> list) {
try {
PrintWriter writer = new PrintWriter(
Files.writeFileWriter(file));
for (int i = 0; i < list.size(); i++) {
writer.println(list.get(i));
this.notifyTaskProgress(INFORMATION_MESSAGE, "Saving " + list.get(i));
}
writer.close();
} catch (IOException ioe) {
String error = "Error writing to file : " + file + " : " + ioe.getMessage();
this.notifyTaskProgress(ERROR_MESSAGE, error);
OutputMonitor.printStream(error, ioe);
return;
}
}
/**
* Carga una lista de los archivos de un directorio
* @param file
* @return
*/
private List<String> loadList(File file) {
if (file == null || !file.exists()) {
return new ArrayList<String>();
}
ArrayList<String> out = new ArrayList<String>();
try {
BufferedReader buf = Files.openFileReader(file);
String line;
while ((line = buf.readLine()) != null) {
//ignore empty lines, or lines starting with # from the methods
// file.
if (line.startsWith("#") || line.equals("")) {
continue;
}
out.add(line.trim());
}
buf.close();
} catch (IOException ex) {
OutputMonitor.printStream("IO", ex);
}
return out;
}
/**
* Prepara la aplicación para iniciar el proceso de consultas
*
* @return
*/
private boolean setManager() {
Index otherindex = Index.createIndex();
String managerName = ApplicationSetup.getProperty("desktop.manager", "Manager");
try {
if (managerName.indexOf('.') == -1) {
managerName = "uk.ac.gla.terrier.querying." + managerName;
}
this.queryingManager = (uk.ac.gla.terrier.querying.Manager) (Class.forName(managerName).getConstructor(new Class[]{Index.class}).newInstance(new Object[]{otherindex}));
} catch (Exception e) {
String error = "Problem loading Manager (" + managerName + "): " + e.getMessage();
this.notifyTaskProgress(ERROR_MESSAGE, error);
return false;
}
if (this.queryingManager == null) {
return false;
}
return true;
}
/**
* Guarda los resultados obtenidos
* @param r
* @return
* @throws IOException
*/
private ArrayList<DocumentMetaData> saveResults(ResultSet r) throws IOException {
ArrayList<DocumentMetaData> list = new ArrayList<DocumentMetaData>();
DocumentMetaData metaDoc;
List<String> indexedFiles = loadList(new File(ApplicationSetup.makeAbsolute(
ApplicationSetup.getProperty("desktop.directories.filelist",
"data.filelist"), ApplicationSetup.TERRIER_INDEX_PATH)));
int[] docIds = r.getDocids();
double[] scores = r.getScores();
int docId;
String path, name, fileType;
File f;
long size;
for (int i = 0; i < r.getResultSize(); i++) {
metaDoc = new DocumentMetaData();
docId = docIds[i];
path = indexedFiles.get(docId);
f = new File(path);
name = f.getName();
fileType = getFileExtension(path);
size = f.length();
metaDoc.setIndex(docId);
metaDoc.setName(name);
metaDoc.setPath(path);
metaDoc.setSize(size);
metaDoc.setType(fileType);
metaDoc.setSynthesis(null); //doesn´t have
metaDoc.setScore(scores[i]);
metaDoc.setSearcher(KeySearchable.TERRIER_SEARCH_ENGINE);
list.add(metaDoc);
}
this.retrievedDocsCount = list.size();
return list;
}
/**
* Devuelve una lista con los documentos a indexar
* de tipo java txt o pdf
*
* @param listPath
* @return
*/
public List<String> verifyDocumentType(List<String> listPath) {
List<String> list = new ArrayList<String>();
for (int i = 0; i < listPath.size(); i++) {
String string = listPath.get(i);
String type = getFileExtension(string);
if (type.equalsIgnoreCase("java") || type.equalsIgnoreCase("pdf") || type.equalsIgnoreCase("txt")) {
list.add(string);
} else {
message = "There are files in the collection that are not: .java, pdf o txt documents" + "\n" + "so, they could not be indexed.";
OutputMonitor.printLine(message, OutputMonitor.INFORMATION_MESSAGE);
this.notifyTaskProgress(INFORMATION_MESSAGE, message);
}
}
return list;
}
/**
* @return the indexing
*/
public boolean isIndexing() {
return indexing;
}
/**
* @param indexing the indexing to set
*/
public void setIndexing(boolean indexing) {
this.indexing = indexing;
}
public void setTerrierLocation() {
File ftest = new File("./terrier/");
if (ftest.exists()) {
System.setProperty("terrier.home", ftest.getAbsolutePath());
} else {
OutputMonitor.printLine("Problem loading terrier configuration files", OutputMonitor.ERROR_MESSAGE);
}
ApplicationSetup.BLOCK_INDEXING = true;
if ((ApplicationSetup.getProperty("querying.allowed.controls", null)) == null) {
ApplicationSetup.setProperty("querying.allowed.controls", "c,start,end,qe");
}
if ((ApplicationSetup.getProperty("querying.postprocesses.order", null)) == null) {
ApplicationSetup.setProperty("querying.postprocesses.order", "QueryExpansion");
}
if ((ApplicationSetup.getProperty("querying.postprocesses.controls", null)) == null) {
ApplicationSetup.setProperty("querying.postprocesses.controls", "qe:QueryExpansion");
}
ApplicationSetup.setProperty("indexing.max.tokens", "10000");
ApplicationSetup.setProperty("invertedfile.processterms", "25000");
ApplicationSetup.setProperty("ignore.low.idf.terms", "false");
ApplicationSetup.setProperty("matching.dsms", "BooleanFallback");
}
}