/**
* OLAT - Online Learning and Training<br>
* http://www.olat.org
* <p>
* Licensed under the Apache License, Version 2.0 (the "License"); <br>
* you may not use this file except in compliance with the License.<br>
* You may obtain a copy of the License at
* <p>
* http://www.apache.org/licenses/LICENSE-2.0
* <p>
* Unless required by applicable law or agreed to in writing,<br>
* software distributed under the License is distributed on an "AS IS" BASIS, <br>
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. <br>
* See the License for the specific language governing permissions and <br>
* limitations under the License.
* <p>
* Copyright (c) since 2004 at Multimedia- & E-Learning Services (MELS),<br>
* University of Zurich, Switzerland.
* <hr>
* <a href="http://www.openolat.org">
* OpenOLAT - Online Learning and Training</a><br>
* This file has been modified by the OpenOLAT community. Changes are licensed
* under the Apache 2.0 license as the original file.
*/
package org.olat.search.service.spell;
import java.io.File;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.search.spell.LuceneDictionary;
import org.apache.lucene.search.spell.SpellChecker;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.olat.core.logging.OLog;
import org.olat.core.logging.Tracing;
import org.olat.core.util.FileUtils;
import org.olat.search.SearchService;
import org.olat.search.model.OlatDocument;
/**
* Spell-checker part inside of search-service.
* Service to check certain search-query for similar available search.queries.
* @author Christian Guretzki
*/
public class SearchSpellChecker {
private static OLog log = Tracing.createLoggerFor(SearchSpellChecker.class);
private static final String CONTENT_PATH = "_content";
private static final String TITLE_PATH = "_title";
private static final String DESCRIPTION_PATH = "_description";
private static final String AUTHOR_PATH = "_author";
private String indexPath;
private String spellDictionaryPath;
private SpellChecker spellChecker;
private boolean isSpellCheckEnabled = true;
private ExecutorService searchExecutor;
public SearchSpellChecker() {
//
}
public void setSearchExecutor(ExecutorService searchExecutor) {
this.searchExecutor = searchExecutor;
}
/**
* Check for valid similar search terms
* @param query
* @return Returns list of String with similar search-words.
* Returns null when spell-checker is disabled or has an exception.
*/
public Set<String> check(String query) {
try {
CheckCallable run = new CheckCallable(query, this);
Future<Set<String>> futureResults = searchExecutor.submit(run);
return futureResults.get();
} catch (Exception e) {
log.warn("Can not spell check",e);
return new HashSet<String>();
}
}
protected SpellChecker getSpellChecker() {
if(spellChecker==null) { //lazy initialization
try {
synchronized(spellDictionaryPath) {//o_clusterOK by:pb if service is only configured on one vm, which is recommended way
File spellDictionaryFile = new File(spellDictionaryPath);
Directory spellIndexDirectory = FSDirectory.open(spellDictionaryFile);
if (spellChecker==null && DirectoryReader.indexExists(spellIndexDirectory) && isSpellCheckEnabled ) {
spellChecker = new SpellChecker(spellIndexDirectory);
spellChecker.setAccuracy(0.7f);
}
}
} catch (IOException e) {
log.warn("Can not initialze SpellChecker",e);
}
}
return spellChecker;
}
/**
* Creates a new spell-check index based on search-index
*
*/
public void createSpellIndex() {
if (isSpellCheckEnabled) {
IndexReader indexReader = null;
try {
log.info("Start generating Spell-Index...");
long startSpellIndexTime = 0;
if (log.isDebug()) startSpellIndexTime = System.currentTimeMillis();
Directory indexDir = FSDirectory.open(new File(indexPath));
indexReader = DirectoryReader.open(indexDir);
// 1. Create content spellIndex
File spellDictionaryFile = new File(spellDictionaryPath);
FSDirectory contentSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + CONTENT_PATH));//true
SpellChecker contentSpellChecker = new SpellChecker(contentSpellIndexDirectory);
Dictionary contentDictionary = new LuceneDictionary(indexReader, OlatDocument.CONTENT_FIELD_NAME);
Analyzer analyzer = new StandardAnalyzer(SearchService.OO_LUCENE_VERSION);
IndexWriterConfig contentIndexWriterConfig = new IndexWriterConfig(SearchService.OO_LUCENE_VERSION, analyzer);
contentSpellChecker.indexDictionary(contentDictionary, contentIndexWriterConfig, true);
// 2. Create title spellIndex
FSDirectory titleSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + TITLE_PATH));//true
SpellChecker titleSpellChecker = new SpellChecker(titleSpellIndexDirectory);
Dictionary titleDictionary = new LuceneDictionary(indexReader, OlatDocument.TITLE_FIELD_NAME);
IndexWriterConfig titleIndexWriterConfig = new IndexWriterConfig(SearchService.OO_LUCENE_VERSION, analyzer);
titleSpellChecker.indexDictionary(titleDictionary, titleIndexWriterConfig, true);
// 3. Create description spellIndex
FSDirectory descriptionSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + DESCRIPTION_PATH));//true
SpellChecker descriptionSpellChecker = new SpellChecker(descriptionSpellIndexDirectory);
Dictionary descriptionDictionary = new LuceneDictionary(indexReader, OlatDocument.DESCRIPTION_FIELD_NAME);
IndexWriterConfig descIndexWriterConfig = new IndexWriterConfig(SearchService.OO_LUCENE_VERSION, analyzer);
descriptionSpellChecker.indexDictionary(descriptionDictionary, descIndexWriterConfig, true);
// 4. Create author spellIndex
FSDirectory authorSpellIndexDirectory = FSDirectory.open(new File(spellDictionaryPath + AUTHOR_PATH));//true
SpellChecker authorSpellChecker = new SpellChecker(authorSpellIndexDirectory);
Dictionary authorDictionary = new LuceneDictionary(indexReader, OlatDocument.AUTHOR_FIELD_NAME);
IndexWriterConfig authorIndexWriterConfig = new IndexWriterConfig(SearchService.OO_LUCENE_VERSION, analyzer);
authorSpellChecker.indexDictionary(authorDictionary, authorIndexWriterConfig, true);
// Merge all part spell indexes (content,title etc.) to one common spell index
Directory spellIndexDirectory = FSDirectory.open(spellDictionaryFile);//true
//clean up the main index
IndexWriterConfig spellIndexWriterConfig = new IndexWriterConfig(SearchService.OO_LUCENE_VERSION, analyzer);
IndexWriter merger = new IndexWriter(spellIndexDirectory, spellIndexWriterConfig);
merger.deleteAll();
merger.commit();
Directory[] directories = { contentSpellIndexDirectory, titleSpellIndexDirectory, descriptionSpellIndexDirectory, authorSpellIndexDirectory};
for(Directory directory:directories) {
merger.addIndexes(directory);
}
merger.close();
contentSpellChecker.close();
titleSpellChecker.close();
descriptionSpellChecker.close();
authorSpellChecker.close();
//remove all files
FileUtils.deleteDirsAndFiles(contentSpellIndexDirectory.getDirectory(), true, true);
FileUtils.deleteDirsAndFiles(titleSpellIndexDirectory.getDirectory(), true, true);
FileUtils.deleteDirsAndFiles(descriptionSpellIndexDirectory.getDirectory(), true, true);
FileUtils.deleteDirsAndFiles(authorSpellIndexDirectory.getDirectory(), true, true);
spellChecker = new SpellChecker(spellIndexDirectory);
spellChecker.setAccuracy(0.7f);
if (log.isDebug()) log.debug("SpellIndex created in " + (System.currentTimeMillis() - startSpellIndexTime) + "ms");
log.info("New generated Spell-Index ready to use.");
} catch(IOException ioEx) {
log.warn("Can not create SpellIndex",ioEx);
} finally {
if (indexReader != null) {
try {
indexReader.close();
} catch (IOException e) {
log.warn("Can not close indexReader properly",e);
}
}
}
}
}
/**
*
* @param indexPath Sets the absolute file-path to search index directory.
*/
public void setIndexPath(String indexPath) {
this.indexPath = indexPath;
}
/**
*
* @param isSpellCheckEnabled Sets the absolute file-path to spell-check index directory.
*/
public void setSpellCheckEnabled(boolean isSpellCheckEnabled) {
this.isSpellCheckEnabled = isSpellCheckEnabled;
}
/**
*
* @param spellDictionaryPath Enable/disable spell-checker
*/
public void setSpellDictionaryPath(String spellDictionaryPath) {
this.spellDictionaryPath = spellDictionaryPath;
}
}