package com.code972.elasticsearch;

import com.code972.elasticsearch.plugins.index.analysis.*;
import com.code972.elasticsearch.plugins.rest.action.RestHebrewAnalyzerCheckWordAction;
import com.code972.hebmorph.DictionaryLoader;
import com.code972.hebmorph.datastructures.DictHebMorph;
import com.code972.hebmorph.hspell.HSpellDictionaryLoader;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.elasticsearch.SpecialPermission;
import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.settings.IndexScopedSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.settings.SettingsFilter;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.analysis.AnalyzerProvider;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.indices.analysis.AnalysisModule;
import org.elasticsearch.plugins.ActionPlugin;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.rest.RestController;
import org.elasticsearch.rest.RestHandler;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Supplier;

import static java.util.Collections.*;

/**
 * The Hebrew analysis plugin entry point, locating and loading the dictionary and configuring
 * the tokenizer, token filters and analyzers.
 */
public final class HebrewAnalysisPlugin extends Plugin implements ActionPlugin, AnalysisPlugin {

    // The class is final, so a static logger names the same class as this.getClass() would.
    private static final Logger log = LogManager.getLogger(HebrewAnalysisPlugin.class);

    /** Fully-qualified name of the optional dictionary loader that is looked up reflectively. */
    private static final String COMMERCIAL_DICTIONARY_LOADER_CLASS =
            "com.code972.hebmorph.dictionary.impl.HebMorphDictionaryLoader";

    /**
     * The dictionary loaded by the most recent successful plugin construction. It is static
     * because it is exposed through the static {@link #getDictionary()} accessor.
     */
    private static DictHebMorph dict;

    /**
     * Returns the dictionary loaded by the plugin constructor.
     *
     * @return the loaded dictionary, or {@code null} if no plugin instance has been
     *         successfully constructed yet
     */
    public static DictHebMorph getDictionary() {
        return dict;
    }

    /**
     * Attempts to load a dictionary from paths specified in elasticsearch.yml.
     * If {@code hebrew.dict.path} is defined, that path is tried first; otherwise (or if it
     * fails) the paths suggested by the dictionary loader for the {@code analysis-hebrew}
     * plugin directory are tried in order.
     *
     * @param settings the node settings; read for {@code hebrew.dict.path} and used to build
     *                 the {@link Environment} from which the plugins directory is resolved
     * @throws IllegalArgumentException if no dictionary could be loaded from any candidate path
     */
    public HebrewAnalysisPlugin(final Settings settings) {
        super();

        final SecurityManager sm = System.getSecurityManager();
        if (sm != null) {
            // unprivileged code such as scripts do not have SpecialPermission
            sm.checkPermission(new SpecialPermission());
        }

        final DictionaryLoader dictLoader = resolveDictionaryLoader();
        // Assigned only on success: loadDictionary throws when nothing could be loaded.
        dict = loadDictionary(settings, dictLoader);
    }

    /**
     * Figures out which {@link DictionaryLoader} to use: the reflectively-located loader named by
     * {@link #COMMERCIAL_DICTIONARY_LOADER_CLASS} when it is on the classpath and instantiable,
     * otherwise the {@link HSpellDictionaryLoader}.
     */
    private static DictionaryLoader resolveDictionaryLoader() {
        final DictionaryLoader external = AccessController.doPrivileged((PrivilegedAction<DictionaryLoader>) () -> {
            try {
                final Class<?> clz = Class.forName(COMMERCIAL_DICTIONARY_LOADER_CLASS);
                log.info("Dictionary loader available ({})", clz.getSimpleName());
                try {
                    final Constructor<?> ctor = clz.getConstructor();
                    return (DictionaryLoader) ctor.newInstance();
                } catch (NoSuchMethodException | IllegalAccessException | InstantiationException
                        | InvocationTargetException e) {
                    log.error("Unable to load the HebMorph dictionary", e);
                }
            } catch (ClassNotFoundException ignored) {
                // If external dictionary loaders are not present, we default to the one provided with OSS HebMorph
            }
            return null;
        });

        if (external != null) {
            return external;
        }
        log.info("Defaulting to HSpell dictionary loader");
        return new HSpellDictionaryLoader();
    }

    /**
     * Tries the configured path (if any) and then the loader's default candidate paths,
     * returning the first dictionary that loads.
     *
     * @throws IllegalArgumentException listing every attempted path if none of them worked
     */
    private static DictHebMorph loadDictionary(final Settings settings, final DictionaryLoader dictLoader) {
        final List<String> triedPaths = new ArrayList<>();

        // If path was specified in settings, try that path first
        final String pathFromSettings = settings.get("hebrew.dict.path");
        if (pathFromSettings != null && !pathFromSettings.isEmpty()) {
            triedPaths.add(pathFromSettings);
            final DictHebMorph loaded = tryLoad(pathFromSettings, dictLoader);
            if (loaded != null) {
                return loaded;
            }
        }

        // The Environment is only built when the configured path did not yield a dictionary.
        final Environment env = new Environment(settings);
        final String pluginDir = env.pluginsFile().resolve("analysis-hebrew").toAbsolutePath().toString();
        for (final String path : dictLoader.getPossiblePaths(pluginDir)) {
            triedPaths.add(path);
            final DictHebMorph loaded = tryLoad(path, dictLoader);
            if (loaded != null) {
                return loaded;
            }
        }

        throw new IllegalArgumentException(
                "Could not load any dictionary. Aborting! Tried paths: " + triedPaths);
    }

    /**
     * Attempts a single privileged dictionary load, logging the attempt before it starts and
     * the success after it completes.
     *
     * @return the loaded dictionary, or {@code null} if the path does not exist or loading failed
     */
    private static DictHebMorph tryLoad(final String path, final DictionaryLoader dictLoader) {
        log.info("Trying to load {} dictionary from path {}", dictLoader.dictionaryLoaderName(), path);
        final DictHebMorph loaded = AccessController.doPrivileged(new LoadDictAction(path, dictLoader));
        if (loaded != null) {
            log.info("Dictionary '{}' loaded successfully from path {}", dictLoader.dictionaryLoaderName(), path);
        }
        return loaded;
    }

    /**
     * Privileged action that loads a dictionary from a single path using the given loader.
     * Produces {@code null} when the path does not exist or the loader throws an
     * {@link IOException}.
     */
    private static final class LoadDictAction implements PrivilegedAction<DictHebMorph> {

        private final String path;
        private final DictionaryLoader loader;

        LoadDictAction(final String path, final DictionaryLoader dictLoader) {
            this.path = path;
            this.loader = dictLoader;
        }

        @Override
        public DictHebMorph run() {
            final File file = new File(path);
            if (file.exists()) {
                try {
                    return loader.loadDictionaryFromPath(path);
                } catch (IOException e) {
                    // Best effort: report the failure with its path and let the caller try the next one.
                    log.error("Failed to load dictionary from path " + path, e);
                }
            }
            return null;
        }
    }

    /**
     * Exposes a single REST handler, {@link RestHebrewAnalyzerCheckWordAction}, built from the
     * given settings and REST controller. The remaining parameters are unused.
     */
    @Override
    public List<RestHandler> getRestHandlers(Settings settings, RestController restController,
                                             ClusterSettings clusterSettings,
                                             IndexScopedSettings indexScopedSettings,
                                             SettingsFilter settingsFilter,
                                             IndexNameExpressionResolver indexNameExpressionResolver,
                                             Supplier<DiscoveryNodes> nodesInCluster) {
        return singletonList(new RestHebrewAnalyzerCheckWordAction(settings, restController));
    }

    /**
     * Registers the {@code hebrew_lemmatizer}, {@code niqqud} and {@code add_suffix} token
     * filters. Only the lemmatizer factory is handed the loaded dictionary.
     *
     * @return an unmodifiable map from filter name to its provider
     */
    @Override
    public Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
        final Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> extra = new HashMap<>();
        extra.put("hebrew_lemmatizer", (indexSettings, env, name, settings) ->
                new HebrewLemmatizerTokenFilterFactory(indexSettings, env, name, settings, dict));
        extra.put("niqqud", NiqqudFilterTokenFilterFactory::new);
        extra.put("add_suffix", AddSuffixTokenFilterFactory::new);
        return unmodifiableMap(extra);
    }

    /**
     * Registers the {@code hebrew} tokenizer, backed by the loaded dictionary.
     *
     * @return a single-entry map from tokenizer name to its provider
     */
    @Override
    public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() {
        return singletonMap("hebrew", (indexSettings, env, name, settings) ->
                new HebrewTokenizerFactory(indexSettings, env, name, settings, dict));
    }

    /**
     * Registers the {@code hebrew}, {@code hebrew_query}, {@code hebrew_query_light} and
     * {@code hebrew_exact} analyzers, each backed by the loaded dictionary.
     *
     * @return an unmodifiable map from analyzer name to its provider
     */
    @Override
    public Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getAnalyzers() {
        final Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> extra = new HashMap<>();
        extra.put("hebrew", (indexSettings, env, name, settings) ->
                new HebrewIndexingAnalyzerProvider(indexSettings, env, name, settings, dict));
        extra.put("hebrew_query", (indexSettings, env, name, settings) ->
                new HebrewQueryAnalyzerProvider(indexSettings, env, name, settings, dict));
        extra.put("hebrew_query_light", (indexSettings, env, name, settings) ->
                new HebrewQueryLightAnalyzerProvider(indexSettings, env, name, settings, dict));
        extra.put("hebrew_exact", (indexSettings, env, name, settings) ->
                new HebrewExactAnalyzerProvider(indexSettings, env, name, settings, dict));
        return unmodifiableMap(extra);
    }
}