/* * Hibernate Search, full-text search for your domain model * * License: GNU Lesser General Public License (LGPL), version 2.1 or later * See the lgpl.txt file in the root directory or <http://www.gnu.org/licenses/lgpl-2.1.html>. */ package org.hibernate.search.elasticsearch.analyzer.impl; import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; import java.util.TreeMap; import org.hibernate.search.analyzer.spi.AnalyzerReference; import org.hibernate.search.analyzer.spi.AnalyzerStrategy; import org.hibernate.search.annotations.AnalyzerDef; import org.hibernate.search.cfg.spi.SearchConfiguration; import org.hibernate.search.elasticsearch.analyzer.definition.ElasticsearchAnalysisDefinitionProvider; import org.hibernate.search.elasticsearch.analyzer.definition.impl.ElasticsearchAnalysisDefinitionRegistryBuilderImpl; import org.hibernate.search.elasticsearch.cfg.ElasticsearchEnvironment; import org.hibernate.search.elasticsearch.logging.impl.Log; import org.hibernate.search.elasticsearch.settings.impl.model.AnalyzerDefinition; import org.hibernate.search.elasticsearch.settings.impl.model.CharFilterDefinition; import org.hibernate.search.elasticsearch.settings.impl.model.TokenFilterDefinition; import org.hibernate.search.elasticsearch.settings.impl.model.TokenizerDefinition; import org.hibernate.search.elasticsearch.settings.impl.translation.ElasticsearchAnalyzerDefinitionTranslator; import org.hibernate.search.engine.service.spi.ServiceManager; import org.hibernate.search.engine.service.spi.ServiceReference; import org.hibernate.search.exception.SearchException; import org.hibernate.search.util.impl.ClassLoaderHelper; import org.hibernate.search.util.impl.ReflectionHelper; import org.hibernate.search.util.logging.impl.LoggerFactory; /** * @author Yoann Rodiere */ public class ElasticsearchAnalyzerStrategy implements AnalyzerStrategy { private static final Log LOG = LoggerFactory.make( Log.class ); private final ServiceManager serviceManager; private final SearchConfiguration cfg; public ElasticsearchAnalyzerStrategy(ServiceManager serviceManager, SearchConfiguration cfg) { this.serviceManager = serviceManager; this.cfg = cfg; } private SimpleElasticsearchAnalysisDefinitionRegistry createDefaultDefinitionRegistry() { ElasticsearchAnalysisDefinitionRegistryBuilderImpl builder = new ElasticsearchAnalysisDefinitionRegistryBuilderImpl(); String providerClassName = cfg.getProperty( ElasticsearchEnvironment.ANALYZER_DEFINITION_PROVIDER ); if ( providerClassName != null ) { ElasticsearchAnalysisDefinitionProvider provider; try { Class<?> providerClazz = ClassLoaderHelper.classForName( providerClassName, serviceManager ); provider = (ElasticsearchAnalysisDefinitionProvider) ReflectionHelper.createInstance( providerClazz, true ); } catch (RuntimeException e) { throw LOG.invalidElasticsearchAnalyzerDefinitionProvider( providerClassName, e ); } try { provider.register( builder ); } catch (SearchException e) { // Do not wrap our own exceptions (from the builder, for instance) throw e; } catch (RuntimeException e) { // Do wrap any other exception throw LOG.invalidLuceneAnalyzerDefinitionProvider( providerClassName, e ); } } return builder.build(); } @Override public ElasticsearchAnalyzerReference createDefaultAnalyzerReference() { return new NamedElasticsearchAnalyzerReference( "default" ); } @Override public ElasticsearchAnalyzerReference createPassThroughAnalyzerReference() { return new NamedElasticsearchAnalyzerReference( "keyword" ); } @Override public NamedElasticsearchAnalyzerReference createNamedAnalyzerReference(String name) { return new NamedElasticsearchAnalyzerReference( name ); } @Override public ElasticsearchAnalyzerReference createLuceneClassAnalyzerReference(Class<?> analyzerClass) { return new LuceneClassElasticsearchAnalyzerReference( analyzerClass ); } @Override public Map<String, AnalyzerReference> initializeAnalyzerReferences( Collection<AnalyzerReference> references, Map<String, AnalyzerDef> analyzerDefinitions) { try ( ServiceReference<ElasticsearchAnalyzerDefinitionTranslator> translatorReference = serviceManager.requestReference( ElasticsearchAnalyzerDefinitionTranslator.class ) ) { ElasticsearchAnalyzerDefinitionTranslator translator = translatorReference.get(); // First, create a registry containing all relevant definitions /* * Recreate the default definitions for each call, * so that the definition providers can add new definitions between two SearchFactory increments. */ SimpleElasticsearchAnalysisDefinitionRegistry defaultDefinitionRegistry = createDefaultDefinitionRegistry(); ElasticsearchAnalysisDefinitionRegistry definitionRegistry = createDefinitionRegistry( references, defaultDefinitionRegistry, analyzerDefinitions, translator); Set<String> existingNamedReferences = new HashSet<>(); // When all definitions are known and translated, actually initialize the references for ( AnalyzerReference reference : references ) { if ( reference.is( NamedElasticsearchAnalyzerReference.class ) ) { NamedElasticsearchAnalyzerReference namedReference = reference.unwrap( NamedElasticsearchAnalyzerReference.class ); if ( !namedReference.isInitialized() ) { initializeNamedReference( namedReference, definitionRegistry ); } existingNamedReferences.add( namedReference.getAnalyzerName() ); } else if ( reference.is( LuceneClassElasticsearchAnalyzerReference.class ) ) { LuceneClassElasticsearchAnalyzerReference luceneClassReference = reference.unwrap( LuceneClassElasticsearchAnalyzerReference.class ); if ( !luceneClassReference.isInitialized() ) { initializeLuceneClassReference( luceneClassReference, translator ); } } else if ( reference.is( ScopedElasticsearchAnalyzerReference.class ) ) { ScopedElasticsearchAnalyzerReference scopedReference = reference.unwrap( ScopedElasticsearchAnalyzerReference.class ); if ( !scopedReference.isInitialized() ) { scopedReference.initialize(); } } } /* * Finally, create additional references for default definitions that * haven't any matching reference, so that they will be available when querying. * We don't do that for @AnalyzerDefs because they may not all be related to Elasticsearch, * and they might even not be translatable to an Elasticsearch definition. */ Map<String, AnalyzerReference> additionalNamedReferences = new HashMap<>(); for ( String defaultAnalyzerName : defaultDefinitionRegistry.getAnalyzerDefinitions().keySet() ) { if ( !existingNamedReferences.contains( defaultAnalyzerName ) ) { NamedElasticsearchAnalyzerReference reference = createNamedAnalyzerReference( defaultAnalyzerName ); initializeNamedReference( reference, definitionRegistry ); additionalNamedReferences.put( defaultAnalyzerName, reference ); } } return additionalNamedReferences; } } private ElasticsearchAnalysisDefinitionRegistry createDefinitionRegistry(Collection<AnalyzerReference> references, ElasticsearchAnalysisDefinitionRegistry defaultDefinitionRegistry, Map<String, AnalyzerDef> analyzerDefinitions, ElasticsearchAnalyzerDefinitionTranslator translator) { /* * Make default definitions accessible in the final definition registry. * This final registry has two scopes: * - the "local" scope, which contains every definition gathered from pre-existing references (see below) * and definitions from the mapping * - the "default"/"global" scope, which contains definitions from the default registry (see above). * * When fetching definitions, the "local" scope takes precedence over the "default"/"global" scope. * * Note that thanks to this setup, changes to pre-existing default definitions are ignored * if the definitions were already used in pre-existing references. */ ElasticsearchAnalysisDefinitionRegistry definitionRegistry = new ChainingElasticsearchAnalysisDefinitionRegistry( defaultDefinitionRegistry ); /* * First, populate the registry with definitions from already initialized references. * * Those can occur if we are currently "incrementing" * on top of a previous version of the search factory. * In this case, we want to add previous definitions to the registry, * so as to check that we don't have conflicts * between the previous definitions and some new ones. */ for ( AnalyzerReference reference : references ) { if ( reference.is( NamedElasticsearchAnalyzerReference.class ) ) { NamedElasticsearchAnalyzerReference namedReference = reference.unwrap( NamedElasticsearchAnalyzerReference.class ); if ( namedReference.isInitialized() ) { // Note: these analyzers don't handle scope, we don't care about the field name namedReference.getAnalyzer().registerDefinitions( definitionRegistry, null ); } } } /* * Once the registry has been populated with pre-existing definitions, * add the new definitions as needed, throwing exceptions if there are conflicts. * * Note that we populate the registry only with the definitions we actually use. * That's because some @AnalyzerDef's cannot be translated, * and users may decide to add such definitions anyway because they need them * for entities indexed in an embedded Lucene instance (not ES). */ TranslatingElasticsearchAnalyzerDefinitionRegistryPopulator translatingPopulator = new TranslatingElasticsearchAnalyzerDefinitionRegistryPopulator( definitionRegistry, translator ); for ( AnalyzerReference reference : references ) { if ( reference.is( NamedElasticsearchAnalyzerReference.class ) ) { NamedElasticsearchAnalyzerReference namedReference = reference.unwrap( NamedElasticsearchAnalyzerReference.class ); if ( !namedReference.isInitialized() ) { String name = namedReference.getAnalyzerName(); AnalyzerDef hibernateSearchAnalyzerDef = analyzerDefinitions.get( name ); if ( hibernateSearchAnalyzerDef != null ) { translatingPopulator.registerAnalyzerDef( hibernateSearchAnalyzerDef ); } } } } return definitionRegistry; } private void initializeNamedReference(NamedElasticsearchAnalyzerReference analyzerReference, ElasticsearchAnalysisDefinitionRegistry definitionRegistry) { String name = analyzerReference.getAnalyzerName(); ElasticsearchAnalyzer analyzer = createAnalyzer( definitionRegistry, name ); analyzerReference.initialize( analyzer ); } private void initializeLuceneClassReference(LuceneClassElasticsearchAnalyzerReference analyzerReference, ElasticsearchAnalyzerDefinitionTranslator translator) { Class<?> clazz = analyzerReference.getLuceneClass(); String name = translator.translate( clazz ); ElasticsearchAnalyzer analyzer = new UndefinedElasticsearchAnalyzerImpl( name ); analyzerReference.initialize( name, analyzer ); } @Override public ScopedElasticsearchAnalyzerReference.Builder buildScopedAnalyzerReference(AnalyzerReference initialGlobalAnalyzerReference) { return new ScopedElasticsearchAnalyzerReference.DeferredInitializationBuilder( initialGlobalAnalyzerReference.unwrap( ElasticsearchAnalyzerReference.class ), Collections.<String, ElasticsearchAnalyzerReference>emptyMap() ); } private ElasticsearchAnalyzer createAnalyzer(ElasticsearchAnalysisDefinitionRegistry definitionRegistry, String analyzerName) { AnalyzerDefinition analyzerDefinition = definitionRegistry.getAnalyzerDefinition( analyzerName ); if ( analyzerDefinition == null ) { return new UndefinedElasticsearchAnalyzerImpl( analyzerName ); } String tokenizerName = analyzerDefinition.getTokenizer(); TokenizerDefinition tokenizerDefinition = definitionRegistry.getTokenizerDefinition( tokenizerName ); Map<String, TokenFilterDefinition> tokenFilters = new TreeMap<>(); if ( analyzerDefinition.getTokenFilters() != null ) { for ( String name : analyzerDefinition.getTokenFilters() ) { TokenFilterDefinition definition = definitionRegistry.getTokenFilterDefinition( name ); if ( definition != null ) { // Ignore missing definitions: they may be already available on the server tokenFilters.put( name, definition ); } } } Map<String, CharFilterDefinition> charFilters = new TreeMap<>(); if ( analyzerDefinition.getCharFilters() != null ) { for ( String name : analyzerDefinition.getCharFilters() ) { CharFilterDefinition definition = definitionRegistry.getCharFilterDefinition( name ); if ( definition != null ) { // Ignore missing definitions: they may be already available on the server charFilters.put( name, definition ); } } } return new CustomElasticsearchAnalyzerImpl( analyzerName, analyzerDefinition, tokenizerName, tokenizerDefinition, charFilters, tokenFilters ); } }