/* * Hibernate Search, full-text search for your domain model * * License: GNU Lesser General Public License (LGPL), version 2.1 or later * See the lgpl.txt file in the root directory or <http://www.gnu.org/licenses/lgpl-2.1.html>. */ package org.hibernate.search.elasticsearch.settings.impl.translation; import java.util.Map; import java.util.Properties; import org.apache.lucene.analysis.ar.ArabicAnalyzer; import org.apache.lucene.analysis.ar.ArabicNormalizationFilterFactory; import org.apache.lucene.analysis.ar.ArabicStemFilterFactory; import org.apache.lucene.analysis.bg.BulgarianAnalyzer; import org.apache.lucene.analysis.bg.BulgarianStemFilterFactory; import org.apache.lucene.analysis.br.BrazilianAnalyzer; import org.apache.lucene.analysis.br.BrazilianStemFilterFactory; import org.apache.lucene.analysis.ca.CatalanAnalyzer; import org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory; import org.apache.lucene.analysis.charfilter.MappingCharFilterFactory; import org.apache.lucene.analysis.cjk.CJKAnalyzer; import org.apache.lucene.analysis.cjk.CJKBigramFilterFactory; import org.apache.lucene.analysis.cjk.CJKWidthFilterFactory; import org.apache.lucene.analysis.ckb.SoraniAnalyzer; import org.apache.lucene.analysis.ckb.SoraniNormalizationFilterFactory; import org.apache.lucene.analysis.ckb.SoraniStemFilterFactory; import org.apache.lucene.analysis.commongrams.CommonGramsFilterFactory; import org.apache.lucene.analysis.compound.DictionaryCompoundWordTokenFilterFactory; import org.apache.lucene.analysis.compound.HyphenationCompoundWordTokenFilterFactory; import org.apache.lucene.analysis.core.DecimalDigitFilterFactory; import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.analysis.core.KeywordTokenizerFactory; import org.apache.lucene.analysis.core.LetterTokenizerFactory; import org.apache.lucene.analysis.core.LowerCaseFilterFactory; import org.apache.lucene.analysis.core.LowerCaseTokenizerFactory; import org.apache.lucene.analysis.core.SimpleAnalyzer; import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.core.StopFilterFactory; import org.apache.lucene.analysis.core.TypeTokenFilterFactory; import org.apache.lucene.analysis.core.UpperCaseFilterFactory; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.analysis.core.WhitespaceTokenizerFactory; import org.apache.lucene.analysis.cz.CzechAnalyzer; import org.apache.lucene.analysis.cz.CzechStemFilterFactory; import org.apache.lucene.analysis.da.DanishAnalyzer; import org.apache.lucene.analysis.de.GermanAnalyzer; import org.apache.lucene.analysis.de.GermanLightStemFilterFactory; import org.apache.lucene.analysis.de.GermanMinimalStemFilterFactory; import org.apache.lucene.analysis.de.GermanNormalizationFilterFactory; import org.apache.lucene.analysis.de.GermanStemFilterFactory; import org.apache.lucene.analysis.el.GreekAnalyzer; import org.apache.lucene.analysis.el.GreekStemFilterFactory; import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.apache.lucene.analysis.en.EnglishMinimalStemFilterFactory; import org.apache.lucene.analysis.en.EnglishPossessiveFilterFactory; import org.apache.lucene.analysis.en.KStemFilterFactory; import org.apache.lucene.analysis.en.PorterStemFilterFactory; import org.apache.lucene.analysis.es.SpanishAnalyzer; import org.apache.lucene.analysis.es.SpanishLightStemFilterFactory; import org.apache.lucene.analysis.eu.BasqueAnalyzer; import org.apache.lucene.analysis.fa.PersianAnalyzer; import org.apache.lucene.analysis.fa.PersianNormalizationFilterFactory; import org.apache.lucene.analysis.fi.FinnishAnalyzer; import org.apache.lucene.analysis.fi.FinnishLightStemFilterFactory; import org.apache.lucene.analysis.fr.FrenchAnalyzer; import org.apache.lucene.analysis.fr.FrenchLightStemFilterFactory; import org.apache.lucene.analysis.fr.FrenchMinimalStemFilterFactory; import org.apache.lucene.analysis.ga.IrishAnalyzer; import org.apache.lucene.analysis.gl.GalicianAnalyzer; import org.apache.lucene.analysis.gl.GalicianMinimalStemFilterFactory; import org.apache.lucene.analysis.gl.GalicianStemFilterFactory; import org.apache.lucene.analysis.hi.HindiAnalyzer; import org.apache.lucene.analysis.hi.HindiNormalizationFilterFactory; import org.apache.lucene.analysis.hi.HindiStemFilterFactory; import org.apache.lucene.analysis.hu.HungarianAnalyzer; import org.apache.lucene.analysis.hu.HungarianLightStemFilterFactory; import org.apache.lucene.analysis.hy.ArmenianAnalyzer; import org.apache.lucene.analysis.id.IndonesianAnalyzer; import org.apache.lucene.analysis.id.IndonesianStemFilterFactory; import org.apache.lucene.analysis.in.IndicNormalizationFilterFactory; import org.apache.lucene.analysis.it.ItalianAnalyzer; import org.apache.lucene.analysis.it.ItalianLightStemFilterFactory; import org.apache.lucene.analysis.lt.LithuanianAnalyzer; import org.apache.lucene.analysis.lv.LatvianAnalyzer; import org.apache.lucene.analysis.lv.LatvianStemFilterFactory; import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilterFactory; import org.apache.lucene.analysis.miscellaneous.KeepWordFilterFactory; import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilterFactory; import org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory; import org.apache.lucene.analysis.miscellaneous.LengthFilterFactory; import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilterFactory; import org.apache.lucene.analysis.miscellaneous.ScandinavianFoldingFilterFactory; import org.apache.lucene.analysis.miscellaneous.ScandinavianNormalizationFilterFactory; import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilterFactory; import org.apache.lucene.analysis.miscellaneous.TrimFilterFactory; import org.apache.lucene.analysis.miscellaneous.TruncateTokenFilterFactory; import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory; import org.apache.lucene.analysis.ngram.EdgeNGramFilterFactory; import org.apache.lucene.analysis.ngram.EdgeNGramTokenizerFactory; import org.apache.lucene.analysis.ngram.NGramFilterFactory; import org.apache.lucene.analysis.ngram.NGramTokenizerFactory; import org.apache.lucene.analysis.nl.DutchAnalyzer; import org.apache.lucene.analysis.no.NorwegianAnalyzer; import org.apache.lucene.analysis.no.NorwegianLightStemFilterFactory; import org.apache.lucene.analysis.no.NorwegianMinimalStemFilterFactory; import org.apache.lucene.analysis.path.PathHierarchyTokenizerFactory; import org.apache.lucene.analysis.pattern.PatternCaptureGroupFilterFactory; import org.apache.lucene.analysis.pattern.PatternReplaceCharFilterFactory; import org.apache.lucene.analysis.pattern.PatternReplaceFilterFactory; import org.apache.lucene.analysis.pattern.PatternTokenizerFactory; import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilterFactory; import org.apache.lucene.analysis.pt.PortugueseAnalyzer; import org.apache.lucene.analysis.pt.PortugueseLightStemFilterFactory; import org.apache.lucene.analysis.pt.PortugueseMinimalStemFilterFactory; import org.apache.lucene.analysis.pt.PortugueseStemFilterFactory; import org.apache.lucene.analysis.reverse.ReverseStringFilterFactory; import org.apache.lucene.analysis.ro.RomanianAnalyzer; import org.apache.lucene.analysis.ru.RussianAnalyzer; import org.apache.lucene.analysis.ru.RussianLightStemFilterFactory; import org.apache.lucene.analysis.shingle.ShingleFilterFactory; import org.apache.lucene.analysis.snowball.SnowballPorterFilterFactory; import org.apache.lucene.analysis.sr.SerbianNormalizationFilterFactory; import org.apache.lucene.analysis.standard.ClassicFilterFactory; import org.apache.lucene.analysis.standard.ClassicTokenizerFactory; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardFilterFactory; import org.apache.lucene.analysis.standard.StandardTokenizerFactory; import org.apache.lucene.analysis.standard.UAX29URLEmailTokenizerFactory; import org.apache.lucene.analysis.sv.SwedishAnalyzer; import org.apache.lucene.analysis.sv.SwedishLightStemFilterFactory; import org.apache.lucene.analysis.synonym.SynonymFilterFactory; import org.apache.lucene.analysis.th.ThaiAnalyzer; import org.apache.lucene.analysis.th.ThaiTokenizerFactory; import org.apache.lucene.analysis.tr.ApostropheFilterFactory; import org.apache.lucene.analysis.tr.TurkishAnalyzer; import org.apache.lucene.analysis.util.CharFilterFactory; import org.apache.lucene.analysis.util.ElisionFilterFactory; import org.apache.lucene.analysis.util.ResourceLoader; import org.apache.lucene.analysis.util.TokenFilterFactory; import org.apache.lucene.analysis.util.TokenizerFactory; import org.hibernate.search.annotations.CharFilterDef; import org.hibernate.search.annotations.TokenFilterDef; import org.hibernate.search.annotations.TokenizerDef; import org.hibernate.search.cfg.spi.ParameterAnnotationsReader; import org.hibernate.search.elasticsearch.analyzer.ElasticsearchCharFilterFactory; import org.hibernate.search.elasticsearch.analyzer.ElasticsearchTokenFilterFactory; import org.hibernate.search.elasticsearch.analyzer.ElasticsearchTokenizerFactory; import org.hibernate.search.elasticsearch.logging.impl.Log; import org.hibernate.search.elasticsearch.settings.impl.model.CharFilterDefinition; import org.hibernate.search.elasticsearch.settings.impl.model.TokenFilterDefinition; import org.hibernate.search.elasticsearch.settings.impl.model.TokenizerDefinition; import org.hibernate.search.engine.service.spi.Startable; import org.hibernate.search.engine.service.spi.Stoppable; import org.hibernate.search.spi.BuildContext; import org.hibernate.search.util.impl.HibernateSearchResourceLoader; import org.hibernate.search.util.logging.impl.LoggerFactory; import com.google.gson.JsonElement; import com.google.gson.JsonPrimitive; /** * @author Yoann Rodiere */ public class DefaultElasticsearchAnalyzerDefinitionTranslator implements ElasticsearchAnalyzerDefinitionTranslator, Startable, Stoppable { private static final Log log = LoggerFactory.make( Log.class ); private Map<String, String> luceneAnalyzers; private Map<String, AnalysisDefinitionFactory<CharFilterDefinition>> luceneCharFilters; private Map<String, AnalysisDefinitionFactory<TokenizerDefinition>> luceneTokenizers; private Map<String, AnalysisDefinitionFactory<TokenFilterDefinition>> luceneTokenFilters; @Override public void start(Properties properties, BuildContext context) { final ResourceLoader resourceLoader = new HibernateSearchResourceLoader( context.getServiceManager() ); luceneAnalyzers = new LuceneAnalyzerImplementationTranslationMapBuilder() .add( StandardAnalyzer.class, "standard" ) .add( SimpleAnalyzer.class, "simple" ) .add( WhitespaceAnalyzer.class, "whitespace" ) .add( StopAnalyzer.class, "stop" ) .add( KeywordAnalyzer.class, "keyword" ) .add( ArabicAnalyzer.class, "arabic" ) .add( ArmenianAnalyzer.class, "armenian" ) .add( BasqueAnalyzer.class, "basque" ) .add( BrazilianAnalyzer.class, "brazilian" ) .add( BulgarianAnalyzer.class, "bulgarian" ) .add( CatalanAnalyzer.class, "catalan" ) .add( CJKAnalyzer.class, "cjk" ) .add( CzechAnalyzer.class, "czech" ) .add( DanishAnalyzer.class, "danish" ) .add( DutchAnalyzer.class, "dutch" ) .add( EnglishAnalyzer.class, "english" ) .add( FinnishAnalyzer.class, "finnish" ) .add( FrenchAnalyzer.class, "french" ) .add( GalicianAnalyzer.class, "galician" ) .add( GermanAnalyzer.class, "german" ) .add( GreekAnalyzer.class, "greek" ) .add( HindiAnalyzer.class, "hindi" ) .add( HungarianAnalyzer.class, "hungarian" ) .add( IndonesianAnalyzer.class, "indonesian" ) .add( IrishAnalyzer.class, "irish" ) .add( ItalianAnalyzer.class, "italian" ) .add( LatvianAnalyzer.class, "latvian" ) .add( LithuanianAnalyzer.class, "lithuanian" ) .add( NorwegianAnalyzer.class, "norwegian" ) .add( PersianAnalyzer.class, "persian" ) .add( PortugueseAnalyzer.class, "portuguese" ) .add( RomanianAnalyzer.class, "romanian" ) .add( RussianAnalyzer.class, "russian" ) .add( SoraniAnalyzer.class, "sorani" ) .add( SpanishAnalyzer.class, "spanish" ) .add( SwedishAnalyzer.class, "swedish" ) .add( TurkishAnalyzer.class, "turkish" ) .add( ThaiAnalyzer.class, "thai" ) .build(); luceneCharFilters = new LuceneAnalysisDefinitionTranslationMapBuilder<>( CharFilterDefinition.class ) .builder( MappingCharFilterFactory.class, "mapping" ) // "mappings" (unmapped) is an array .rename( "mapping", "mappings" ) .transform( "mapping", new CharMappingRuleFileParameterValueTransformer( resourceLoader ) ) .end() .builder( HTMLStripCharFilterFactory.class, "html_strip" ) .rename( "escapedTags", "escaped_tags" ) .transform( "escapedTags", new SplitArrayParameterValueTransformer( "[\\s,]+", StringParameterValueTransformer.INSTANCE ) ) .end() .builder( PatternReplaceCharFilterFactory.class, "pattern_replace" ).end() .addJsonPassThrough( ElasticsearchCharFilterFactory.class ) .build(); luceneTokenizers = new LuceneAnalysisDefinitionTranslationMapBuilder<>( TokenizerDefinition.class ) .builder( StandardTokenizerFactory.class, "standard" ) .rename( "maxTokenLength", "max_token_length" ) .end() .builder( EdgeNGramTokenizerFactory.class, "edgeNGram" ) // "token_chars" is an array of strings .rename( "minGramSize", "min_gram" ) .rename( "maxGramSize", "max_gram" ) .end() .builder( KeywordTokenizerFactory.class, "keyword" ).end() .builder( LetterTokenizerFactory.class, "letter" ).end() .builder( LowerCaseTokenizerFactory.class, "lowercase" ).end() .builder( NGramTokenizerFactory.class, "nGram" ) // "token_chars" is an array of strings .rename( "minGramSize", "min_gram" ) .rename( "maxGramSize", "max_gram" ) .end() .builder( WhitespaceTokenizerFactory.class, "whitespace" ) .disallow( "rule" ) .end() .builder( PatternTokenizerFactory.class, "pattern" ).end() .builder( UAX29URLEmailTokenizerFactory.class, "uax_url_email" ) .rename( "maxTokenLength", "max_token_length" ) .end() .builder( PathHierarchyTokenizerFactory.class, "path_hierarchy" ) .rename( "replace", "replacement" ) .end() .builder( ClassicTokenizerFactory.class, "classic" ) .rename( "maxTokenLength", "max_token_length" ) .end() .builder( ThaiTokenizerFactory.class, "thai" ).end() .addJsonPassThrough( ElasticsearchTokenizerFactory.class ) .build(); luceneTokenFilters = new LuceneAnalysisDefinitionTranslationMapBuilder<>( TokenFilterDefinition.class ) .builder( StandardFilterFactory.class, "standard" ).end() .builder( ASCIIFoldingFilterFactory.class, "asciifolding" ) .rename( "preserveOriginal", "preserve_original" ) .end() .builder( LengthFilterFactory.class, "length" ).end() .builder( LowerCaseFilterFactory.class, "lowercase" ).end() .builder( UpperCaseFilterFactory.class, "uppercase" ).end() .builder( NGramFilterFactory.class, "nGram" ) .rename( "minGramSize", "min_gram" ) .rename( "maxGramSize", "max_gram" ) .end() .builder( EdgeNGramFilterFactory.class, "edgeNGram" ) .rename( "minGramSize", "min_gram" ) .rename( "maxGramSize", "max_gram" ) .end() .builder( PorterStemFilterFactory.class, "porter_stem" ).end() .builder( ShingleFilterFactory.class, "shingle" ) .rename( "minShingleSize", "max_shingle_size" ) .rename( "maxShingleSize", "min_shingle_size" ) .rename( "outputUnigrams", "output_unigrams" ) .rename( "outputUnigramsIfNoShingles", "output_unigrams_if_no_shingles" ) .rename( "tokenSeparator", "token_separator" ) .rename( "fillerToken", "filler_token" ) .end() .builder( StopFilterFactory.class, "stop" ) // "stopwords" array (or string), "stopwords_path" file path .rename( "words", "stopwords" ) .transform( "words", new WordSetFileParameterValueTransformer( resourceLoader ) ) .rename( "ignoreCase", "ignore_case" ) .disallow( "format" ) .disallow( "enablePositionIncrements" ) .end() .builder( WordDelimiterFilterFactory.class, "word_delimiter" ) // "protected_words" array, "protected_words_path" file path, "type_table" array, "type_table_path" file path .rename( "generateWordParts", "generate_word_parts" ) .rename( "generateNumberParts", "generate_number_parts" ) .rename( "catenateWords", "catenate_words" ) .rename( "catenateNumbers", "catenate_numbers" ) .rename( "catenateAll", "catenate_all" ) .rename( "splitOnCaseChange", "split_on_case_change" ) .rename( "splitOnNumerics", "split_on_numerics" ) .rename( "preserveOriginal", "preserve_original" ) .rename( "stemEnglishPossessive", "stem_english_possessive" ) .rename( "protected", "protected_words" ) .transform( "protected", new WordSetFileParameterValueTransformer( resourceLoader ) ) .rename( "types", "type_table" ) .transform( "types", new WordSetFileParameterValueTransformer( resourceLoader ) ) .end() .builder( ArabicStemFilterFactory.class, "stemmer" ).add( "name", "armenian" ).end() .builder( BrazilianStemFilterFactory.class, "stemmer" ).add( "name", "brazilian" ).end() .builder( BulgarianStemFilterFactory.class, "stemmer" ).add( "name", "bulgarian" ).end() .builder( CzechStemFilterFactory.class, "stemmer" ).add( "name", "czech" ).end() .builder( EnglishMinimalStemFilterFactory.class, "stemmer" ).add( "name", "minimal_english" ).end() .builder( EnglishPossessiveFilterFactory.class, "stemmer" ).add( "name", "possessive_english" ).end() .builder( FinnishLightStemFilterFactory.class, "stemmer" ).add( "name", "light_finnish" ).end() .builder( FrenchLightStemFilterFactory.class, "stemmer" ).add( "name", "light_french" ).end() .builder( FrenchMinimalStemFilterFactory.class, "stemmer" ).add( "name", "minimal_french" ).end() .builder( GalicianStemFilterFactory.class, "stemmer" ).add( "name", "galician" ).end() .builder( GalicianMinimalStemFilterFactory.class, "stemmer" ).add( "name", "minimal_galician" ).end() .builder( GermanStemFilterFactory.class, "stemmer" ).add( "name", "german" ).end() .builder( GermanMinimalStemFilterFactory.class, "stemmer" ).add( "name", "minimal_german" ).end() .builder( GermanLightStemFilterFactory.class, "stemmer" ).add( "name", "light_german" ).end() .builder( GreekStemFilterFactory.class, "stemmer" ).add( "name", "greek" ).end() .builder( HindiStemFilterFactory.class, "stemmer" ).add( "name", "hindi" ).end() .builder( HungarianLightStemFilterFactory.class, "stemmer" ).add( "name", "light_hungarian" ).end() .builder( IndonesianStemFilterFactory.class, "stemmer" ).add( "name", "indonesian" ).end() .builder( ItalianLightStemFilterFactory.class, "stemmer" ).add( "name", "light_italian" ).end() .builder( SoraniStemFilterFactory.class, "stemmer" ).add( "name", "sorani" ).end() .builder( LatvianStemFilterFactory.class, "stemmer" ).add( "name", "latvian" ).end() .builder( NorwegianLightStemFilterFactory.class, "stemmer" ) .add( "name", "light_norwegian" ) .rename( "variant", "name" ) .transform( "variant" ) .add( "nb", "light_norwegian" ) .add( "nn", "light_nynorsk" ) .end() .end() .builder( NorwegianMinimalStemFilterFactory.class, "stemmer" ) .add( "name", "minimal_norwegian" ) .rename( "variant", "name" ) .transform( "variant" ) .add( "nb", "minimal_norwegian" ) .add( "nn", "minimal_nynorsk" ) .end() .end() .builder( PortugueseStemFilterFactory.class, "stemmer" ).add( "name", "portuguese_rslp" ).end() .builder( PortugueseLightStemFilterFactory.class, "stemmer" ).add( "name", "light_portuguese" ).end() .builder( PortugueseMinimalStemFilterFactory.class, "stemmer" ).add( "name", "minimal_portuguese" ).end() .builder( RussianLightStemFilterFactory.class, "stemmer" ).add( "name", "light_russian" ).end() .builder( SpanishLightStemFilterFactory.class, "stemmer" ).add( "name", "light_spanish" ).end() .builder( SwedishLightStemFilterFactory.class, "stemmer" ).add( "name", "light_swedish" ).end() .builder( StemmerOverrideFilterFactory.class, "stemmer_override" ) // "rules" array, "rules_path" file path .rename( "dictionary", "rules" ) .transform( "dictionary", new StemmerOverrideRuleFileParameterValueTransformer( resourceLoader ) ) .disallow( "ignoreCase" ) .end() .builder( KeywordMarkerFilterFactory.class, "keyword_marker" ) // "keywords" array, "keywords_path" file path .rename( "protected", "keywords" ) .transform( "protected", new WordSetFileParameterValueTransformer( resourceLoader ) ) .disallow( "pattern" ) .rename( "ignoreCase", "ignore_case" ) .end() .builder( KeywordRepeatFilterFactory.class, "keyword_repeat" ).end() .builder( KStemFilterFactory.class, "kstem" ).end() .builder( SnowballPorterFilterFactory.class, "snowball" ) .disallow( "protected" ) .end() .builder( SynonymFilterFactory.class, "synonym" ) // "synonyms" array, "synonyms_path" file path .rename( "ignoreCase", "ignore_case" ) .transform( new SynonymsParametersTransformer( SynonymFilterFactory.class, resourceLoader ) ) .disallow( "analyzer" ) .rename( "tokenizerFactory", "tokenizer" ) .transform( "tokenizerFactory", new TokenizerClassNameToElasticsearchTypeNameTransformer( SynonymFilterFactory.class, "tokenizerFactory" ) ) .end() .builder( HyphenationCompoundWordTokenFilterFactory.class, "hyphenation_decompounder" ) .rename( "dictionary", "word_list" ) .transform( "dictionary", new WordSetFileParameterValueTransformer( resourceLoader ) ) .disallow( "encoding" ) .rename( "hyphenator", "hyphenation_patterns_path" ) // The file must be on the Elasticsearch servers, because we can't forward the content of a local file .rename( "minWordSize", "min_word_size" ) .rename( "minSubwordSize", "min_subword_size" ) .rename( "maxSubwordSize", "max_subword_size" ) .rename( "onlyLongestMatch", "only_longest_match" ) .end() .builder( DictionaryCompoundWordTokenFilterFactory.class, "dictionary_decompounder" ) .rename( "dictionary", "word_list" ) .transform( "dictionary", new WordSetFileParameterValueTransformer( resourceLoader ) ) .rename( "minWordSize", "min_word_size" ) .rename( "minSubwordSize", "min_subword_size" ) .rename( "maxSubwordSize", "max_subword_size" ) .rename( "onlyLongestMatch", "only_longest_match" ) .end() .builder( ReverseStringFilterFactory.class, "reverse" ).end() .builder( ElisionFilterFactory.class, "elision" ) .transform( "articles", new WordSetFileParameterValueTransformer( resourceLoader ) ) .rename( "ignoreCase", "articles_case" ) .end() .builder( TruncateTokenFilterFactory.class, "truncate" ) .rename( "prefixLength", "length" ) .end() .builder( PatternCaptureGroupFilterFactory.class, "pattern_capture" ) // "patterns" array .rename( "pattern", "patterns" ) .transform( "pattern", new SingleElementArrayParameterValueTransformer( StringParameterValueTransformer.INSTANCE ) ) .end() .builder( PatternReplaceFilterFactory.class, "pattern_replace" ) .rename( "replace", "all" ) .transform( "replace" ) .add( "all", "true" ) .add( "first", "false" ) .end() .end() .builder( TrimFilterFactory.class, "trim" ) .disallow( "updateOffsets" ) .end() .builder( LimitTokenCountFilterFactory.class, "limit" ) .rename( "maxTokenCount", "max_token_count" ) .rename( "consumeAllTokens", "consume_all_tokens" ) .end() .builder( CommonGramsFilterFactory.class, "common_grams" ) // "common_words" array, "common_words_path" file path .rename( "words", "common_words" ) .transform( "words", new WordSetFileParameterValueTransformer( resourceLoader ) ) .disallow( "format" ) // Only one format is supported .rename( "ignoreCase", "ignore_case" ) .end() .builder( ArabicNormalizationFilterFactory.class, "arabic_normalization" ).end() .builder( GermanNormalizationFilterFactory.class, "german_normalization" ).end() .builder( HindiNormalizationFilterFactory.class, "hindi_normalization" ).end() .builder( IndicNormalizationFilterFactory.class, "indic_normalization" ).end() .builder( SoraniNormalizationFilterFactory.class, "sorani_normalization" ).end() .builder( PersianNormalizationFilterFactory.class, "persian_normalization" ).end() .builder( ScandinavianNormalizationFilterFactory.class, "scandinavian_normalization" ).end() .builder( ScandinavianFoldingFilterFactory.class, "scandinavian_folding" ).end() .builder( SerbianNormalizationFilterFactory.class, "serbian_normalization" ).end() .builder( CJKWidthFilterFactory.class, "cjk_width" ).end() .builder( CJKBigramFilterFactory.class, "cjk_bigram" ) // "ignored_scripts" array .transform( new CjkBigramIgnoredScriptsParametersTransformer() ) .rename( "outputUnigrams", "output_unigrams" ) .end() .builder( DelimitedPayloadTokenFilterFactory.class, "delimited_payload_filter" ) .rename( "encoder", "encoding" ) // custom class name is not supported .end() .builder( KeepWordFilterFactory.class, "keep" ) .rename( "words", "keep_words" ) .transform( "words", new WordSetFileParameterValueTransformer( resourceLoader ) ) .rename( "ignoreCase", "keep_words_case" ) .disallow( "enablePositionIncrements" ) .end() .builder( TypeTokenFilterFactory.class, "keep_types" ) .mandateAndStrip( "useWhitelist", "true" ) .transform( "types", new WordSetFileParameterValueTransformer( resourceLoader ) ) .disallow( "enablePositionIncrements" ) .end() .builder( ClassicFilterFactory.class, "classic" ).end() .builder( ApostropheFilterFactory.class, "apostrophe" ).end() .builder( DecimalDigitFilterFactory.class, "decimal_digit" ).end() .addJsonPassThrough( ElasticsearchTokenFilterFactory.class ) .build(); } @Override public void stop() { luceneAnalyzers = null; luceneCharFilters = null; luceneTokenizers = null; luceneTokenFilters = null; } @Override public String translate(Class<?> luceneClass) { String elasticsearchName = luceneAnalyzers.get( luceneClass.getName() ); if ( elasticsearchName == null ) { throw log.unsupportedAnalyzerImplementation( luceneClass ); } return elasticsearchName; } @Override public CharFilterDefinition translate(CharFilterDef hibernateSearchDef) { Class<? extends CharFilterFactory> factoryType = hibernateSearchDef.factory(); AnalysisDefinitionFactory<CharFilterDefinition> factory = luceneCharFilters.get( factoryType.getName() ); if ( factory == null ) { throw log.unsupportedCharFilterFactory( factoryType ); } Map<String, String> map = ParameterAnnotationsReader.toNewMutableMap( hibernateSearchDef.params() ); return factory.create( map ); } @Override public TokenizerDefinition translate(TokenizerDef hibernateSearchDef) { Class<? extends TokenizerFactory> factoryType = hibernateSearchDef.factory(); AnalysisDefinitionFactory<TokenizerDefinition> factory = luceneTokenizers.get( factoryType.getName() ); if ( factory == null ) { throw log.unsupportedTokenizerFactory( factoryType ); } Map<String, String> map = ParameterAnnotationsReader.toNewMutableMap( hibernateSearchDef.params() ); return factory.create( map ); } @Override public TokenFilterDefinition translate(TokenFilterDef hibernateSearchDef) { Class<? extends TokenFilterFactory> factoryType = hibernateSearchDef.factory(); AnalysisDefinitionFactory<TokenFilterDefinition> factory = luceneTokenFilters.get( factoryType.getName() ); if ( factory == null ) { throw log.unsupportedTokenFilterFactory( factoryType ); } Map<String, String> map = ParameterAnnotationsReader.toNewMutableMap( hibernateSearchDef.params() ); return factory.create( map ); } private class TokenizerClassNameToElasticsearchTypeNameTransformer implements ParameterValueTransformer { private final Class<?> factoryClass; private final String parameterName; public TokenizerClassNameToElasticsearchTypeNameTransformer(Class<?> factoryClass, String parameterName) { this.factoryClass = factoryClass; this.parameterName = parameterName; } @Override public JsonElement transform(String parameterValue) { AnalysisDefinitionFactory<?> factory = DefaultElasticsearchAnalyzerDefinitionTranslator.this.luceneTokenizers.get( parameterValue ); if ( factory == null ) { throw log.unsupportedAnalysisFactoryTokenizerClassNameParameter( factoryClass, parameterName, parameterValue ); } return new JsonPrimitive( factory.getType() ); } @Override public String toString() { return new StringBuilder( getClass().getSimpleName() ) .append( "[" ) .append( factoryClass ) .append( "," ) .append( parameterName ) .append( "]" ) .toString(); } } }