/* * Hibernate Search, full-text search for your domain model * * License: GNU Lesser General Public License (LGPL), version 2.1 or later * See the lgpl.txt file in the root directory or <http://www.gnu.org/licenses/lgpl-2.1.html>. */ package org.hibernate.search.elasticsearch.test; import static org.fest.assertions.Assertions.assertThat; import static org.fest.assertions.MapAssert.entry; import java.lang.annotation.Annotation; import java.util.Map; import org.apache.lucene.analysis.charfilter.HTMLStripCharFilterFactory; import org.apache.lucene.analysis.cjk.CJKBigramFilterFactory; import org.apache.lucene.analysis.core.TypeTokenFilterFactory; import org.apache.lucene.analysis.core.WhitespaceTokenizerFactory; import org.apache.lucene.analysis.no.NorwegianLightStemFilterFactory; import org.apache.lucene.analysis.pattern.PatternCaptureGroupFilterFactory; import org.apache.lucene.analysis.pattern.PatternReplaceFilterFactory; import org.apache.lucene.analysis.standard.StandardFilterFactory; import org.apache.lucene.analysis.standard.StandardTokenizerFactory; import org.apache.lucene.analysis.synonym.SynonymFilterFactory; import org.apache.lucene.analysis.util.TokenizerFactory; import org.hibernate.annotations.common.annotationfactory.AnnotationDescriptor; import org.hibernate.annotations.common.annotationfactory.AnnotationFactory; import org.hibernate.search.annotations.CharFilterDef; import org.hibernate.search.annotations.Parameter; import org.hibernate.search.annotations.TokenFilterDef; import org.hibernate.search.annotations.TokenizerDef; import org.hibernate.search.elasticsearch.analyzer.ElasticsearchTokenFilterFactory; import org.hibernate.search.elasticsearch.impl.JsonBuilder; import org.hibernate.search.elasticsearch.settings.impl.model.CharFilterDefinition; import org.hibernate.search.elasticsearch.settings.impl.model.TokenFilterDefinition; import org.hibernate.search.elasticsearch.settings.impl.model.TokenizerDefinition; import org.hibernate.search.elasticsearch.settings.impl.translation.DefaultElasticsearchAnalyzerDefinitionTranslator; import org.hibernate.search.exception.SearchException; import org.hibernate.search.testsupport.TestForIssue; import org.hibernate.search.testsupport.setup.BuildContextForTest; import org.hibernate.search.testsupport.setup.SearchConfigurationForTest; import org.junit.After; import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; import com.google.gson.JsonPrimitive; /** * @author Yoann Rodiere */ public class DefaultElasticsearchAnalyzerDefinitionTranslatorTest { @Rule public ExpectedException thrown = ExpectedException.none(); private DefaultElasticsearchAnalyzerDefinitionTranslator translator = new DefaultElasticsearchAnalyzerDefinitionTranslator(); @Before public void setup() { translator.start( null, new BuildContextForTest( new SearchConfigurationForTest() ) ); } @After public void tearDown() { translator.stop(); } @Test public void unknownClass() { TokenizerDef annotation = annotation( TokenizerDef.class, CustomTokenizerFactory.class ); thrown.expect( SearchException.class ); thrown.expectMessage( "HSEARCH400059" ); thrown.expectMessage( CustomTokenizerFactory.class.getSimpleName() ); translator.translate( annotation ); } @Test public void translateType() { TokenFilterDef annotation = annotation( TokenFilterDef.class, StandardFilterFactory.class ); TokenFilterDefinition definition = translator.translate( annotation ); assertThat( definition.getType() ).as( "type" ).isEqualTo( "standard" ); } @Test public void renameParameter() { TokenizerDef annotation = annotation( TokenizerDef.class, StandardTokenizerFactory.class, param( "maxTokenLength", "5" ) ); TokenizerDefinition definition = translator.translate( annotation ); assertThat( definition.getParameters() ).as( "parameters" ) .includes( entry( "max_token_length", new JsonPrimitive( "5" ) ) ); assertThat( definition.getParameters().keySet() ).as( "parameter names" ) .excludes( "maxTokenLength" ); } @Test public void disallowParameter() { TokenizerDef annotation = annotation( TokenizerDef.class, WhitespaceTokenizerFactory.class, param( "rule", "foo" ) ); thrown.expect( SearchException.class ); thrown.expectMessage( "HSEARCH400061" ); thrown.expectMessage( WhitespaceTokenizerFactory.class.getSimpleName() ); thrown.expectMessage( "'rule'" ); translator.translate( annotation ); } @Test public void transformParameter() { CharFilterDef annotation = annotation( CharFilterDef.class, HTMLStripCharFilterFactory.class, param( "escapedTags", "foo,bar" ) ); CharFilterDefinition definition = translator.translate( annotation ); assertThat( definition.getParameters() ).as( "parameters" ) .includes( entry( "escaped_tags", JsonBuilder.array().add( new JsonPrimitive( "foo" ) ).add( new JsonPrimitive( "bar" ) ).build() ) ); } @Test public void transformParameter_tokenizerClass() { TokenFilterDef annotation = annotation( TokenFilterDef.class, SynonymFilterFactory.class, param( "tokenizerFactory", WhitespaceTokenizerFactory.class.getName() ) ); TokenFilterDefinition definition = translator.translate( annotation ); assertThat( definition.getParameters() ).as( "parameters" ) .includes( entry( "tokenizer", new JsonPrimitive( "whitespace" ) ) ); } @Test public void transformParameter_tokenizerClass_unknownClass() { TokenFilterDef annotation = annotation( TokenFilterDef.class, SynonymFilterFactory.class, param( "tokenizerFactory", CustomTokenizerFactory.class.getName() ) ); thrown.expect( SearchException.class ); thrown.expectMessage( "HSEARCH400062" ); thrown.expectMessage( SynonymFilterFactory.class.getSimpleName() ); thrown.expectMessage( "'tokenizerFactory'" ); translator.translate( annotation ); } @Test public void transformParameter_singleElementArray() { TokenFilterDef annotation = annotation( TokenFilterDef.class, PatternCaptureGroupFilterFactory.class, param( "pattern", "foo" ) ); TokenFilterDefinition definition = translator.translate( annotation ); assertThat( definition.getParameters() ).as( "parameters" ) .includes( entry( "patterns", JsonBuilder.array().add( new JsonPrimitive( "foo" ) ).build() ) ); } @Test public void transformParameter_norwegianStemmer() { TokenFilterDef annotation = annotation( TokenFilterDef.class, NorwegianLightStemFilterFactory.class ); TokenFilterDefinition definition = translator.translate( annotation ); assertThat( definition.getType() ).as( "type" ).isEqualTo( "stemmer" ); assertThat( definition.getParameters() ).as( "parameters" ) .includes( entry( "name", new JsonPrimitive( "light_norwegian" ) ) ); } @Test public void transformParameter_norwegianStemmer_bokmal() { TokenFilterDef annotation = annotation( TokenFilterDef.class, NorwegianLightStemFilterFactory.class, param( "variant", "nb" ) ); TokenFilterDefinition definition = translator.translate( annotation ); assertThat( definition.getType() ).as( "type" ).isEqualTo( "stemmer" ); assertThat( definition.getParameters() ).as( "parameters" ) .includes( entry( "name", new JsonPrimitive( "light_norwegian" ) ) ); } @Test public void transformParameter_norwegianStemmer_nynorsk() { TokenFilterDef annotation = annotation( TokenFilterDef.class, NorwegianLightStemFilterFactory.class, param( "variant", "nn" ) ); TokenFilterDefinition definition = translator.translate( annotation ); assertThat( definition.getType() ).as( "type" ).isEqualTo( "stemmer" ); assertThat( definition.getParameters() ).as( "parameters" ) .includes( entry( "name", new JsonPrimitive( "light_nynorsk" ) ) ); } @Test public void transformParameter_norwegianStemmer_invalid() { TokenFilterDef annotation = annotation( TokenFilterDef.class, NorwegianLightStemFilterFactory.class, param( "variant", "invalid" ) ); thrown.expect( SearchException.class ); thrown.expectMessage( "HSEARCH400063" ); thrown.expectMessage( NorwegianLightStemFilterFactory.class.getSimpleName() ); thrown.expectMessage( "'variant'" ); thrown.expectMessage( "'invalid'" ); translator.translate( annotation ); } @Test public void transformParameter_patternReplace() { TokenFilterDef annotation = annotation( TokenFilterDef.class, PatternReplaceFilterFactory.class, param( "replace", "first" ) ); TokenFilterDefinition definition = translator.translate( annotation ); assertThat( definition.getParameters() ).as( "parameters" ) .includes( entry( "all", new JsonPrimitive( "false" ) ) ); } @Test public void transformParameter_cjkBigramIgnoredScripts() { TokenFilterDef annotation = annotation( TokenFilterDef.class, CJKBigramFilterFactory.class, param( "outputUnigrams", "true" ), param( "han", "false" ), param( "hiragana", "false" ), param( "katakana", "false" ), param( "hangul", "false" ) ); TokenFilterDefinition definition = translator.translate( annotation ); assertThat( definition.getParameters() ).as( "parameters" ) .includes( entry( "output_unigrams", new JsonPrimitive( "true" ) ) ); assertThat( definition.getParameters() ).as( "parameters" ) .includes( entry( "ignored_scripts", JsonBuilder.array() .add( new JsonPrimitive( "han" ) ) .add( new JsonPrimitive( "hiragana" ) ) .add( new JsonPrimitive( "katakana" ) ) .add( new JsonPrimitive( "hangul" ) ) .build() ) ); assertThat( definition.getParameters().keySet() ).as( "parameter names" ) .excludes( "han", "hiragana", "katakana", "hangul", "outputUnigrams" ); } @Test @TestForIssue(jiraKey = "HSEARCH-2642") public void typeToken_blacklist_implicit() { TokenFilterDef annotation = annotation( TokenFilterDef.class, TypeTokenFilterFactory.class, param( "types", "org/hibernate/search/elasticsearch/test/typeTokens.properties" ) ); thrown.expect( SearchException.class ); thrown.expectMessage( "HSEARCH400084" ); thrown.expectMessage( TypeTokenFilterFactory.class.getSimpleName() ); thrown.expectMessage( "'useWhitelist'" ); thrown.expectMessage( "'null'" ); translator.translate( annotation ); } @Test @TestForIssue(jiraKey = "HSEARCH-2642") public void typeToken_blacklist_explicit() { TokenFilterDef annotation = annotation( TokenFilterDef.class, TypeTokenFilterFactory.class, param( "types", "org/hibernate/search/elasticsearch/test/typeTokens.properties" ), param( "useWhitelist", "false" ) ); thrown.expect( SearchException.class ); thrown.expectMessage( "HSEARCH400084" ); thrown.expectMessage( TypeTokenFilterFactory.class.getSimpleName() ); thrown.expectMessage( "'useWhitelist'" ); thrown.expectMessage( "'false'" ); translator.translate( annotation ); } @Test @TestForIssue(jiraKey = "HSEARCH-2642") public void typeToken_whitelist() { TokenFilterDef annotation = annotation( TokenFilterDef.class, TypeTokenFilterFactory.class, param( "types", "org/hibernate/search/elasticsearch/test/typeTokens.properties" ), param( "useWhitelist", "true" ) ); TokenFilterDefinition definition = translator.translate( annotation ); assertThat( definition.getType() ).as( "type" ).isEqualTo( "keep_types" ); assertThat( definition.getParameters() ).as( "parameters" ) .includes( entry( "types", JsonBuilder.array() .add( new JsonPrimitive( "<FOO>" ) ) .add( new JsonPrimitive( "<BAR>" ) ) .build() ) ); // No other parameter is expected, particularly not "useWhitelist" assertThat( definition.getParameters() ).as( "parameters" ).hasSize( 1 ); } @Test public void passThrough() { TokenFilterDef annotation = annotation( TokenFilterDef.class, ElasticsearchTokenFilterFactory.class, param( "type", "'foo'" ), param( "string", "'foo'" ), param( "boolean", "true" ), param( "integer", "42" ), param( "string_array", "['a','b']" ), param( "integer_array", "[1,2]" ), param( "object", "{'a':42}" ) ); TokenFilterDefinition definition = translator.translate( annotation ); assertThat( definition.getType() ).as( "type" ).isEqualTo( "foo" ); assertThat( definition.getParameters() ).as( "parameters" ) .includes( entry( "string", new JsonPrimitive( "foo" ) ), entry( "boolean", new JsonPrimitive( true ) ), entry( "integer", new JsonPrimitive( 42 ) ), entry( "string_array", JsonBuilder.array() .add( new JsonPrimitive( "a" ) ) .add( new JsonPrimitive( "b" ) ) .build() ), entry( "integer_array", JsonBuilder.array() .add( new JsonPrimitive( 1 ) ) .add( new JsonPrimitive( 2 ) ) .build() ), entry( "object", JsonBuilder.object() .add( "a", new JsonPrimitive( 42 ) ) .build() ) ); assertThat( definition.getParameters().keySet() ).as( "parameters" ) .excludes( "type" ); } @Test public void passThrough_stringWithoutQuotes() { TokenFilterDef annotation = annotation( TokenFilterDef.class, ElasticsearchTokenFilterFactory.class, param( "type", "stringWithoutQuotes" ), param( "param", "stringWithoutQuotes" ) ); TokenFilterDefinition definition = translator.translate( annotation ); assertThat( definition.getType() ).as( "type" ).isEqualTo( "stringWithoutQuotes" ); assertThat( definition.getParameters() ).as( "parameters" ) .includes( entry( "param", new JsonPrimitive( "stringWithoutQuotes" ) ) ); assertThat( definition.getParameters().keySet() ).as( "parameters" ) .excludes( "type" ); } @Test public void passThrough_nonJsonType() { TokenFilterDef annotation = annotation( TokenFilterDef.class, ElasticsearchTokenFilterFactory.class, param( "type", "[" ) ); thrown.expect( SearchException.class ); thrown.expectMessage( "HSEARCH400065" ); thrown.expectMessage( ElasticsearchTokenFilterFactory.class.getSimpleName() ); thrown.expectMessage( "'type'" ); translator.translate( annotation ); } @Test public void passThrough_nonStringType() { TokenFilterDef annotation = annotation( TokenFilterDef.class, ElasticsearchTokenFilterFactory.class, param( "type", "{'foo':'bar'}" ) ); thrown.expect( SearchException.class ); thrown.expectMessage( "HSEARCH400065" ); thrown.expectMessage( ElasticsearchTokenFilterFactory.class.getSimpleName() ); thrown.expectMessage( "'type'" ); translator.translate( annotation ); } @Test public void passThrough_nonJsonParameter() { TokenFilterDef annotation = annotation( TokenFilterDef.class, ElasticsearchTokenFilterFactory.class, param( "type", "'foo'" ), param( "param", "{" ) ); thrown.expect( SearchException.class ); thrown.expectMessage( "HSEARCH400066" ); thrown.expectMessage( ElasticsearchTokenFilterFactory.class.getSimpleName() ); thrown.expectMessage( "'param'" ); translator.translate( annotation ); } private static <T extends Annotation> T annotation(Class<T> annotationType, Class<?> factoryType, Parameter ... parameters) { AnnotationDescriptor descriptor = new AnnotationDescriptor( annotationType ); descriptor.setValue( "factory", factoryType ); descriptor.setValue( "params", parameters ); return AnnotationFactory.create( descriptor ); } private static Parameter param(String name, String value) { AnnotationDescriptor descriptor = new AnnotationDescriptor( Parameter.class ); descriptor.setValue( "name", name ); descriptor.setValue( "value", value ); return AnnotationFactory.create( descriptor ); } private abstract static class CustomTokenizerFactory extends TokenizerFactory { protected CustomTokenizerFactory(Map<String, String> args) { super( args ); } } }