/*
 * Hibernate Search, full-text search for your domain model
 *
 * License: GNU Lesser General Public License (LGPL), version 2.1 or later
 * See the lgpl.txt file in the root directory or <http://www.gnu.org/licenses/lgpl-2.1.html>.
 */
package org.hibernate.search.test.analyzer;

import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.hibernate.Transaction;
import org.hibernate.search.FullTextQuery;
import org.hibernate.search.FullTextSession;
import org.hibernate.search.Search;
import org.hibernate.search.SearchFactory;
import org.hibernate.search.exception.SearchException;
import org.hibernate.search.test.SearchTestBase;
import org.hibernate.search.test.util.FullTextSessionBuilder;
import org.hibernate.search.testsupport.TestConstants;
import org.hibernate.search.testsupport.junit.SkipOnElasticsearch;
import org.hibernate.search.util.AnalyzerUtils;
import org.hibernate.search.util.logging.impl.Log;
import org.hibernate.search.util.logging.impl.LoggerFactory;
import org.junit.Assert;
import org.junit.Test;
import org.junit.experimental.categories.Category;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

/**
 * Tests for analyzer selection: analyzer discriminators, scoped (per-field) analyzers
 * and the scoped analyzer exposed through the {@link SearchFactory}.
 *
 * @author Emmanuel Bernard
 * @author Hardy Ferentschik
 */
@Category(SkipOnElasticsearch.class) // Custom analyzer implementations cannot be used with Elasticsearch
public class AnalyzerTest extends SearchTestBase {

	public static final Log log = LoggerFactory.make();

	/**
	 * Persists an English article referencing a German article and verifies that both
	 * can be found through stemmed terms, which is only possible if the language
	 * specific analyzer was applied at index time.
	 */
	@Test
	public void testAnalyzerDiscriminator() throws Exception {
		Article germanArticle = new Article();
		germanArticle.setLanguage( "de" );
		germanArticle.setText( "aufeinanderschl\u00FCgen" );
		Set<Article> references = new HashSet<Article>();
		references.add( germanArticle );

		Article englishArticle = new Article();
		englishArticle.setLanguage( "en" );
		englishArticle.setText( "acknowledgment" );
		englishArticle.setReferences( references );

		FullTextSession s = Search.getFullTextSession( openSession() );
		try {
			Transaction tx = s.beginTransaction();
			s.persist( englishArticle );
			tx.commit();

			tx = s.beginTransaction();

			// At query time we use a standard analyzer. We explicitly search for tokens which can only
			// be found if the right language specific stemmer was used at index time.
			QueryParser parser = new QueryParser( "references.text", TestConstants.standardAnalyzer );
			org.apache.lucene.search.Query luceneQuery = parser.parse( "aufeinanderschlug" );
			FullTextQuery query = s.createFullTextQuery( luceneQuery );
			assertEquals( 1, query.getResultSize() );

			parser = new QueryParser( "text", TestConstants.standardAnalyzer );
			luceneQuery = parser.parse( "acknowledg" );
			query = s.createFullTextQuery( luceneQuery );
			assertEquals( 1, query.getResultSize() );

			tx.commit();
		}
		finally {
			// release the session even when an assertion above fails
			s.close();
		}
	}

	/**
	 * Verifies that building a session factory for {@code BlogEntry} is rejected with a
	 * {@link SearchException} complaining about multiple analyzer discriminators.
	 */
	@Test
	public void testMultipleAnalyzerDiscriminatorDefinitions() {
		FullTextSessionBuilder builder = new FullTextSessionBuilder();
		builder.addAnnotatedClass( BlogEntry.class );
		try {
			builder.build();
			fail( "Expected a SearchException because of multiple AnalyzerDiscriminator definitions" );
		}
		catch (SearchException e) {
			assertTrue(
					"Wrong error message",
					e.getMessage().startsWith( "Multiple AnalyzerDiscriminator defined in the same class hierarchy" )
			);
		}
	}

	/**
	 * Indexes a {@code MyEntity} and checks that each field can be found through a
	 * term that differs from the stored value, i.e. a term which has to originate
	 * from the analyzer scoped to that field.
	 */
	@Test
	public void testScopedAnalyzers() throws Exception {
		MyEntity en = new MyEntity();
		en.setEntity( "Entity" );
		en.setField( "Field" );
		en.setProperty( "Property" );
		en.setComponent( new MyComponent() );
		en.getComponent().setComponentProperty( "component property" );

		FullTextSession s = Search.getFullTextSession( openSession() );
		try {
			Transaction tx = s.beginTransaction();
			s.persist( en );
			tx.commit();

			tx = s.beginTransaction();
			QueryParser parser = new QueryParser( "id", TestConstants.standardAnalyzer );
			org.apache.lucene.search.Query luceneQuery = parser.parse( "entity:alarm" );
			FullTextQuery query = s.createFullTextQuery( luceneQuery, MyEntity.class );
			assertEquals( 1, query.getResultSize() );

			luceneQuery = parser.parse( "property:cat" );
			query = s.createFullTextQuery( luceneQuery, MyEntity.class );
			assertEquals( 1, query.getResultSize() );

			luceneQuery = parser.parse( "field:energy" );
			query = s.createFullTextQuery( luceneQuery, MyEntity.class );
			assertEquals( 1, query.getResultSize() );

			luceneQuery = parser.parse( "component.componentProperty:noise" );
			query = s.createFullTextQuery( luceneQuery, MyEntity.class );
			assertEquals( 1, query.getResultSize() );

			// clean up the entity persisted by this test
			s.delete( query.getSingleResult() );
			tx.commit();
		}
		finally {
			// release the session even when an assertion above fails
			s.close();
		}
	}

	/**
	 * Retrieves the scoped analyzer of {@code MyEntity} from the {@link SearchFactory},
	 * checks the tokens it produces per field, and verifies that requesting an analyzer
	 * for {@code null} or for a class which is not part of this test's mapping is rejected.
	 */
	@Test
	public void testScopedAnalyzersFromSearchFactory() throws Exception {
		FullTextSession session = Search.getFullTextSession( openSession() );
		try {
			SearchFactory searchFactory = session.getSearchFactory();
			Analyzer analyzer = searchFactory.getAnalyzer( MyEntity.class );

			// The text passed into the analysis does not matter, since the analyzers under test
			// return the same tokens all the time. We just want to make sure that the right
			// analyzers are used.
			Token[] tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "entity", "" );
			assertTokensEqual( tokens, new String[] { "alarm", "dog", "performance" } );

			tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "property", "" );
			assertTokensEqual( tokens, new String[] { "sound", "cat", "speed" } );

			tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "field", "" );
			assertTokensEqual( tokens, new String[] { "music", "elephant", "energy" } );

			tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "component.componentProperty", "" );
			assertTokensEqual( tokens, new String[] { "noise", "mouse", "light" } );

			// test border cases; fail() raises an AssertionError, which the catch blocks below do not swallow
			try {
				searchFactory.getAnalyzer( (Class<?>) null );
				fail( "Expected an IllegalArgumentException when requesting the analyzer for a null class" );
			}
			catch (IllegalArgumentException iae) {
				log.debug( "success" );
			}

			try {
				searchFactory.getAnalyzer( String.class );
				fail( "Expected an IllegalArgumentException when requesting the analyzer for String.class" );
			}
			catch (IllegalArgumentException iae) {
				log.debug( "success" );
			}
		}
		finally {
			// release the session even when an assertion above fails
			session.close();
		}
	}

	/**
	 * Verifies that analyzing the {@code notAnalyzed} field with the scoped analyzer of
	 * {@code MyEntity} yields the input as one single, unmodified token.
	 */
	@Test
	public void testNotAnalyzedFieldAndScopedAnalyzer() throws Exception {
		FullTextSession session = Search.getFullTextSession( openSession() );
		try {
			SearchFactory searchFactory = session.getSearchFactory();
			Analyzer analyzer = searchFactory.getAnalyzer( MyEntity.class );

			// Unlike the other fields, the input matters here: it is expected to pass
			// through the scoped analyzer untouched.
			Token[] tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "notAnalyzed", "pass through" );
			assertTokensEqual( tokens, new String[] { "pass through" } );
		}
		finally {
			// release the session even when an assertion above fails
			session.close();
		}
	}

	/**
	 * Asserts that the term text of each token equals the expected string at the same position.
	 *
	 * @param tokens the tokens produced by an analyzer
	 * @param strings the expected term texts, in order
	 */
	public static void assertTokensEqual(Token[] tokens, String[] strings) {
		Assert.assertEquals( strings.length, tokens.length );

		for ( int i = 0; i < tokens.length; i++ ) {
			Assert.assertEquals( "index " + i, strings[i], AnalyzerUtils.getTermText( tokens[i] ) );
		}
	}

	@Override
	public Class<?>[] getAnnotatedClasses() {
		return new Class<?>[] { MyEntity.class, Article.class };
	}
}