/* * Hibernate, Relational Persistence for Idiomatic Java * * Copyright (c) 2010, Red Hat, Inc. and/or its affiliates or third-party contributors as * indicated by the @author tags or express copyright attribution * statements applied by the authors. All third-party contributions are * distributed under license by Red Hat, Inc. * * This copyrighted material is made available to anyone wishing to use, modify, * copy, or redistribute it subject to the terms and conditions of the GNU * Lesser General Public License, as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License * for more details. * * You should have received a copy of the GNU Lesser General Public License * along with this distribution; if not, write to: * Free Software Foundation, Inc. * 51 Franklin Street, Fifth Floor * Boston, MA 02110-1301 USA */ package org.hibernate.search.test.analyzer.solr; import org.apache.lucene.index.Term; import org.apache.lucene.search.TermQuery; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Token; import org.hibernate.Transaction; import org.hibernate.cfg.Configuration; import org.hibernate.search.FullTextSession; import org.hibernate.search.Search; import org.hibernate.search.test.SearchTestCase; import org.hibernate.search.util.AnalyzerUtils; import static org.hibernate.search.test.analyzer.AnalyzerTest.assertTokensEqual; /** * Tests the Solr analyzer creation framework. * This test might be affected by the version of the Analyzers being used. If it was to fail * after an upgrade of a Lucene or Solr version, make sure the new dependency still respects * the value Version.LUCENE_30, or update the test (older enum values are eventually deprecated * and not enforced anymore). * * @author Emmanuel Bernard * @author Hardy Ferentschik */ public class SolrAnalyzerTest extends SearchTestCase { /** * Tests that the token filters applied to <code>Team</code> are successfully created and used. Refer to * <code>Team</code> to see the exact definitions. * * @throws Exception in case the test fails */ public void testAnalyzerDef() throws Exception { // create the test instance Team team = new Team(); team.setDescription( "This is a D\u00E0scription" ); // \u00E0 == � - ISOLatin1AccentFilterFactory should strip of diacritic team.setLocation( "Atlanta" ); team.setName( "ATL team" ); // persist and index the test object FullTextSession fts = Search.getFullTextSession( openSession() ); Transaction tx = fts.beginTransaction(); fts.persist( team ); tx.commit(); fts.clear(); // execute several search to show that the right tokenizers were applies tx = fts.beginTransaction(); TermQuery query = new TermQuery( new Term( "description", "D\u00E0scription" ) ); assertEquals( "iso latin filter should work. � should be a now", 0, fts.createFullTextQuery( query ).list().size() ); query = new TermQuery( new Term( "description", "is" ) ); assertEquals( "stop word filter should work. is should be removed", 0, fts.createFullTextQuery( query ).list().size() ); query = new TermQuery( new Term( "description", "dascript" ) ); assertEquals( "snowball stemmer should work. 'dascription' should be stemmed to 'dascript'", 1, fts.createFullTextQuery( query ).list().size() ); // cleanup fts.delete( fts.createFullTextQuery( query ).list().get( 0 ) ); tx.commit(); fts.close(); } /** * Tests the analyzers defined on {@link Team}. * * @throws Exception in case the test fails. */ public void testAnalyzers() throws Exception { FullTextSession fts = Search.getFullTextSession( openSession() ); Analyzer analyzer = fts.getSearchFactory().getAnalyzer( "standard_analyzer" ); String text = "This is just FOOBAR's"; Token[] tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "name", text ); assertTokensEqual( tokens, new String[] { "This", "is", "just", "FOOBAR" } ); analyzer = fts.getSearchFactory().getAnalyzer( "html_standard_analyzer" ); text = "This is <b>foo</b><i>bar's</i>"; tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "name", text ); assertTokensEqual( tokens, new String[] { "This", "is", "foobar" } ); analyzer = fts.getSearchFactory().getAnalyzer( "html_whitespace_analyzer" ); text = "This is <b>foo</b><i>bar's</i>"; tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "name", text ); assertTokensEqual( tokens, new String[] { "This", "is", "foobar's" } ); analyzer = fts.getSearchFactory().getAnalyzer( "trim_analyzer" ); text = " Kittens! "; tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "name", text ); assertTokensEqual( tokens, new String[] { "kittens" } ); analyzer = fts.getSearchFactory().getAnalyzer( "length_analyzer" ); text = "ab abc abcd abcde abcdef"; tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "name", text ); assertTokensEqual( tokens, new String[] { "abc", "abcd", "abcde" } ); analyzer = fts.getSearchFactory().getAnalyzer( "length_analyzer" ); text = "ab abc abcd abcde abcdef"; tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "name", text ); assertTokensEqual( tokens, new String[] { "abc", "abcd", "abcde" } ); analyzer = fts.getSearchFactory().getAnalyzer( "porter_analyzer" ); text = "bikes bikes biking"; tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "name", text ); assertTokensEqual( tokens, new String[] { "bike", "bike", "bike" } ); analyzer = fts.getSearchFactory().getAnalyzer( "word_analyzer" ); text = "CamelCase"; tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "name", text ); assertTokensEqual( tokens, new String[] { "Camel", "Case" } ); analyzer = fts.getSearchFactory().getAnalyzer( "synonym_analyzer" ); text = "ipod cosmos"; tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "name", text ); assertTokensEqual( tokens, new String[] { "ipod", "i-pod", "universe", "cosmos" } ); analyzer = fts.getSearchFactory().getAnalyzer( "shingle_analyzer" ); text = "please divide this sentence into shingles"; tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "name", text ); assertTokensEqual( tokens, new String[] { "please", "please divide", "divide", "divide this", "this", "this sentence", "sentence", "sentence into", "into", "into shingles", "shingles" } ); analyzer = fts.getSearchFactory().getAnalyzer( "phonetic_analyzer" ); text = "The quick brown fox jumped over the lazy dogs"; tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "name", text ); AnalyzerUtils.displayTokens( analyzer, "name", text ); assertTokensEqual( tokens, new String[] { "0", "KK", "BRN", "FKS", "JMPT", "OFR", "0", "LS", "TKS" } ); analyzer = fts.getSearchFactory().getAnalyzer( "pattern_analyzer" ); text = "foo,bar"; tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "name", text ); assertTokensEqual( tokens, new String[] { "foo", "bar" } ); // CharStreamFactories test analyzer = fts.getSearchFactory().getAnalyzer( "mapping_char_analyzer" ); text = "CORA\u00C7\u00C3O DE MEL\u00C3O"; tokens = AnalyzerUtils.tokensFromAnalysis( analyzer, "name", text ); assertTokensEqual( tokens, new String[] { "CORACAO", "DE", "MELAO" } ); fts.close(); } protected Class<?>[] getAnnotatedClasses() { return new Class[] { Team.class }; } protected void configure(Configuration cfg) { super.configure( cfg ); cfg.setProperty( "hibernate.search.lucene_version", org.apache.lucene.util.Version.LUCENE_30.name() ); } }