/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.spelling;

import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.Map;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.util.LuceneTestCase.SuppressTempFileChecks;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.SpellCheckComponent;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
import org.junit.BeforeClass;
import org.junit.Test;

/**
 * Tests {@link WordBreakSolrSpellChecker}, both when it is driven directly
 * ({@link #testStandAlone()}) and when it is reached through the
 * {@code spellCheckWithWordbreak} request handler
 * ({@link #testInConjunction()}, {@link #testCollate()}).
 */
@SuppressTempFileChecks(bugUrl = "https://issues.apache.org/jira/browse/SOLR-1877 Spellcheck IndexReader leak bug?")
public class WordBreakSolrSpellCheckerTest extends SolrTestCaseJ4 {

  /**
   * Initializes the test core and indexes the eight documents whose
   * {@code lowerfilt} terms the tests below break apart and combine.
   */
  @BeforeClass
  public static void beforeClass() throws Exception {
    initCore("solrconfig-spellcheckcomponent.xml","schema.xml");
    assertNull(h.validateUpdate(adoc("id", "0", "lowerfilt", "pain table paintablepine pi ne in able")));
    assertNull(h.validateUpdate(adoc("id", "1", "lowerfilt", "paint able pineapple goodness in")));
    assertNull(h.validateUpdate(adoc("id", "2", "lowerfilt", "pa in table pineapplegoodness")));
    assertNull(h.validateUpdate(adoc("id", "3", "lowerfilt", "printable line in ample food mess")));
    assertNull(h.validateUpdate(adoc("id", "4", "lowerfilt", "printable in pointable paint able")));
    assertNull(h.validateUpdate(adoc("id", "5", "lowerfilt", "printable in puntable paint able ")));
    assertNull(h.validateUpdate(adoc("id", "6", "lowerfilt", "paint able in pintable plantable")));
    assertNull(h.validateUpdate(adoc("id", "7", "lowerfilt", "zxcvqwtp fg hj")));
    assertNull(h.validateUpdate(commit()));
    //docfreq=7:  in
    //docfreq=5:  able
    //docfreq=4:  paint
    //docfreq=3:  printable
    //docfreq=2:  table
    //docfreq=1:  {all others}
  }

  /**
   * Drives the checker directly (no request handler) with word breaking and
   * word combining both enabled, and verifies the offsets and corrections
   * reported for every original token.
   */
  @Test
  public void testStandAlone() throws Exception {
    SolrCore core = h.getCore();
    WordBreakSolrSpellChecker checker = new WordBreakSolrSpellChecker();
    NamedList<String> params = new NamedList<>();
    params.add("field", "lowerfilt");
    params.add(WordBreakSolrSpellChecker.PARAM_BREAK_WORDS, "true");
    params.add(WordBreakSolrSpellChecker.PARAM_COMBINE_WORDS, "true");
    params.add(WordBreakSolrSpellChecker.PARAM_MAX_CHANGES, "10");
    checker.init(params, core);

    QueryConverter qc = new SpellingQueryConverter();
    qc.setAnalyzer(new MockAnalyzer(random()));

    // The searcher reference is acquired once here, so it is released exactly
    // once, in the finally block, after the last use of its index reader.
    RefCounted<SolrIndexSearcher> searcher = core.getSearcher();
    try {
      {
        //Prior to SOLR-8175, the required term would cause an AIOOBE.
        Collection<Token> tokens = qc.convert("+pine apple good ness");
        SpellingOptions spellOpts = new SpellingOptions(tokens, searcher.get().getIndexReader(), 10);
        SpellingResult result = checker.getSuggestions(spellOpts);
        assertNotNull(result);
        assertNotNull(result.getSuggestions());
        assertEquals(5, result.getSuggestions().size());
      }

      Collection<Token> tokens = qc.convert("paintable pine apple good ness");
      SpellingOptions spellOpts = new SpellingOptions(tokens, searcher.get().getIndexReader(), 10);
      SpellingResult result = checker.getSuggestions(spellOpts);
      assertNotNull(result);
      assertNotNull(result.getSuggestions());
      assertEquals(9, result.getSuggestions().size());

      for (Map.Entry<Token, LinkedHashMap<String, Integer>> s : result.getSuggestions().entrySet()) {
        Token orig = s.getKey();
        String[] corr = s.getValue().keySet().toArray(new String[0]);
        switch (orig.toString()) {
          case "paintable":
            assertSuggestion(orig, corr, 0, 9,
                "paint able",    //1 op ; max doc freq=5
                "pain table",    //1 op ; max doc freq=2
                "pa in table");  //2 ops
            break;
          case "pine apple":
            assertSuggestion(orig, corr, 10, 20, "pineapple");
            break;
          case "paintable pine":
            assertSuggestion(orig, corr, 0, 14, "paintablepine");
            break;
          case "good ness":
            assertSuggestion(orig, corr, 21, 30, "goodness");
            break;
          case "pine apple good ness":
            assertSuggestion(orig, corr, 10, 30, "pineapplegoodness");
            break;
          case "pine":
            assertSuggestion(orig, corr, 10, 14, "pi ne");
            break;
          case "apple":
            assertSuggestion(orig, corr, 15, 20);
            break;
          case "good":
            assertSuggestion(orig, corr, 21, 25);
            break;
          case "ness":
            assertSuggestion(orig, corr, 26, 30);
            break;
          default:
            fail("Unexpected original result: " + orig);
        }
      }
    } finally {
      searcher.decref();
    }
  }

  /**
   * Asserts the offsets and length of an original token and the exact,
   * ordered list of corrections suggested for it.
   *
   * @param orig        the original token, as keyed in the suggestion map
   * @param corr        the corrections reported for {@code orig}, in order
   * @param startOffset expected start offset of {@code orig}
   * @param endOffset   expected end offset of {@code orig}; the expected token
   *                    length is taken to be {@code endOffset - startOffset}
   * @param expected    the expected corrections, in order (none means the
   *                    token must have no corrections)
   */
  private static void assertSuggestion(Token orig, String[] corr, int startOffset, int endOffset,
      String... expected) {
    assertEquals(startOffset, orig.startOffset());
    assertEquals(endOffset, orig.endOffset());
    assertEquals(endOffset - startOffset, orig.length());
    assertArrayEquals(expected, corr);
  }

  /**
   * Queries through the {@code spellCheckWithWordbreak} handler and verifies
   * the suggestion entries, their offsets and the order of the suggested words.
   */
  @Test
  public void testInConjunction() throws Exception {
    assertQ(req(
        "q", "lowerfilt:(paintable pine apple good ness)",
        "qt", "spellCheckWithWordbreak",
        "indent", "true",
        SpellCheckComponent.SPELLCHECK_BUILD, "true",
        SpellCheckComponent.COMPONENT_NAME, "true",
        SpellCheckComponent.SPELLCHECK_ACCURACY, ".75",
        SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true"),
        "//lst[@name='suggestions']/lst[1]/@name='paintable'",
        "//lst[@name='suggestions']/lst[2]/@name='pine'",
        "//lst[@name='suggestions']/lst[3]/@name='apple'",
        "//lst[@name='suggestions']/lst[4]/@name='good'",
        "//lst[@name='suggestions']/lst[5]/@name='ness'",
        "//lst[@name='paintable']/int[@name='numFound']=8",
        "//lst[@name='paintable']/int[@name='startOffset']=11",
        "//lst[@name='paintable']/int[@name='endOffset']=20",
        "//lst[@name='paintable']/int[@name='origFreq']=0",
        "//lst[@name='paintable']/arr[@name='suggestion']/lst[1]/str[@name='word']='printable'",  //SolrSpellChecker result interleaved
        "//lst[@name='paintable']/arr[@name='suggestion']/lst[1]/int[@name='freq']=3",
        "//lst[@name='paintable']/arr[@name='suggestion']/lst[2]/str[@name='word']='paint able'", //1 op
        "//lst[@name='paintable']/arr[@name='suggestion']/lst[2]/int[@name='freq']=5",
        "//lst[@name='paintable']/arr[@name='suggestion']/lst[3]/str[@name='word']='pintable'",   //SolrSpellChecker result interleaved
        "//lst[@name='paintable']/arr[@name='suggestion']/lst[3]/int[@name='freq']=1",
        "//lst[@name='paintable']/arr[@name='suggestion']/lst[4]/str[@name='word']='pain table'", //1 op
        "//lst[@name='paintable']/arr[@name='suggestion']/lst[4]/int[@name='freq']=2",
        "//lst[@name='paintable']/arr[@name='suggestion']/lst[5]/str[@name='word']='pointable'",  //SolrSpellChecker result interleaved
        "//lst[@name='paintable']/arr[@name='suggestion']/lst[5]/int[@name='freq']=1",
        "//lst[@name='paintable']/arr[@name='suggestion']/lst[6]/str[@name='word']='pa in table'", //2 ops
        "//lst[@name='paintable']/arr[@name='suggestion']/lst[6]/int[@name='freq']=7",
        "//lst[@name='paintable']/arr[@name='suggestion']/lst[7]/str[@name='word']='plantable'",  //SolrSpellChecker result interleaved
        "//lst[@name='paintable']/arr[@name='suggestion']/lst[7]/int[@name='freq']=1",
        "//lst[@name='paintable']/arr[@name='suggestion']/lst[8]/str[@name='word']='puntable'",   //SolrSpellChecker result interleaved
        "//lst[@name='paintable']/arr[@name='suggestion']/lst[8]/int[@name='freq']=1",
        "//lst[@name='pine']/int[@name='numFound']=2",
        "//lst[@name='pine']/int[@name='startOffset']=21",
        "//lst[@name='pine']/int[@name='endOffset']=25",
        "//lst[@name='pine']/arr[@name='suggestion']/lst[1]/str[@name='word']='line'",
        "//lst[@name='pine']/arr[@name='suggestion']/lst[2]/str[@name='word']='pi ne'",
        "//lst[@name='apple']/int[@name='numFound']=1",
        "//lst[@name='apple']/arr[@name='suggestion']/lst[1]/str[@name='word']='ample'",
        "//lst[@name='good']/int[@name='numFound']=1",
        "//lst[@name='good']/arr[@name='suggestion']/lst[1]/str[@name='word']='food'",
        "//lst[@name='ness']/int[@name='numFound']=1",
        "//lst[@name='ness']/arr[@name='suggestion']/lst[1]/str[@name='word']='mess'",
        "//lst[@name='pine apple']/int[@name='numFound']=1",
        "//lst[@name='pine apple']/int[@name='startOffset']=21",
        "//lst[@name='pine apple']/int[@name='endOffset']=31",
        "//lst[@name='pine apple']/arr[@name='suggestion']/lst[1]/str[@name='word']='pineapple'",
        "//lst[@name='paintable pine']/int[@name='numFound']=1",
        "//lst[@name='paintable pine']/int[@name='startOffset']=11",
        "//lst[@name='paintable pine']/int[@name='endOffset']=25",
        "//lst[@name='paintable pine']/arr[@name='suggestion']/lst[1]/str[@name='word']='paintablepine'",
        "//lst[@name='good ness']/int[@name='numFound']=1",
        "//lst[@name='good ness']/int[@name='startOffset']=32",
        "//lst[@name='good ness']/int[@name='endOffset']=41",
        "//lst[@name='good ness']/arr[@name='suggestion']/lst[1]/str[@name='word']='goodness'",
        "//lst[@name='pine apple good ness']/int[@name='numFound']=1",
        "//lst[@name='pine apple good ness']/int[@name='startOffset']=21",
        "//lst[@name='pine apple good ness']/int[@name='endOffset']=41",
        "//lst[@name='pine apple good ness']/arr[@name='suggestion']/lst[1]/str[@name='word']='pineapplegoodness'"
    );
  }

  /**
   * Verifies the collation queries produced through the
   * {@code spellCheckWithWordbreak} handler for plain, boolean-operator
   * ({@code AND} / {@code NOT}), prefix-operator ({@code +} / {@code -}) and
   * edismax queries.
   */
  @Test
  public void testCollate() throws Exception {
    assertQ(req(
        "q", "lowerfilt:(paintable pine apple godness)",
        "qt", "spellCheckWithWordbreak",
        "indent", "true",
        SpellCheckComponent.SPELLCHECK_BUILD, "true",
        SpellCheckComponent.COMPONENT_NAME, "true",
        SpellCheckComponent.SPELLCHECK_ACCURACY, ".75",
        SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true",
        SpellCheckComponent.SPELLCHECK_COLLATE, "true",
        SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true",
        SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "10"),
        "//lst[@name='collation'][1 ]/str[@name='collationQuery']='lowerfilt:(printable line ample goodness)'",
        "//lst[@name='collation'][2 ]/str[@name='collationQuery']='lowerfilt:(paintablepine ample goodness)'",
        "//lst[@name='collation'][3 ]/str[@name='collationQuery']='lowerfilt:(printable pineapple goodness)'",
        "//lst[@name='collation'][4 ]/str[@name='collationQuery']='lowerfilt:((paint able) line ample goodness)'",
        "//lst[@name='collation'][5 ]/str[@name='collationQuery']='lowerfilt:(printable (pi ne) ample goodness)'",
        "//lst[@name='collation'][6 ]/str[@name='collationQuery']='lowerfilt:((paint able) pineapple goodness)'",
        "//lst[@name='collation'][7 ]/str[@name='collationQuery']='lowerfilt:((paint able) (pi ne) ample goodness)'",
        "//lst[@name='collation'][8 ]/str[@name='collationQuery']='lowerfilt:(pintable line ample goodness)'",
        "//lst[@name='collation'][9 ]/str[@name='collationQuery']='lowerfilt:(pintable pineapple goodness)'",
        "//lst[@name='collation'][10]/str[@name='collationQuery']='lowerfilt:(pintable (pi ne) ample goodness)'",
        "//lst[@name='collation'][10]/lst[@name='misspellingsAndCorrections']/str[@name='paintable']='pintable'",
        "//lst[@name='collation'][10]/lst[@name='misspellingsAndCorrections']/str[@name='pine']='pi ne'",
        "//lst[@name='collation'][10]/lst[@name='misspellingsAndCorrections']/str[@name='apple']='ample'",
        "//lst[@name='collation'][10]/lst[@name='misspellingsAndCorrections']/str[@name='godness']='goodness'"
    );
    assertQ(req(
        "q", "lowerfilt:(pine AND apple)",
        "qt", "spellCheckWithWordbreak",
        "indent", "true",
        SpellCheckComponent.COMPONENT_NAME, "true",
        SpellCheckComponent.SPELLCHECK_ACCURACY, ".75",
        SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true",
        SpellCheckComponent.SPELLCHECK_COLLATE, "true",
        SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true",
        SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "10"),
        "//lst[@name='collation'][1 ]/str[@name='collationQuery']='lowerfilt:(line AND ample)'",
        "//lst[@name='collation'][2 ]/str[@name='collationQuery']='lowerfilt:(pineapple)'",
        "//lst[@name='collation'][3 ]/str[@name='collationQuery']='lowerfilt:((pi AND ne) AND ample)'"
    );
    assertQ(req(
        "q", "lowerfilt:pine AND NOT lowerfilt:apple",
        "qt", "spellCheckWithWordbreak",
        "indent", "true",
        SpellCheckComponent.COMPONENT_NAME, "true",
        SpellCheckComponent.SPELLCHECK_ACCURACY, ".75",
        SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true",
        SpellCheckComponent.SPELLCHECK_COLLATE, "true",
        SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true",
        SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "10"),
        "//lst[@name='collation'][1 ]/str[@name='collationQuery']='lowerfilt:line AND NOT lowerfilt:ample'",
        "//lst[@name='collation'][2 ]/str[@name='collationQuery']='lowerfilt:(pi AND ne) AND NOT lowerfilt:ample'"
    );
    assertQ(req(
        "q", "lowerfilt:pine NOT lowerfilt:apple",
        "qt", "spellCheckWithWordbreak",
        "indent", "true",
        SpellCheckComponent.COMPONENT_NAME, "true",
        SpellCheckComponent.SPELLCHECK_ACCURACY, ".75",
        SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true",
        SpellCheckComponent.SPELLCHECK_COLLATE, "true",
        SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true",
        SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "10"),
        "//lst[@name='collation'][1 ]/str[@name='collationQuery']='lowerfilt:line NOT lowerfilt:ample'",
        "//lst[@name='collation'][2 ]/str[@name='collationQuery']='lowerfilt:(pi AND ne) NOT lowerfilt:ample'"
    );
    assertQ(req(
        "q", "lowerfilt:(+pine -apple)",
        "qt", "spellCheckWithWordbreak",
        "indent", "true",
        SpellCheckComponent.COMPONENT_NAME, "true",
        SpellCheckComponent.SPELLCHECK_ACCURACY, ".75",
        SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true",
        SpellCheckComponent.SPELLCHECK_COLLATE, "true",
        SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true",
        SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "10"),
        "//lst[@name='collation'][1 ]/str[@name='collationQuery']='lowerfilt:(+line -ample)'",
        "//lst[@name='collation'][2 ]/str[@name='collationQuery']='lowerfilt:((+pi +ne) -ample)'"
    );
    assertQ(req(
        "q", "lowerfilt:(+printableinpuntableplantable)",
        "qt", "spellCheckWithWordbreak",
        "indent", "true",
        SpellCheckComponent.COMPONENT_NAME, "true",
        SpellCheckComponent.SPELLCHECK_ACCURACY, "1",
        SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true",
        SpellCheckComponent.SPELLCHECK_COLLATE, "true",
        SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true",
        SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "1"),
        "//lst[@name='collation'][1 ]/str[@name='collationQuery']='lowerfilt:((+printable +in +puntable +plantable))'"
    );
    assertQ(req(
        "q", "zxcv AND qwtp AND fghj",
        "qt", "spellCheckWithWordbreak",
        "defType", "edismax",
        "qf", "lowerfilt",
        "indent", "true",
        SpellCheckComponent.SPELLCHECK_BUILD, "true",
        SpellCheckComponent.COMPONENT_NAME, "true",
        SpellCheckComponent.SPELLCHECK_ACCURACY, ".75",
        SpellCheckComponent.SPELLCHECK_EXTENDED_RESULTS, "true",
        SpellCheckComponent.SPELLCHECK_COLLATE, "true",
        SpellCheckComponent.SPELLCHECK_COLLATE_EXTENDED_RESULTS, "true",
        SpellCheckComponent.SPELLCHECK_MAX_COLLATIONS, "10"),
        "//lst[@name='collation'][1 ]/str[@name='collationQuery']='zxcvqwtp AND (fg AND hj)'"
    );
  }
}