/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.lucene.queries; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.QueryUtils; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.similarities.BM25Similarity; import org.apache.lucene.search.similarities.ClassicSimilarity; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.store.Directory; import org.apache.lucene.util.TestUtil; import org.elasticsearch.test.ESTestCase; import java.io.IOException; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.equalTo; public class BlendedTermQueryTests extends ESTestCase { public void testBooleanQuery() throws IOException { Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); String[] firstNames = new String[]{ "simon", "paul" }; String[] surNames = new String[]{ "willnauer", "simon" }; for (int i = 0; i < surNames.length; i++) { Document d = new Document(); d.add(new TextField("id", Integer.toString(i), Field.Store.YES)); d.add(new TextField("firstname", firstNames[i], Field.Store.NO)); d.add(new TextField("surname", surNames[i], Field.Store.NO)); w.addDocument(d); } int iters = scaledRandomIntBetween(25, 100); for (int j = 0; j < iters; j++) { Document d = new Document(); d.add(new TextField("id", Integer.toString(firstNames.length + j), Field.Store.YES)); d.add(new TextField("firstname", rarely() ? "some_other_name" : "simon the sorcerer", Field.Store.NO)); // make sure length-norm is the tie-breaker d.add(new TextField("surname", "bogus", Field.Store.NO)); w.addDocument(d); } w.commit(); DirectoryReader reader = DirectoryReader.open(w); IndexSearcher searcher = setSimilarity(newSearcher(reader)); { Term[] terms = new Term[]{new Term("firstname", "simon"), new Term("surname", "simon")}; BlendedTermQuery query = BlendedTermQuery.booleanBlendedQuery(terms); TopDocs search = searcher.search(query, 3); ScoreDoc[] scoreDocs = search.scoreDocs; assertEquals(3, scoreDocs.length); assertEquals(Integer.toString(0), reader.document(scoreDocs[0].doc).getField("id").stringValue()); } { BooleanQuery.Builder query = new BooleanQuery.Builder(); query.add(new TermQuery(new Term("firstname", "simon")), BooleanClause.Occur.SHOULD); query.add(new TermQuery(new Term("surname", "simon")), BooleanClause.Occur.SHOULD); TopDocs search = searcher.search(query.build(), 1); ScoreDoc[] scoreDocs = search.scoreDocs; assertEquals(Integer.toString(1), reader.document(scoreDocs[0].doc).getField("id").stringValue()); } reader.close(); w.close(); dir.close(); } public void testDismaxQuery() throws IOException { Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(new MockAnalyzer(random()))); String[] username = new String[]{ "foo fighters", "some cool fan", "cover band"}; String[] song = new String[]{ "generator", "foo fighers - generator", "foo fighters generator" }; final boolean omitNorms = random().nextBoolean(); FieldType ft = new FieldType(TextField.TYPE_NOT_STORED); ft.setIndexOptions(random().nextBoolean() ? IndexOptions.DOCS : IndexOptions.DOCS_AND_FREQS); ft.setOmitNorms(omitNorms); ft.freeze(); FieldType ft1 = new FieldType(TextField.TYPE_NOT_STORED); ft1.setIndexOptions(random().nextBoolean() ? IndexOptions.DOCS : IndexOptions.DOCS_AND_FREQS); ft1.setOmitNorms(omitNorms); ft1.freeze(); for (int i = 0; i < username.length; i++) { Document d = new Document(); d.add(new TextField("id", Integer.toString(i), Field.Store.YES)); d.add(new Field("username", username[i], ft)); d.add(new Field("song", song[i], ft)); w.addDocument(d); } int iters = scaledRandomIntBetween(25, 100); for (int j = 0; j < iters; j++) { Document d = new Document(); d.add(new TextField("id", Integer.toString(username.length + j), Field.Store.YES)); d.add(new Field("username", "foo fighters", ft1)); d.add(new Field("song", "some bogus text to bump up IDF", ft1)); w.addDocument(d); } w.commit(); DirectoryReader reader = DirectoryReader.open(w); IndexSearcher searcher = setSimilarity(newSearcher(reader)); { String[] fields = new String[]{"username", "song"}; BooleanQuery.Builder query = new BooleanQuery.Builder(); query.add(BlendedTermQuery.dismaxBlendedQuery(toTerms(fields, "foo"), 0.1f), BooleanClause.Occur.SHOULD); query.add(BlendedTermQuery.dismaxBlendedQuery(toTerms(fields, "fighters"), 0.1f), BooleanClause.Occur.SHOULD); query.add(BlendedTermQuery.dismaxBlendedQuery(toTerms(fields, "generator"), 0.1f), BooleanClause.Occur.SHOULD); TopDocs search = searcher.search(query.build(), 10); ScoreDoc[] scoreDocs = search.scoreDocs; assertEquals(Integer.toString(0), reader.document(scoreDocs[0].doc).getField("id").stringValue()); } { BooleanQuery.Builder query = new BooleanQuery.Builder(); DisjunctionMaxQuery uname = new DisjunctionMaxQuery( Arrays.asList(new TermQuery(new Term("username", "foo")), new TermQuery(new Term("song", "foo"))), 0.0f); DisjunctionMaxQuery s = new DisjunctionMaxQuery( Arrays.asList(new TermQuery(new Term("username", "fighers")), new TermQuery(new Term("song", "fighers"))), 0.0f); DisjunctionMaxQuery gen = new DisjunctionMaxQuery( Arrays.asList(new TermQuery(new Term("username", "generator")), new TermQuery(new Term("song", "generator"))), 0f); query.add(uname, BooleanClause.Occur.SHOULD); query.add(s, BooleanClause.Occur.SHOULD); query.add(gen, BooleanClause.Occur.SHOULD); TopDocs search = searcher.search(query.build(), 4); ScoreDoc[] scoreDocs = search.scoreDocs; assertEquals(Integer.toString(1), reader.document(scoreDocs[0].doc).getField("id").stringValue()); } reader.close(); w.close(); dir.close(); } public void testBasics() { final int iters = scaledRandomIntBetween(5, 25); for (int j = 0; j < iters; j++) { String[] fields = new String[1 + random().nextInt(10)]; for (int i = 0; i < fields.length; i++) { fields[i] = TestUtil.randomRealisticUnicodeString(random(), 1, 10); } String term = TestUtil.randomRealisticUnicodeString(random(), 1, 10); Term[] terms = toTerms(fields, term); boolean useBoolean = random().nextBoolean(); float tieBreaker = random().nextFloat(); BlendedTermQuery query = useBoolean ? BlendedTermQuery.booleanBlendedQuery(terms) : BlendedTermQuery.dismaxBlendedQuery(terms, tieBreaker); QueryUtils.check(query); terms = toTerms(fields, term); BlendedTermQuery query2 = useBoolean ? BlendedTermQuery.booleanBlendedQuery(terms) : BlendedTermQuery.dismaxBlendedQuery(terms, tieBreaker); assertEquals(query, query2); } } public Term[] toTerms(String[] fields, String term) { Term[] terms = new Term[fields.length]; List<String> fieldsList = Arrays.asList(fields); Collections.shuffle(fieldsList, random()); fields = fieldsList.toArray(new String[0]); for (int i = 0; i < fields.length; i++) { terms[i] = new Term(fields[i], term); } return terms; } public IndexSearcher setSimilarity(IndexSearcher searcher) { Similarity similarity = random().nextBoolean() ? new BM25Similarity() : new ClassicSimilarity(); searcher.setSimilarity(similarity); return searcher; } public void testExtractTerms() throws IOException { Set<Term> terms = new HashSet<>(); int num = scaledRandomIntBetween(1, 10); for (int i = 0; i < num; i++) { terms.add(new Term(TestUtil.randomRealisticUnicodeString(random(), 1, 10), TestUtil.randomRealisticUnicodeString(random(), 1, 10))); } BlendedTermQuery blendedTermQuery = random().nextBoolean() ? BlendedTermQuery.dismaxBlendedQuery(terms.toArray(new Term[0]), random().nextFloat()) : BlendedTermQuery.booleanBlendedQuery(terms.toArray(new Term[0])); Set<Term> extracted = new HashSet<>(); IndexSearcher searcher = new IndexSearcher(new MultiReader()); searcher.createNormalizedWeight(blendedTermQuery, false).extractTerms(extracted); assertThat(extracted.size(), equalTo(terms.size())); assertThat(extracted, containsInAnyOrder(terms.toArray(new Term[0]))); } }