/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.lucene.all;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.payloads.PayloadHelper;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.test.ESTestCase;
import org.junit.Test;
import java.io.IOException;
import static org.hamcrest.Matchers.equalTo;
/**
 * Tests for the {@code _all} field machinery: {@link AllEntries} text
 * accumulation and boosting, {@link AllTokenStream} payload encoding, and
 * scoring/explanations of {@link AllTermQuery}.
 */
public class SimpleAllTests extends ESTestCase {
@Test
public void testBoostOnEagerTokenizer() throws Exception {
AllEntries allEntries = new AllEntries();
allEntries.addText("field1", "all", 2.0f);
allEntries.addText("field2", "your", 1.0f);
allEntries.addText("field1", "boosts", 0.5f);
allEntries.reset();
// whitespace analyzer's tokenizer reads characters eagerly on the contrary to the standard tokenizer
final TokenStream ts = AllTokenStream.allTokenStream("any", allEntries, new WhitespaceAnalyzer());
final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
final PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class);
ts.reset();
for (int i = 0; i < 3; ++i) {
assertTrue(ts.incrementToken());
final String term;
final float boost;
switch (i) {
case 0:
term = "all";
boost = 2;
break;
case 1:
term = "your";
boost = 1;
break;
case 2:
term = "boosts";
boost = 0.5f;
break;
default:
throw new AssertionError();
}
assertEquals(term, termAtt.toString());
final BytesRef payload = payloadAtt.getPayload();
if (payload == null || payload.length == 0) {
assertEquals(boost, 1f, 0.001f);
} else {
assertEquals(4, payload.length);
final float b = PayloadHelper.decodeFloat(payload.bytes, payload.offset);
assertEquals(boost, b, 0.001f);
}
}
assertFalse(ts.incrementToken());
}
@Test
public void testAllEntriesRead() throws Exception {
AllEntries allEntries = new AllEntries();
allEntries.addText("field1", "something", 1.0f);
allEntries.addText("field2", "else", 1.0f);
for (int i = 1; i < 30; i++) {
allEntries.reset();
char[] data = new char[i];
String value = slurpToString(allEntries, data);
assertThat("failed for " + i, value, equalTo("something else"));
}
}
private String slurpToString(AllEntries allEntries, char[] data) throws IOException {
StringBuilder sb = new StringBuilder();
while (true) {
int read = allEntries.read(data, 0, data.length);
if (read == -1) {
break;
}
sb.append(data, 0, read);
}
return sb.toString();
}
private void assertExplanationScore(IndexSearcher searcher, Query query, ScoreDoc scoreDoc) throws IOException {
final Explanation expl = searcher.explain(query, scoreDoc.doc);
assertEquals(scoreDoc.score, expl.getValue(), 0.00001f);
}
@Test
public void testSimpleAllNoBoost() throws Exception {
Directory dir = new RAMDirectory();
IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
Document doc = new Document();
doc.add(new Field("_id", "1", StoredField.TYPE));
AllEntries allEntries = new AllEntries();
allEntries.addText("field1", "something", 1.0f);
allEntries.addText("field2", "else", 1.0f);
allEntries.reset();
doc.add(new TextField("_all", AllTokenStream.allTokenStream("_all", allEntries, Lucene.STANDARD_ANALYZER)));
indexWriter.addDocument(doc);
doc = new Document();
doc.add(new Field("_id", "2", StoredField.TYPE));
allEntries = new AllEntries();
allEntries.addText("field1", "else", 1.0f);
allEntries.addText("field2", "something", 1.0f);
allEntries.reset();
doc.add(new TextField("_all", AllTokenStream.allTokenStream("_all", allEntries, Lucene.STANDARD_ANALYZER)));
indexWriter.addDocument(doc);
IndexReader reader = DirectoryReader.open(indexWriter, true);
IndexSearcher searcher = new IndexSearcher(reader);
Query query = new AllTermQuery(new Term("_all", "else"));
TopDocs docs = searcher.search(query, 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(0));
assertExplanationScore(searcher, query, docs.scoreDocs[0]);
assertThat(docs.scoreDocs[1].doc, equalTo(1));
assertExplanationScore(searcher, query, docs.scoreDocs[1]);
query = new AllTermQuery(new Term("_all", "something"));
docs = searcher.search(query, 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(0));
assertExplanationScore(searcher, query, docs.scoreDocs[0]);
assertThat(docs.scoreDocs[1].doc, equalTo(1));
assertExplanationScore(searcher, query, docs.scoreDocs[1]);
indexWriter.close();
}
@Test
public void testSimpleAllWithBoost() throws Exception {
Directory dir = new RAMDirectory();
IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
Document doc = new Document();
doc.add(new Field("_id", "1", StoredField.TYPE));
AllEntries allEntries = new AllEntries();
allEntries.addText("field1", "something", 1.0f);
allEntries.addText("field2", "else", 1.0f);
allEntries.reset();
doc.add(new TextField("_all", AllTokenStream.allTokenStream("_all", allEntries, Lucene.STANDARD_ANALYZER)));
indexWriter.addDocument(doc);
doc = new Document();
doc.add(new Field("_id", "2", StoredField.TYPE));
allEntries = new AllEntries();
allEntries.addText("field1", "else", 2.0f);
allEntries.addText("field2", "something", 1.0f);
allEntries.reset();
doc.add(new TextField("_all", AllTokenStream.allTokenStream("_all", allEntries, Lucene.STANDARD_ANALYZER)));
indexWriter.addDocument(doc);
IndexReader reader = DirectoryReader.open(indexWriter, true);
IndexSearcher searcher = new IndexSearcher(reader);
// this one is boosted. so the second doc is more relevant
Query query = new AllTermQuery(new Term("_all", "else"));
TopDocs docs = searcher.search(query, 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(1));
assertExplanationScore(searcher, query, docs.scoreDocs[0]);
assertThat(docs.scoreDocs[1].doc, equalTo(0));
assertExplanationScore(searcher, query, docs.scoreDocs[1]);
query = new AllTermQuery(new Term("_all", "something"));
docs = searcher.search(query, 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(0));
assertExplanationScore(searcher, query, docs.scoreDocs[0]);
assertThat(docs.scoreDocs[1].doc, equalTo(1));
assertExplanationScore(searcher, query, docs.scoreDocs[1]);
indexWriter.close();
}
@Test
public void testTermMissingFromOneSegment() throws Exception {
Directory dir = new RAMDirectory();
IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
Document doc = new Document();
doc.add(new Field("_id", "1", StoredField.TYPE));
AllEntries allEntries = new AllEntries();
allEntries.addText("field", "something", 2.0f);
allEntries.reset();
doc.add(new TextField("_all", AllTokenStream.allTokenStream("_all", allEntries, Lucene.STANDARD_ANALYZER)));
indexWriter.addDocument(doc);
indexWriter.commit();
doc = new Document();
doc.add(new Field("_id", "2", StoredField.TYPE));
allEntries = new AllEntries();
allEntries.addText("field", "else", 1.0f);
allEntries.reset();
doc.add(new TextField("_all", AllTokenStream.allTokenStream("_all", allEntries, Lucene.STANDARD_ANALYZER)));
indexWriter.addDocument(doc);
IndexReader reader = DirectoryReader.open(indexWriter, true);
assertEquals(2, reader.leaves().size());
IndexSearcher searcher = new IndexSearcher(reader);
// "something" only appears in the first segment:
Query query = new AllTermQuery(new Term("_all", "something"));
TopDocs docs = searcher.search(query, 10);
assertEquals(1, docs.totalHits);
indexWriter.close();
}
public void testMultipleTokensAllNoBoost() throws Exception {
Directory dir = new RAMDirectory();
IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
Document doc = new Document();
doc.add(new Field("_id", "1", StoredField.TYPE));
AllEntries allEntries = new AllEntries();
allEntries.addText("field1", "something moo", 1.0f);
allEntries.addText("field2", "else koo", 1.0f);
allEntries.reset();
doc.add(new TextField("_all", AllTokenStream.allTokenStream("_all", allEntries, Lucene.STANDARD_ANALYZER)));
indexWriter.addDocument(doc);
doc = new Document();
doc.add(new Field("_id", "2", StoredField.TYPE));
allEntries = new AllEntries();
allEntries.addText("field1", "else koo", 1.0f);
allEntries.addText("field2", "something moo", 1.0f);
allEntries.reset();
doc.add(new TextField("_all", AllTokenStream.allTokenStream("_all", allEntries, Lucene.STANDARD_ANALYZER)));
indexWriter.addDocument(doc);
IndexReader reader = DirectoryReader.open(indexWriter, true);
IndexSearcher searcher = new IndexSearcher(reader);
TopDocs docs = searcher.search(new AllTermQuery(new Term("_all", "else")), 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(0));
assertThat(docs.scoreDocs[1].doc, equalTo(1));
docs = searcher.search(new AllTermQuery(new Term("_all", "koo")), 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(0));
assertThat(docs.scoreDocs[1].doc, equalTo(1));
docs = searcher.search(new AllTermQuery(new Term("_all", "something")), 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(0));
assertThat(docs.scoreDocs[1].doc, equalTo(1));
docs = searcher.search(new AllTermQuery(new Term("_all", "moo")), 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(0));
assertThat(docs.scoreDocs[1].doc, equalTo(1));
indexWriter.close();
}
@Test
public void testMultipleTokensAllWithBoost() throws Exception {
Directory dir = new RAMDirectory();
IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
Document doc = new Document();
doc.add(new Field("_id", "1", StoredField.TYPE));
AllEntries allEntries = new AllEntries();
allEntries.addText("field1", "something moo", 1.0f);
allEntries.addText("field2", "else koo", 1.0f);
allEntries.reset();
doc.add(new TextField("_all", AllTokenStream.allTokenStream("_all", allEntries, Lucene.STANDARD_ANALYZER)));
indexWriter.addDocument(doc);
doc = new Document();
doc.add(new Field("_id", "2", StoredField.TYPE));
allEntries = new AllEntries();
allEntries.addText("field1", "else koo", 2.0f);
allEntries.addText("field2", "something moo", 1.0f);
allEntries.reset();
doc.add(new TextField("_all", AllTokenStream.allTokenStream("_all", allEntries, Lucene.STANDARD_ANALYZER)));
indexWriter.addDocument(doc);
IndexReader reader = DirectoryReader.open(indexWriter, true);
IndexSearcher searcher = new IndexSearcher(reader);
TopDocs docs = searcher.search(new AllTermQuery(new Term("_all", "else")), 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(1));
assertThat(docs.scoreDocs[1].doc, equalTo(0));
docs = searcher.search(new AllTermQuery(new Term("_all", "koo")), 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(1));
assertThat(docs.scoreDocs[1].doc, equalTo(0));
docs = searcher.search(new AllTermQuery(new Term("_all", "something")), 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(0));
assertThat(docs.scoreDocs[1].doc, equalTo(1));
docs = searcher.search(new AllTermQuery(new Term("_all", "moo")), 10);
assertThat(docs.totalHits, equalTo(2));
assertThat(docs.scoreDocs[0].doc, equalTo(0));
assertThat(docs.scoreDocs[1].doc, equalTo(1));
indexWriter.close();
}
@Test
public void testNoTokensWithKeywordAnalyzer() throws Exception {
Directory dir = new RAMDirectory();
IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.KEYWORD_ANALYZER));
Document doc = new Document();
doc.add(new Field("_id", "1", StoredField.TYPE));
AllEntries allEntries = new AllEntries();
allEntries.reset();
doc.add(new TextField("_all", AllTokenStream.allTokenStream("_all", allEntries, Lucene.KEYWORD_ANALYZER)));
indexWriter.addDocument(doc);
IndexReader reader = DirectoryReader.open(indexWriter, true);
IndexSearcher searcher = new IndexSearcher(reader);
TopDocs docs = searcher.search(new MatchAllDocsQuery(), 10);
assertThat(docs.totalHits, equalTo(1));
assertThat(docs.scoreDocs[0].doc, equalTo(0));
}
public void testEquals() {
Term bar = new Term("foo", "bar");
Term baz = new Term("foo", "baz");
assertEquals(new AllTermQuery(bar), new AllTermQuery(bar));
assertNotEquals(new AllTermQuery(bar), new AllTermQuery(baz));
assertEquals(new AllTermQuery(bar).hashCode(), new AllTermQuery(bar).hashCode());
assertNotEquals(new AllTermQuery(bar).hashCode(), new AllTermQuery(baz).hashCode());
}
}