/*
* Licensed to the Apache Software Foundation (ASF) under one or more contributor license
* agreements. See the NOTICE file distributed with this work for additional information regarding
* copyright ownership. The ASF licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License. You may obtain a
* copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.geode.cache.lucene;
import static org.apache.geode.cache.lucene.test.LuceneTestUtilities.*;
import static org.junit.Assert.*;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.util.CharTokenizer;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.search.Query;
import org.junit.Ignore;
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.ExpectedException;
import org.apache.geode.DataSerializer;
import org.apache.geode.cache.Region;
import org.apache.geode.cache.RegionShortcut;
import org.apache.geode.cache.lucene.test.TestObject;
import org.apache.geode.internal.DataSerializableFixedID;
import org.apache.geode.internal.Version;
import org.apache.geode.pdx.JSONFormatter;
import org.apache.geode.pdx.PdxInstance;
import org.apache.geode.test.junit.categories.IntegrationTest;
/**
 * This class contains integration tests of Lucene queries that can run against a single member.
 */
@Category(IntegrationTest.class)
public class LuceneQueriesIntegrationTest extends LuceneIntegrationTest {
@Rule
public ExpectedException thrown = ExpectedException.none();
private static final String INDEX_NAME = "index";
protected static final String REGION_NAME = "index";
private Region region;
@Test
public void shouldNotTokenizeWordsWithKeywordAnalyzer() throws Exception {
Map<String, Analyzer> fields = new HashMap<String, Analyzer>();
fields.put("field1", new StandardAnalyzer());
fields.put("field2", new KeywordAnalyzer());
luceneService.createIndex(INDEX_NAME, REGION_NAME, fields);
Region region = cache.createRegionFactory(RegionShortcut.PARTITION).create(REGION_NAME);
final LuceneIndex index = luceneService.getIndex(INDEX_NAME, REGION_NAME);
// Put three values with some of the same tokens
String value1 = "one three";
String value2 = "one two three";
String value3 = "one@three";
region.put("A", new TestObject(value1, value1));
region.put("B", new TestObject(value2, value2));
region.put("C", new TestObject(value3, value3));
// The analyzers will tokenize the values as follows:
// field1 (standard): "one three" -> [one, three]; "one two three" -> [one, two, three];
// "one@three" -> [one, three]
// field2 (keyword): each whole value is kept as a single token
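// The Geode Lucene index is updated asynchronously, so wait for it to be flushed
// before querying.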
index.waitUntilFlushed(60000, TimeUnit.MILLISECONDS);
// Standard analyzer with double quotes: the query string parses to the phrase query
// field1:"one three". Since the standard analyzer also tokenizes the value "one@three"
// into "one" "three", C is hit by the query as well as A.
verifyQuery("field1:\"one three\"", DEFAULT_FIELD, "A", "C");
// The standard analyzer does not split on '_', so the query string parses to the
// single term query field1:one_three and matches nothing.
verifyQuery("field1:one_three", DEFAULT_FIELD);
// The standard analyzer splits on '@', so the query string parses to
// field1:one field1:three and matches all three entries.
verifyQuery("field1:one@three", DEFAULT_FIELD, "A", "B", "C");
HashMap<String, TestObject> expectedResults = new HashMap<>();
expectedResults.put("A", new TestObject(value1, value1));
expectedResults.put("B", new TestObject(value2, value2));
expectedResults.put("C", new TestObject(value3, value3));
verifyQuery("field1:one@three", DEFAULT_FIELD, expectedResults);
// Keyword analyzer: the entire field value is indexed as a single token, so the
// quoted query field2:"one three" matches only the entry whose value is exactly
// "one three".
verifyQuery("field2:\"one three\"", DEFAULT_FIELD, "A");
// Keyword analyzer without double quotes behaves the same as with them: the query
// parses to the single token field2:one@three and matches only C.
verifyQuery("field2:one@three", DEFAULT_FIELD, "C");
}
@Test
public void shouldQueryUsingIntRangeQueryProvider() throws Exception {
// Note: range queries on numeric fields have some limitations, but
// IntRangeQueryProvider provides the basic functionality.
luceneService.createIndex(INDEX_NAME, REGION_NAME, LuceneService.REGION_VALUE_FIELD);
Region region = cache.createRegionFactory(RegionShortcut.PARTITION).create(REGION_NAME);
final LuceneIndex index = luceneService.getIndex(INDEX_NAME, REGION_NAME);
region.put("primitiveInt0", 122);
region.put("primitiveInt1", 123);
region.put("primitiveInt2", 223);
region.put("primitiveInt3", 224);
index.waitUntilFlushed(60000, TimeUnit.MILLISECONDS);
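// IntPoint range queries use inclusive bounds, so [123, 223] should match exactly
// the two middle entries.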
verifyQueryUsingCustomizedProvider(LuceneService.REGION_VALUE_FIELD, 123, 223, "primitiveInt1",
"primitiveInt2");
}
@Ignore
@Test
public void queryParserCannotQueryByRange() throws Exception {
// Note: range queries on numeric fields have some limitations, but
// IntRangeQueryProvider provides the basic functionality.
luceneService.createIndex(INDEX_NAME, REGION_NAME, LuceneService.REGION_VALUE_FIELD);
Region region = cache.createRegionFactory(RegionShortcut.PARTITION).create(REGION_NAME);
final LuceneIndex index = luceneService.getIndex(INDEX_NAME, REGION_NAME);
region.put("primitiveInt0", 122);
region.put("primitiveInt1", 123);
region.put("primitiveInt2", 223);
region.put("primitiveInt3", 224);
index.waitUntilFlushed(60000, TimeUnit.MILLISECONDS);
// Note: the current QueryParser cannot query by range; this is a known Lucene
// limitation, so the query below returns no results.
verifyQuery(LuceneService.REGION_VALUE_FIELD + ":[123 TO 223]",
LuceneService.REGION_VALUE_FIELD);
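// Once a double value is put into the same field, the int range query is expected
// to fail with an IllegalArgumentException (see expectMessage below).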
region.put("primitiveDouble1", 123.0);
index.waitUntilFlushed(60000, TimeUnit.MILLISECONDS);
thrown.expectMessage("java.lang.IllegalArgumentException");
verifyQueryUsingCustomizedProvider(LuceneService.REGION_VALUE_FIELD, 123, 223, "primitiveInt1",
"primitiveInt2");
}
@Test
public void shouldPaginateResults() throws Exception {
final LuceneQuery<Object, Object> query = addValuesAndCreateQuery(2);
final PageableLuceneQueryResults<Object, Object> pages = query.findPages();
assertTrue(pages.hasNext());
assertEquals(7, pages.size());
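// 7 hits with a page size of 2 should produce 4 pages (2 + 2 + 2 + 1).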
final List<LuceneResultStruct<Object, Object>> page1 = pages.next();
final List<LuceneResultStruct<Object, Object>> page2 = pages.next();
final List<LuceneResultStruct<Object, Object>> page3 = pages.next();
final List<LuceneResultStruct<Object, Object>> page4 = pages.next();
List<LuceneResultStruct<Object, Object>> allEntries = new ArrayList<>();
allEntries.addAll(page1);
allEntries.addAll(page2);
allEntries.addAll(page3);
allEntries.addAll(page4);
assertEquals(region.keySet(),
allEntries.stream().map(entry -> entry.getKey()).collect(Collectors.toSet()));
assertEquals(region.values(),
allEntries.stream().map(entry -> entry.getValue()).collect(Collectors.toSet()));
}
@Test
public void shouldReturnValuesFromFindValues() throws Exception {
final LuceneQuery<Object, Object> query = addValuesAndCreateQuery(2);
assertEquals(region.values(), new HashSet(query.findValues()));
}
private LuceneQuery<Object, Object> addValuesAndCreateQuery(int pagesize)
throws InterruptedException {
luceneService.createIndex(INDEX_NAME, REGION_NAME, "field1", "field2");
region = cache.createRegionFactory(RegionShortcut.PARTITION).create(REGION_NAME);
final LuceneIndex index = luceneService.getIndex(INDEX_NAME, REGION_NAME);
// Put two values with some of the same tokens
String value1 = "one three";
String value2 = "one two three";
String value3 = "one@three";
region.put("A", new TestObject(value1, value1));
region.put("B", new TestObject(value2, value2));
region.put("C", new TestObject(value3, value3));
region.put("D", new TestObject(value1, value1));
region.put("E", new TestObject(value2, value2));
region.put("F", new TestObject(value3, value3));
region.put("G", new TestObject(value1, value2));
index.waitUntilFlushed(60000, TimeUnit.MILLISECONDS);
return luceneService.createLuceneQueryFactory().setPageSize(pagesize).create(INDEX_NAME,
REGION_NAME, "one", "field1");
}
@Test
public void shouldTokenizeUsingMyCharacterAnalyzer() throws Exception {
Map<String, Analyzer> fields = new HashMap<String, Analyzer>();
// Leave field1's analyzer null so that it falls back to the standard analyzer.
// Note: the map still has to contain "field1"; otherwise field1 will not be indexed.
fields.put("field1", null);
fields.put("field2", new MyCharacterAnalyzer());
luceneService.createIndex(INDEX_NAME, REGION_NAME, fields);
Region region = cache.createRegionFactory(RegionShortcut.PARTITION).create(REGION_NAME);
final LuceneIndex index = luceneService.getIndex(INDEX_NAME, REGION_NAME);
// Put four values, some sharing tokens
String value1 = "one three";
String value4 = "two_four";
String value3 = "two@four";
region.put("A", new TestObject(value1, value4));
region.put("B", new TestObject(value1, value3));
region.put("C", new TestObject(value3, value3));
region.put("D", new TestObject(value4, value4));
index.waitUntilFlushed(60000, TimeUnit.MILLISECONDS);
verifyQuery("field1:one AND field2:two_four", DEFAULT_FIELD, "A");
verifyQuery("field1:one AND field2:two", DEFAULT_FIELD, "A");
verifyQuery("field1:three AND field2:four", DEFAULT_FIELD, "A");
}
@Test
public void shouldAllowNullInFieldValue() throws Exception {
Map<String, Analyzer> fields = new HashMap<String, Analyzer>();
fields.put("field1", null);
fields.put("field2", null);
luceneService.createIndex(INDEX_NAME, REGION_NAME, fields);
Region region = cache.createRegionFactory(RegionShortcut.PARTITION).create(REGION_NAME);
final LuceneIndex index = luceneService.getIndex(INDEX_NAME, REGION_NAME);
// Put a single value whose second field is null
String value1 = "one three";
region.put("A", new TestObject(value1, null));
index.waitUntilFlushed(60000, TimeUnit.MILLISECONDS);
verifyQuery("field1:one", DEFAULT_FIELD, "A");
}
@Test
public void queryJsonObject() throws Exception {
Map<String, Analyzer> fields = new HashMap<String, Analyzer>();
fields.put("name", null);
fields.put("lastName", null);
fields.put("address", null);
luceneService.createIndex(INDEX_NAME, REGION_NAME, fields);
Region region = cache.createRegionFactory(RegionShortcut.PARTITION).create(REGION_NAME);
final LuceneIndex index = luceneService.getIndex(INDEX_NAME, REGION_NAME);
// Insert three JSON documents converted to PdxInstances
PdxInstance pdx1 = insertAJson(region, "jsondoc1");
PdxInstance pdx2 = insertAJson(region, "jsondoc2");
PdxInstance pdx10 = insertAJson(region, "jsondoc10");
index.waitUntilFlushed(60000, TimeUnit.MILLISECONDS);
HashMap<String, PdxInstance> expectedResults = new HashMap<>();
expectedResults.put("jsondoc1", pdx1);
expectedResults.put("jsondoc10", pdx10);
verifyQuery("name:jsondoc1*", DEFAULT_FIELD, expectedResults);
}
@Test
public void shouldAllowQueryOnRegionWithStringValue() throws Exception {
luceneService.createIndex(INDEX_NAME, REGION_NAME, LuceneService.REGION_VALUE_FIELD);
Region region = cache.createRegionFactory(RegionShortcut.PARTITION).create(REGION_NAME);
final LuceneIndex index = luceneService.getIndex(INDEX_NAME, REGION_NAME);
region.put("A", "one three");
index.waitUntilFlushed(60000, TimeUnit.MILLISECONDS);
verifyQuery("one", LuceneService.REGION_VALUE_FIELD, "A");
}
@Test
public void throwFunctionExceptionWhenGivenBadQuery() throws Exception {
LuceneService luceneService = LuceneServiceProvider.get(cache);
luceneService.createIndex(INDEX_NAME, REGION_NAME, "text");
Region region = cache.createRegionFactory(RegionShortcut.PARTITION).create(REGION_NAME);
// Create a query whose provider throws an exception
final LuceneQuery<Object, Object> query =
luceneService.createLuceneQueryFactory().create(INDEX_NAME, REGION_NAME, (index) -> {
throw new LuceneQueryException("Bad query");
});
thrown.expect(LuceneQueryException.class);
query.findPages();
}
@Test
public void shouldReturnAllResultsWhenPaginationIsDisabled() throws Exception {
// Pagination disabled by setting page size = 0.
final LuceneQuery<Object, Object> query = addValuesAndCreateQuery(0);
final PageableLuceneQueryResults<Object, Object> pages = query.findPages();
assertTrue(pages.hasNext());
assertEquals(7, pages.size());
final List<LuceneResultStruct<Object, Object>> page = pages.next();
assertFalse(pages.hasNext());
assertEquals(region.keySet(),
page.stream().map(entry -> entry.getKey()).collect(Collectors.toSet()));
assertEquals(region.values(),
page.stream().map(entry -> entry.getValue()).collect(Collectors.toSet()));
}
@Test
public void shouldReturnCorrectResultsOnDeletionAfterQueryExecution() throws Exception {
final LuceneQuery<Object, Object> query = addValuesAndCreateQuery(2);
final PageableLuceneQueryResults<Object, Object> pages = query.findPages();
List<LuceneResultStruct<Object, Object>> allEntries = new ArrayList<>();
assertTrue(pages.hasNext());
assertEquals(7, pages.size());
// Destroying an entry from the region after the query is executed.
region.destroy("C");
final List<LuceneResultStruct<Object, Object>> page1 = pages.next();
assertEquals(2, page1.size());
final List<LuceneResultStruct<Object, Object>> page2 = pages.next();
assertEquals(2, page2.size());
final List<LuceneResultStruct<Object, Object>> page3 = pages.next();
assertEquals(2, page3.size());
assertFalse(pages.hasNext());
allEntries.addAll(page1);
allEntries.addAll(page2);
allEntries.addAll(page3);
assertEquals(region.keySet(),
allEntries.stream().map(entry -> entry.getKey()).collect(Collectors.toSet()));
assertEquals(region.values(),
allEntries.stream().map(entry -> entry.getValue()).collect(Collectors.toSet()));
}
@Test
public void shouldReturnCorrectResultsOnMultipleDeletionsAfterQueryExecution() throws Exception {
final LuceneQuery<Object, Object> query = addValuesAndCreateQuery(2);
final PageableLuceneQueryResults<Object, Object> pages = query.findPages();
List<LuceneResultStruct<Object, Object>> allEntries = new ArrayList<>();
assertTrue(pages.hasNext());
assertEquals(7, pages.size());
// Destroying an entry from the region after the query is executed.
region.destroy("C");
allEntries.addAll(pages.next());
// Destroying an entry that has already been fetched through pages.next(), removing it
// from both allEntries and the region.
Object removeKey = allEntries.remove(0).getKey();
region.destroy(removeKey);
allEntries.addAll(pages.next());
// Destroying a region entry which hasn't been fetched through pages.next() yet.
Set<Object> resultKeySet = allEntries.stream().map(entry -> entry.getKey()).collect(Collectors.toSet());
for (Object key : region.keySet()) {
if (!resultKeySet.contains(key)) {
region.destroy(key);
break;
}
}
allEntries.addAll(pages.next());
assertFalse(pages.hasNext());
assertEquals(region.keySet(),
allEntries.stream().map(entry -> entry.getKey()).collect(Collectors.toSet()));
assertEquals(region.values(),
allEntries.stream().map(entry -> entry.getValue()).collect(Collectors.toSet()));
}
@Test
public void shouldReturnCorrectResultsOnAllDeletionsAfterQueryExecution() throws Exception {
final LuceneQuery<Object, Object> query = addValuesAndCreateQuery(2);
final PageableLuceneQueryResults<Object, Object> pages = query.findPages();
assertTrue(pages.hasNext());
assertEquals(7, pages.size());
region.destroy("A");
region.destroy("B");
region.destroy("C");
region.destroy("D");
region.destroy("E");
region.destroy("F");
region.destroy("G");
assertTrue(pages.hasNext());
final List<LuceneResultStruct<Object, Object>> page1 = pages.next();
assertEquals(2, page1.size());
assertFalse(pages.hasNext());
}
private PdxInstance insertAJson(Region region, String key) {
String jsonCustomer = "{" + "\"name\": \"" + key + "\"," + "\"lastName\": \"Smith\","
+ " \"age\": 25," + "\"address\":" + "{" + "\"streetAddress\": \"21 2nd Street\","
+ "\"city\": \"New York\"," + "\"state\": \"NY\"," + "\"postalCode\": \"10021\"" + "},"
+ "\"phoneNumber\":" + "[" + "{" + " \"type\": \"home\"," + "\"number\": \"212 555-1234\""
+ "}," + "{" + " \"type\": \"fax\"," + "\"number\": \"646 555-4567\"" + "}" + "]" + "}";
PdxInstance pdx = JSONFormatter.fromJSON(jsonCustomer);
region.put(key, pdx);
return pdx;
}
private void verifyQueryUsingCustomizedProvider(String fieldName, int lowerValue, int upperValue,
String... expectedKeys) throws Exception {
IntRangeQueryProvider provider = new IntRangeQueryProvider(fieldName, lowerValue, upperValue);
LuceneQuery<String, Object> queryWithCustomizedProvider =
luceneService.createLuceneQueryFactory().create(INDEX_NAME, REGION_NAME, provider);
verifyQueryKeys(queryWithCustomizedProvider, expectedKeys);
}
private void verifyQuery(String query, String defaultField, String... expectedKeys)
throws Exception {
final LuceneQuery<String, Object> queryWithStandardAnalyzer = luceneService
.createLuceneQueryFactory().create(INDEX_NAME, REGION_NAME, query, defaultField);
verifyQueryKeys(queryWithStandardAnalyzer, expectedKeys);
}
private void verifyQuery(String query, String defaultField, HashMap expectedResults)
throws Exception {
final LuceneQuery<String, Object> queryWithStandardAnalyzer = luceneService
.createLuceneQueryFactory().create(INDEX_NAME, REGION_NAME, query, defaultField);
verifyQueryKeyAndValues(queryWithStandardAnalyzer, expectedResults);
}
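// A tokenizer that splits only on '_'; every other character, including spaces
// and '@', stays inside the token.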
private static class MyCharacterTokenizer extends CharTokenizer {
@Override
protected boolean isTokenChar(final int character) {
return '_' != character;
}
}
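// An analyzer that runs MyCharacterTokenizer's output through a lower-case filter.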
private static class MyCharacterAnalyzer extends Analyzer {
@Override
protected TokenStreamComponents createComponents(final String field) {
Tokenizer tokenizer = new MyCharacterTokenizer();
TokenStream filter = new LowerCaseFilter(tokenizer);
return new TokenStreamComponents(tokenizer, filter);
}
}
public static class IntRangeQueryProvider
implements LuceneQueryProvider, DataSerializableFixedID {
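// A fixed ID used only by this test; production DSFIDs are registered in
// DataSerializableFixedID.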
public static final short LUCENE_INT_RANGE_QUERY_PROVIDER = 2177;
String fieldName;
int lowerValue;
int upperValue;
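// Cached query; transient so it is rebuilt after the provider is deserialized.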
private transient Query luceneQuery;
public IntRangeQueryProvider(String fieldName, int lowerValue, int upperValue) {
this.fieldName = fieldName;
this.lowerValue = lowerValue;
this.upperValue = upperValue;
}
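// Returning null means there are no version-specific serialization formats.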
@Override
public Version[] getSerializationVersions() {
return null;
}
@Override
public int getDSFID() {
return LUCENE_INT_RANGE_QUERY_PROVIDER;
}
@Override
public void toData(DataOutput out) throws IOException {
DataSerializer.writeString(fieldName, out);
out.writeInt(lowerValue);
out.writeInt(upperValue);
}
@Override
public void fromData(DataInput in) throws IOException, ClassNotFoundException {
fieldName = DataSerializer.readString(in);
lowerValue = in.readInt();
upperValue = in.readInt();
}
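// Build the IntPoint range query lazily and cache it for subsequent calls.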
@Override
public Query getQuery(LuceneIndex index) throws LuceneQueryException {
if (luceneQuery == null) {
luceneQuery = IntPoint.newRangeQuery(fieldName, lowerValue, upperValue);
}
return luceneQuery;
}
}
}