/*
* Copyright 2011 Global Biodiversity Information Facility (GBIF)
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.gbif.checklistbank.index.service;
import org.gbif.api.model.checklistbank.Description;
import org.gbif.api.model.checklistbank.VernacularName;
import org.gbif.api.model.checklistbank.search.NameUsageSearchParameter;
import org.gbif.api.model.checklistbank.search.NameUsageSearchRequest;
import org.gbif.api.model.checklistbank.search.NameUsageSearchResult;
import org.gbif.api.model.checklistbank.search.NameUsageSuggestRequest;
import org.gbif.api.model.checklistbank.search.NameUsageSuggestResult;
import org.gbif.api.model.common.search.SearchResponse;
import org.gbif.api.service.checklistbank.NameUsageSearchService;
import org.gbif.api.vocabulary.NomenclaturalStatus;
import org.gbif.api.vocabulary.Rank;
import org.gbif.checklistbank.index.backfill.SolrTestSetup;
import org.gbif.checklistbank.index.guice.SearchTestModule;
import org.gbif.checklistbank.service.mybatis.postgres.ClbDbTestRule;
import org.gbif.common.search.solr.SolrConstants;
import org.gbif.utils.file.properties.PropertiesUtil;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import java.util.UUID;
import javax.annotation.Nullable;
import com.google.common.base.Function;
import com.google.common.collect.Sets;
import com.google.inject.Guice;
import com.google.inject.Injector;
import org.junit.Assert;
import org.junit.BeforeClass;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/**
* Integration tests using an embedded solr server with the mybatis squirrels test dataset.
* The solr index is rebuilt once before the tests run, using {@link SolrTestSetup}.
*/
public class NameUsageSearchServiceIT {

  protected final Logger log = LoggerFactory.getLogger(NameUsageSearchServiceIT.class);

  private static final String PROPERTY_FILE = "checklistbank.properties";
  private static final String SQUIRRELS_DATASET_KEY = "109aea14-c252-4a85-96e2-f5f4d5d088f4";

  private static NameUsageSearchService searchService;

  /**
   * Extracts the text of a description, tolerating null input as advertised by the annotation.
   */
  static final Function<Description, String> getDescriptions = new Function<Description, String>() {
    @Nullable
    @Override
    public String apply(@Nullable Description input) {
      return input == null ? null : input.getDescription();
    }
  };

  /**
   * Extracts the text of a vernacular name, tolerating null input as advertised by the annotation.
   */
  static final Function<VernacularName, String> getVernacular = new Function<VernacularName, String>() {
    @Nullable
    @Override
    public String apply(@Nullable VernacularName input) {
      return input == null ? null : input.getVernacularName();
    }
  };

  /**
   * Builds the squirrels database and solr index once and wires up the search service under test.
   */
  @BeforeClass
  public static void setup() throws Exception {
    // creates squirrels db and solr index & server using its own injector
    SolrTestSetup setup = new SolrTestSetup(ClbDbTestRule.squirrels());
    setup.setup();

    // insert new injector for this test, reusing existing solr server
    Properties props = PropertiesUtil.loadProperties(PROPERTY_FILE);
    Injector injector = Guice.createInjector(new SearchTestModule(props, setup.solr()));
    searchService = injector.getInstance(NameUsageSearchService.class);
  }

  /**
   * A rank filter value that is not a known rank enum name must be rejected.
   */
  @Test(expected = IllegalArgumentException.class)
  public void testBadFilterRequest() {
    // it should be a known rank enum name
    NameUsageSearchRequest searchRequest = new NameUsageSearchRequest(0L, 10);
    searchRequest.addParameter(NameUsageSearchParameter.RANK, "1");
    searchService.search(searchRequest);
  }

  /**
   * Rank filters given as enum names are accepted.
   */
  @Test
  public void testEnumFilterRequest() {
    // test good query with a rank enum name
    NameUsageSearchRequest req = new NameUsageSearchRequest(0L, 25);
    req.addParameter(NameUsageSearchParameter.RANK, "order");
    assertSearch(req, 2L, null);

    // add a second rank value to the same request
    req.addParameter(NameUsageSearchParameter.RANK, "genus");
    assertSearch(req, 16L, null);
  }

  /**
   * Several negated values for the same parameter can be combined.
   */
  @Test
  public void testNegatedFilters() {
    // exclude three ranks using the "!" negation prefix
    NameUsageSearchRequest req = new NameUsageSearchRequest(0L, 50);
    req.addParameter(NameUsageSearchParameter.RANK, "!genus");
    req.addParameter(NameUsageSearchParameter.RANK, "!species");
    req.addParameter(NameUsageSearchParameter.RANK, "!subspecies");
    assertSearch(req, 15L, null);
  }

  /**
   * Mixing a negated and a plain value for the same parameter is expected to be rejected.
   */
  @Test(expected = IllegalArgumentException.class)
  public void testNegatedFiltersError() {
    NameUsageSearchRequest req = new NameUsageSearchRequest(0L, 50);
    req.addParameter(NameUsageSearchParameter.RANK, "!genus");
    req.addParameter(NameUsageSearchParameter.RANK, "species");
    // no counts to verify, the search itself should throw
    assertSearch(req, null, null);
  }

  @Test
  public void testSearchScientificNameNoFacets() {
    assertSearch("Rodentia", 2L);
    assertSearch("Rodentia Bowdich", 2L);
    assertSearch("Rodentia Bowdich, 1821", 2L);
    assertSearch("Rodentia Bowdich 1821", 2L);
    assertSearch("Puma concolor, 1821", 0L);
    assertSearch("vulgaris", 10L);
    assertSearch("Sciurus", 17L);
    assertSearch("Sciurillus pusillus", 4L);
  }

  @Test
  public void testSearchScientificNameWithRankFacet() {
    assertSearch("vulgaris", NameUsageSearchParameter.RANK, null, 10L, null);
    assertSearch("Sciurus vulgaris", NameUsageSearchParameter.RANK, null, 10L, null);
    assertSearch("Sciurus vulgaris", NameUsageSearchParameter.RANK, Rank.SPECIES, 1L, null);
    assertSearch("Sciurus vulgaris", NameUsageSearchParameter.RANK, Rank.VARIETY, 2L, null);
  }

  @Test
  public void testNomStatusFacet() {
    // assertSearch hands back the response, so the first hit can be checked without a second query
    SearchResponse<NameUsageSearchResult, NameUsageSearchParameter> resp =
      assertSearch(null, NameUsageSearchParameter.NOMENCLATURAL_STATUS, NomenclaturalStatus.NUDUM, 1L, 2);
    assertEquals(Integer.valueOf(100000026), resp.getResults().get(0).getKey());
  }

  @Test
  public void testHigherTaxonFilter() {
    NameUsageSearchRequest req = new NameUsageSearchRequest();
    req.addChecklistFilter(UUID.fromString(SQUIRRELS_DATASET_KEY));
    req.addHigherTaxonFilter(100000024);
    assertSearch(req, 14L, null);
  }

  @Test
  public void testSuggest() {
    // highest rank first
    assertSuggest("sci", 27, "Sciuromorpha Brandt, 1855");

    // match start of later epithets
    assertSuggest("alpin", 1, "Sciurus vulgaris var. alpinus Desmarest, 1822");

    // match entire epithets, highest rank first
    assertSuggest("vulgaris", 10, "Sciurus vulgaris Linnaeus, 1758");

    // https://github.com/gbif/checklistbank/issues/11
    // every prefix from "Sciurus v" up to the complete "Sciurus vulgaris"
    final String binomial = "Sciurus vulgaris";
    for (int end = "Sciurus v".length(); end <= binomial.length(); end++) {
      assertSciurusVulgaris(binomial.substring(0, end));
    }

    // only match squirrel dataset
    assertSuggest("Roden", 1, "Rodentia Bowdich, 1821");

    assertSuggest("Sciurillus pusillus", 4, "Sciurillus pusillus E. Geoffroy, 1803");

    // we change the gender epithets
    assertSuggest("Sciurilus pusila", 4, "Sciurillus pusillus E. Geoffroy, 1803");

    // misspelled genus with a different epithet ending is still expected to match
    assertSuggest("Sciurilus pussy", 4, "Sciurillus pusillus E. Geoffroy, 1803");

    // apply sciname normalizer only to entire names, so the ngrams don't match
    assertSuggest("Sciurilus pusilus", 4, "Sciurillus pusillus E. Geoffroy, 1803");

    assertSuggest("Sciu", 27, "Sciuromorpha Brandt, 1855");
  }

  /**
   * Asserts that suggesting for q yields the 10 "vulgaris" usages with the species itself first.
   */
  private void assertSciurusVulgaris(String q) {
    assertSuggest(q, 10, "Sciurus vulgaris Linnaeus, 1758");
  }

  /**
   * Runs a suggest query against the squirrels dataset and verifies the number of
   * suggestions and the scientific name of the first one.
   *
   * @param q             the suggest query string
   * @param expectedSize  expected number of suggestions
   * @param expectedFirst expected scientific name of the first suggestion
   */
  private void assertSuggest(String q, int expectedSize, String expectedFirst) {
    List<NameUsageSuggestResult> results = searchSuggest(q);
    assertEquals(expectedSize, results.size());
    assertEquals(expectedFirst, results.get(0).getScientificName());
  }

  @Test
  public void testHighlighting() {
    // build request
    NameUsageSearchRequest searchRequest = new NameUsageSearchRequest(0L, 10);
    searchRequest.setQ("Eichhörnchen");
    searchRequest.setHighlight(true);
    searchRequest.setExtended(false);
    searchRequest.setHighlightContext(5);

    // query with extended=false
    SearchResponse<NameUsageSearchResult, NameUsageSearchParameter> response = searchService.search(searchRequest);
    assertEquals(Long.valueOf(2L), response.getCount());
    // this usage also matches description and should therefore come first
    assertEquals(Integer.valueOf(100000040), response.getResults().get(0).getKey());

    // assert vernaculars
    List<VernacularName> names = response.getResults().get(0).getVernacularNames();
    assertEquals(1, names.size());
    assertEquals(1, countHighlights(names, getVernacular));

    // assert descriptions
    List<Description> descriptions = response.getResults().get(0).getDescriptions();
    assertEquals(1, countHighlights(descriptions, getDescriptions));

    // query extended, but hl description only
    searchRequest.setExtended(true);
    searchRequest.setHighlightFields(Sets.newHashSet(NameUsageSearchRequest.QueryField.DESCRIPTION));
    response = searchService.search(searchRequest);

    // assert 2 vernacular names without highlighting
    names = response.getResults().get(0).getVernacularNames();
    assertEquals(2, names.size());
    assertEquals(0, countHighlights(names, getVernacular));

    // assert descriptions
    descriptions = response.getResults().get(0).getDescriptions();
    assertEquals(3, descriptions.size());
    assertEquals(1, countHighlights(descriptions, getDescriptions));
  }

  /**
   * Counts the objects whose extracted text matches the solr highlighting pattern.
   *
   * @param objs    objects to inspect
   * @param getText extracts the text to check from each object; a null text is never counted
   * @return number of objects with highlighted text
   */
  private static <T> int countHighlights(List<T> objs, Function<T, String> getText) {
    int cnt = 0;
    for (T obj : objs) {
      String text = getText.apply(obj);
      if (text != null && SolrConstants.HL_REGEX.matcher(text).find()) {
        cnt++;
      }
    }
    return cnt;
  }

  @Test
  public void testVernacularNames() {
    assertVernacularSearches();
  }

  @Test
  public void testExtendedSearch() {
    assertVernacularSearches();

    // restrict the fields the query is matched against
    NameUsageSearchRequest req = new NameUsageSearchRequest(0L, 50);
    req.setQ("Rodent");
    req.getQueryFields().clear();
    assertSearch(req, 0L, null);

    req.getQueryFields().add(NameUsageSearchRequest.QueryField.DESCRIPTION);
    assertSearch(req, 2L, null);

    req.setQ("Palearctic");
    assertSearch(req, 1L, null);

    req.getQueryFields().clear();
    req.getQueryFields().add(NameUsageSearchRequest.QueryField.VERNACULAR);
    assertSearch(req, 0L, null);
  }

  /**
   * Shared assertions on the vernacular names returned for Sciurus vulgaris,
   * searching both by scientific name and by German vernacular name.
   */
  private void assertVernacularSearches() {
    SearchResponse<NameUsageSearchResult, NameUsageSearchParameter> resp =
      search("Sciurus vulgaris Linnaeus", NameUsageSearchParameter.RANK, Rank.SPECIES);
    assertEquals(1, resp.getResults().size());
    assertEquals(2, resp.getResults().get(0).getVernacularNames().size());

    resp = search("Eichhörnchen", null, null);
    assertEquals(2, resp.getResults().size());

    resp = search("Europäisches Eichhörnchen", null, null);
    assertEquals(1, resp.getResults().size());
    assertEquals(2, resp.getResults().get(0).getVernacularNames().size());
    Set<String> vnames = getVernacularNamesSet(resp.getResults().get(0));
    assertTrue(vnames.contains("Europäisches Eichhörnchen"));
  }

  /**
   * Searches for q without facets or filters and asserts the total count.
   */
  private void assertSearch(String q, Long expectedCount) {
    assertSearch(q, null, null, expectedCount, null);
  }

  /**
   * Builds a request via {@link #buildSearch} and delegates to
   * {@link #assertSearch(NameUsageSearchRequest, Long, Integer)}.
   */
  private SearchResponse<NameUsageSearchResult, NameUsageSearchParameter> assertSearch(String q,
    NameUsageSearchParameter facet, Enum<?> facetFilter, Long expectedCount, Integer expectedFacetCounts) {
    return assertSearch(buildSearch(q, facet, facetFilter), expectedCount, expectedFacetCounts);
  }

  /**
   * Executes the request and verifies the response.
   *
   * @param req                 the request to execute
   * @param expectedCount       expected total count, or null to skip the check
   * @param expectedFacetCounts expected number of counts in the single returned facet, or null to skip the check
   * @return the search response for further assertions
   */
  private SearchResponse<NameUsageSearchResult, NameUsageSearchParameter> assertSearch(NameUsageSearchRequest req,
    Long expectedCount, Integer expectedFacetCounts) {
    SearchResponse<NameUsageSearchResult, NameUsageSearchParameter> response = searchService.search(req);

    if (expectedCount != null) {
      assertEquals(expectedCount, response.getCount());
    }
    if (expectedFacetCounts != null) {
      assertEquals(1, response.getFacets().size());
      assertEquals(expectedFacetCounts.intValue(), response.getFacets().get(0).getCounts().size());
    }
    // a request without facets must not produce any facets in the response
    if (req.getFacets().isEmpty()) {
      Assert.assertTrue(response.getFacets().isEmpty());
    }
    return response;
  }

  /**
   * Collects the vernacular name strings of a search result into a set.
   */
  private Set<String> getVernacularNamesSet(NameUsageSearchResult usage) {
    Set<String> vnames = Sets.newHashSet();
    for (VernacularName vn : usage.getVernacularNames()) {
      vnames.add(vn.getVernacularName());
    }
    return vnames;
  }

  /**
   * Builds a request via {@link #buildSearch} and executes it without any assertions.
   */
  private SearchResponse<NameUsageSearchResult, NameUsageSearchParameter> search(String q,
    NameUsageSearchParameter facet, Enum<?> filter) {
    return searchService.search(buildSearch(q, facet, filter));
  }

  /**
   * Builds a search request for the first 10 results.
   *
   * @param q      the query string, may be null
   * @param facet  parameter to facet on, or null for no facet
   * @param filter value to filter the facet parameter by, or null for no filter
   */
  private NameUsageSearchRequest buildSearch(String q, NameUsageSearchParameter facet, Enum<?> filter) {
    NameUsageSearchRequest searchRequest = new NameUsageSearchRequest(0L, 10);
    searchRequest.setQ(q);
    if (filter != null) {
      // register the filter exactly once
      searchRequest.addParameter(facet, filter);
    }
    if (facet != null) {
      searchRequest.addFacets(facet);
    }
    return searchRequest;
  }

  /**
   * Utility method for testing suggest service.
   * Requests up to 100 suggestions restricted to the squirrels dataset.
   */
  private List<NameUsageSuggestResult> searchSuggest(String q) {
    NameUsageSuggestRequest req = new NameUsageSuggestRequest();
    req.setQ(q);
    req.setLimit(100);
    req.addParameter(NameUsageSearchParameter.DATASET_KEY, SQUIRRELS_DATASET_KEY);
    return searchService.suggest(req);
  }
}