/*
* Copyright (c) 2006-2013 by Public Library of Science
* http://plos.org
* http://ambraproject.org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.ambraproject.service.search;
import org.ambraproject.ApplicationException;
import org.ambraproject.service.cache.Cache;
import org.ambraproject.util.Pair;
import org.ambraproject.views.SearchHit;
import org.ambraproject.views.SearchResultSinglePage;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.configuration.HierarchicalConfiguration;
import org.apache.commons.lang.StringUtils;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TimeZone;
import java.util.TreeMap;
import java.util.regex.Pattern;
/**
* Service to provide search capabilities for the application.
*
* @author Scott Sterling
* @author Dragisa Krsmanovic
* @author Joe Osowski
*/
public class SolrSearchService implements SearchService {
// Logger shared by every instance of this service.
private static final Logger log = LoggerFactory.getLogger(SolrSearchService.class);
// Refresh interval handed to the cache when looking up the top-level subjects.
// NOTE(review): 3600 * 24 reads as seconds in a day -- confirm the unit expected by Cache.get.
private static final int CACHE_TTL = 3600 * 24; // one day
// Source of the Solr server used to execute queries; injected through setServerFactory.
private SolrServerFactory serverFactory;
// Optional cache; when it is null, getTopSubjects queries Solr directly.
private Cache cache;
// Value passed to SolrQuery.setTimeAllowed; read from "ambra.services.search.timeout" in setConfiguration.
private int queryTimeout;
// Default facet limit applied to the queries built by this class.
private static final int MAX_FACET_SIZE = 100;
// Facet minimum count applied to the queries built by this class.
private static final int MIN_FACET_COUNT = 1;
// A configured sort value is a comma-separated list of sort options (sort direction is optional), e.g.
//   field desc|asc
//   sum(field1, field2) desc|asc
// The pattern splits that list on every comma that is NOT inside a pair of parentheses,
// so the arguments of a function such as sum(a, b) stay together.
private static final Pattern SORT_OPTION_PATTERN = Pattern.compile(",(?![^\\(\\)]*\\))");
// Keyword display name -> Solr field value, loaded from configuration in setConfiguration.
private Map validKeywords = null;
// Page sizes read from "ambra.services.search.pageSizes.size" in setConfiguration.
private List pageSizes = null;
// Two collections are kept for the sort options: the list preserves the order in which
// the options are defined in the configuration, the map gives lookup by key.
private List displaySorts = null;
// Sort display name -> Solr sort expression, loaded from configuration in setConfiguration.
private Map validSorts = null;
/**
 * Perform an "all the words" search (across most article fields).
 * <p/>
 * It uses <a href="http://wiki.apache.org/solr/DisMaxRequestHandler">DisMax Query Parser</a>.
 * <p/>
 * Up to four Solr requests are issued, in this order: the main result query, the cross-published
 * journal facet query, the article type facet query and, only when no keyword filter was requested,
 * the keyword facet query.
 *
 * @param sParams The search parameters to use.
 * @return One "page" of articles which contain the terms in <code>queryString</code>
 * @throws ApplicationException Thrown by a failed query attempt, or when the requested keyword filter
 *                              is not one of the configured keywords
 */
public SearchResultSinglePage simpleSearch(SearchParameters sParams) throws ApplicationException {
  final String journalFacetName = "cross_published_journal_key";
  final String articleTypeFacetName = "article_type_facet";

  sParams.setQuery(sParams.getQuery());
  final String userQuery = sParams.getQuery();
  log.debug("Simple Search performed on the String: " + userQuery);

  // Build the three dismax queries: the page of results plus one facet-only query per facet.
  SolrQuery mainQuery = createQuery(userQuery, sParams.getStartPage(), sParams.getPageSize(), true);
  SolrQuery journalQuery = createFacetsQuery(userQuery, journalFacetName, true);
  SolrQuery articleTypeQuery = createFacetsQuery(userQuery, articleTypeFacetName, true);

  // Each facet query skips the filter on its own dimension so that every value of that
  // dimension is still counted: no journal filter for journals, no article type filter for types.
  setFilters(mainQuery, sParams, false, false);
  setFilters(journalQuery, sParams, true, false);
  setFilters(articleTypeQuery, sParams, false, true);

  // Only the main query returns documents, so only it needs a sort order.
  setSort(mainQuery, sParams);

  // A keyword filter narrows the search to one section of the article (body, conclusions,
  // materials and methods, ...) by changing the field dismax queries against.
  final boolean keywordRequested = sParams.getFilterKeyword().length() > 0;
  if (keywordRequested) {
    String keywordKey = sParams.getFilterKeyword();
    if (!validKeywords.containsKey(keywordKey)) {
      throw new ApplicationException("Invalid filterKeyword value of " +
          keywordKey + " specified");
    }

    String solrField = (String) validKeywords.get(keywordKey);
    mainQuery.set("qf", solrField);
    journalQuery.set("qf", solrField);
    articleTypeQuery.set("qf", solrField);
  }

  // Run the searches.
  SearchResultSinglePage page = search(mainQuery);
  QueryResponse journalResponse = getSOLRResponse(journalQuery);
  QueryResponse articleTypeResponse = getSOLRResponse(articleTypeQuery);

  FacetField journalFacet = journalResponse.getFacetField(journalFacetName);
  FacetField articleTypeFacet = articleTypeResponse.getFacetField(articleTypeFacetName);
  page.setJournalFacet(facetCountsToHashMap(journalFacet));
  page.setArticleTypeFacet(facetCountsToHashMap(articleTypeFacet));

  // The keyword facet is only computed when the caller has not already picked a keyword.
  if (!keywordRequested) {
    SolrQuery keywordQuery = createKeywordFacetQuery(userQuery);
    setFilters(keywordQuery, sParams, false, false);
    FacetField keywordFacet = facetSearch(keywordQuery, "doc_partial_type");
    page.setKeywordFacet(facetCountsToHashMap(keywordFacet));
  }

  return page;
}
/**
 * Execute a Solr search composed from the contents of the <code>SearchParameters.unformattedQuery</code> property.
 * The query is filtered by the journal and category fields also contained in the <code>searchParameters</code>
 * parameter. No filter is created for date ranges, since that is assumed to be contained in
 * <code>SearchParameters.unformattedQuery</code>.
 * <p/>
 * The two facet-only queries (journals, article types) are sent to Solr before the main query.
 *
 * @param searchParameters Contains all the parameters necessary to execute a search against the Solr query engine
 * @return A subset (determined by <code>SearchParameters.startPage</code> and <code>SearchParameters.pageSize</code>
 *         of the results of the Solr query generated from the contents of the <code>searchParameters</code>
 *         parameter
 * @throws ApplicationException Thrown during failed interactions with the Solr Server
 */
public SearchResultSinglePage advancedSearch(SearchParameters searchParameters) throws ApplicationException {
  final String journalFacetName = "cross_published_journal_key";
  final String articleTypeFacetName = "article_type_facet";

  // Cleaning does not impact the unformattedQuery field, which is read from the original object.
  SearchParameters cleaned = cleanStrings(searchParameters);
  final String rawQuery = searchParameters.getUnformattedQuery().trim();

  if (log.isDebugEnabled()) {
    log.debug("Solr Search performed on the unformattedSearch String: "
        + rawQuery);
  }

  // The main query is created empty and then given the caller's query text verbatim.
  SolrQuery mainQuery = createQuery(null, cleaned.getStartPage(), cleaned.getPageSize(), false);
  mainQuery.setQuery(rawQuery);

  SolrQuery journalQuery = createFacetsQuery(rawQuery, journalFacetName, false);
  SolrQuery articleTypeQuery = createFacetsQuery(rawQuery, articleTypeFacetName, false);

  // Each facet query skips the filter on its own dimension: no journal filter for the
  // journals query, no article type filter for the article types query.
  setFilters(mainQuery, cleaned, false, false);
  setFilters(journalQuery, cleaned, true, false);
  setFilters(articleTypeQuery, cleaned, false, true);

  setSort(mainQuery, cleaned);

  FacetField journalFacet = getSOLRResponse(journalQuery).getFacetField(journalFacetName);
  FacetField articleTypeFacet = getSOLRResponse(articleTypeQuery).getFacetField(articleTypeFacetName);

  SearchResultSinglePage page = search(mainQuery);
  page.setJournalFacet(facetCountsToHashMap(journalFacet));
  page.setArticleTypeFacet(facetCountsToHashMap(articleTypeFacet));

  return page;
}
/**
 * {@inheritDoc}
 * <p/>
 * Implemented as an advanced search restricted to the given journal and subject, limited to articles
 * with at least one Twitter or Facebook share and sorted by the sum of both counts, highest first.
 *
 * @return the top hit, or <code>null</code> when the search finds nothing
 */
@Override
public SearchHit getMostSharedForJournalCategory(String journal, String subjectArea)
    throws ApplicationException {
  SearchParameters criteria = new SearchParameters();

  criteria.setFilterSubjects(new String[] { subjectArea });
  criteria.setFilterJournals(new String[] { journal });

  // Only the single best record is needed: first page, one row.
  criteria.setPageSize(1);
  criteria.setStartPage(0);

  // Restrict to articles that have been shared at least once.
  // This could become a filter query for a small performance boost.
  criteria.setUnformattedQuery("alm_twitterCount:[1 TO *] OR alm_facebookCount:[1 TO *]");
  criteria.setSortValue("sum(alm_twitterCount, alm_facebookCount) desc");

  SearchResultSinglePage page = advancedSearch(criteria);

  if (page.getHits().size() <= 0) {
    return null;
  }
  return page.getHits().get(0);
}
/**
 * {@inheritDoc}
 * <p/>
 * Implemented as an advanced search restricted to the given journal and subject, limited to articles
 * whose <code>counter_total_month</code> is at least one and sorted by that field, highest first.
 *
 * @return the top hit, or <code>null</code> when the search finds nothing
 */
@Override
public SearchHit getMostViewedForJournalCategory(String journal, String subjectArea)
    throws ApplicationException {
  SearchParameters criteria = new SearchParameters();

  criteria.setFilterSubjects(new String[] { subjectArea });
  criteria.setFilterJournals(new String[] { journal });

  // Only the single best record is needed: first page, one row.
  criteria.setPageSize(1);
  criteria.setStartPage(0);

  // Restrict to articles with a non-zero monthly counter.
  // This could become a filter query for a small performance boost.
  criteria.setUnformattedQuery("counter_total_month:[1 TO *]");
  criteria.setSortValue("counter_total_month desc");

  SearchResultSinglePage page = advancedSearch(criteria);

  if (page.getHits().size() <= 0) {
    return null;
  }
  return page.getHits().get(0);
}
/**
 * {@inheritDoc}
 * <p/>
 * Implemented as an advanced search restricted to the given journal and subject, limited to articles
 * whose <code>counter_total_all</code> is at least one and sorted by that field, highest first.
 *
 * @return the top hit, or <code>null</code> when the search finds nothing
 */
@Override
public SearchHit getMostViewedAllTimeForJournalCategory(String journal, String subjectArea)
    throws ApplicationException {
  SearchParameters criteria = new SearchParameters();

  criteria.setFilterSubjects(new String[] { subjectArea });
  criteria.setFilterJournals(new String[] { journal });

  // Only the single best record is needed: first page, one row.
  criteria.setPageSize(1);
  criteria.setStartPage(0);

  // Restrict to articles with a non-zero all-time counter.
  // This could become a filter query for a small performance boost.
  criteria.setUnformattedQuery("counter_total_all:[1 TO *]");
  criteria.setSortValue("counter_total_all desc");

  SearchResultSinglePage page = advancedSearch(criteria);

  if (page.getHits().size() <= 0) {
    return null;
  }
  return page.getHits().get(0);
}
/**
 * Populate facets of the search object.
 * <p/>
 * An unfiltered <code>*:*</code> query is run first to obtain the complete facet lists. The caller's
 * query and filters are then applied. If that search fails with a <code>SolrException</code> or finds
 * nothing, every filter except the "full documents" one is removed and the search is run again (after
 * a failure, with <code>*:*</code> as the query); the returned object is then flagged with
 * <code>setFiltersReset(true)</code>. The journal and article type facets are always taken from the
 * unfiltered query.
 *
 * @param searchParameters The search parameters
 * @return a populated SearchResultSinglePage object
 * @throws ApplicationException Thrown during failed interactions with the Solr Server
 */
public SearchResultSinglePage getFilterData(SearchParameters searchParameters) throws ApplicationException {
  //TODO: This function queries SOLR for the journal and article type list
  //We should migrate this away from config and into a database when it is
  //available

  //Does not impact unformattedQuery field.
  SearchParameters sp = cleanStrings(searchParameters);

  String q = searchParameters.getUnformattedQuery().trim();

  //In this use case, if the query string is empty, we want to get facets for everything
  if (q.length() == 0) {
    q = "*:*";
  }

  if (log.isDebugEnabled()) {
    log.debug("Solr Search performed to get facet data on the unformattedSearch String: "
        + q);
  }

  //We want a complete set of facet data. So first, lets get it all
  SolrQuery query = createQuery("*:*", 0, 0, false);

  //Remove facets we don't use in this case
  query.removeFacetField("author_facet");
  query.removeFacetField("editor_facet");
  query.removeFacetField("affiliate_facet");
  //Add the one we do want in this case.
  query.addFacetField("cross_published_journal_key");
  query.addFacetField("article_type");
  query.setFacetLimit(MAX_FACET_SIZE);

  //Related to JO: http://joborder.plos.org/view.php?id=17480
  //(for now) we don't want to search on Issue Images
  query.addFilterQuery(createFilterNoIssueImageDocuments());

  SearchResultSinglePage preFilterResults = search(query);

  setFilters(query, sp, false, false);

  query.setQuery(q);

  SearchResultSinglePage results = null;
  try {
    results = search(query);
  } catch (SolrException e) {
    //Log the query that actually failed BEFORE replacing it, otherwise the message would
    //always report "*:*" as the failing string.
    if (log.isWarnEnabled()) {
      log.warn("Solr Search failed on the unformattedSearch String: { " + q
          + " } so the query will be re-run using the String *:* to populate the Filters"
          + " on the Advanced Search page.", e);
    }
    query.setQuery("*:*");
  }

  if (results == null || results.getTotalNoOfResults() == 0) {
    //If no results, remove optional filters and try again.
    //The loop walks the array returned by getFilterQueries() while removing from the query.
    for (String filter : query.getFilterQueries()) {
      if (filter.indexOf(createFilterFullDocuments()) < 0) {
        query.removeFilterQuery(filter);
      }
    }

    results = search(query);

    //If results are STILL empty. We must return something for subjects and article type.
    //So let's use the global list
    if (results.getTotalNoOfResults() == 0) {
      results.setSubjectFacet(preFilterResults.getSubjectFacet());
      results.setArticleTypeFacet(preFilterResults.getArticleTypeFacet());
    }

    results.setFiltersReset(true);
  }

  //Lets always return ALL values for journals and article types.
  //These lists will not be dependant on the user's other
  //selections other then the query.
  //NOTE(review): an earlier comment here said article types should depend on the user's
  //selections, but the code has always overwritten them with the unfiltered list -- confirm intent.
  results.setJournalFacet(preFilterResults.getJournalFacet());
  results.setArticleTypeFacet(preFilterResults.getArticleTypeFacet());

  return results;
}
/**
 * {@inheritDoc}
 * <p/>
 * The names are read from the <code>subject_hierarchy</code> facet. When the response carries no such
 * facet, or the facet has no values, an empty list is returned instead of failing.
 */
@Override
public List<String> getAllSubjects(String journal) throws ApplicationException {
  QueryResponse queryResponse = executeSubjectFacetSearch("subject_hierarchy", journal);
  FacetField facet = queryResponse.getFacetField("subject_hierarchy");

  // Same situation getTopSubjectsFromSOLR guards against: the facet may come back without values.
  if (facet == null || facet.getValues() == null) {
    log.warn("No subject_hierarchy facet");
    return new ArrayList<String>();
  }

  List<FacetField.Count> counts = facet.getValues();
  List<String> results = new ArrayList<String>(counts.size());
  for (FacetField.Count count : counts) {
    results.add(count.getName());
  }
  return results;
}
/**
 * {@inheritDoc}
 * <p/>
 * The per-subject counts are read from the <code>subject_facet</code> facet and the total from the
 * number of documents found. When the response carries no such facet, or the facet has no values,
 * the counts are left empty instead of failing.
 */
@Override
public SubjectCounts getAllSubjectCounts(String journal) throws ApplicationException {
  QueryResponse queryResponse = executeSubjectFacetSearch("subject_facet", journal);
  FacetField facet = queryResponse.getFacetField("subject_facet");

  SubjectCounts results = new SubjectCounts();

  // Same situation getTopSubjectsFromSOLR guards against: the facet may come back without values.
  if (facet == null || facet.getValues() == null) {
    log.warn("No subject_facet facet");
  } else {
    for (FacetField.Count count : facet.getValues()) {
      results.subjectCounts.put(count.getName(), count.getCount());
    }
  }

  results.totalArticles = queryResponse.getResults().getNumFound();
  return results;
}
/**
 * Executes a search where results are grouped by one of the subject facets in the solr schema.
 * No documents are requested; only the counts of the requested facet are of interest.
 *
 * @param facetName the subject facet of interest. Depending on the application, this should be
 *                  either "subject_facet" or "subject_hierarchy". The first does not include the entire taxonomy
 *                  path, while the second does.
 * @param journal   journal of interest; when <code>null</code> or empty no journal filter is added
 * @return solr server response
 * @throws ApplicationException Thrown during failed interactions with the Solr Server
 */
private QueryResponse executeSubjectFacetSearch(String facetName, String journal) throws ApplicationException {
  SolrQuery facetQuery = createQuery("*:*", 0, 0, false);

  // Facet counts only, no result rows.
  facetQuery.setRows(0);

  // Full documents only, and no issue images.
  facetQuery.addFilterQuery(createFilterFullDocuments());
  facetQuery.addFilterQuery(createFilterNoIssueImageDocuments());

  // Drop the facets that are of no use here, then add the requested one.
  final String[] unwantedFacets = {
      "author_facet", "editor_facet", "affiliate_facet", "subject_facet", "subject_hierarchy"
  };
  for (String unwanted : unwantedFacets) {
    facetQuery.removeFacetField(unwanted);
  }
  facetQuery.addFacetField(facetName);

  final boolean journalGiven = !(journal == null || journal.length() == 0);
  if (journalGiven) {
    facetQuery.addFilterQuery("cross_published_journal_key:" + journal);
  }

  // A limit of -1 asks for every facet value.
  facetQuery.setFacetLimit(-1);

  return getSOLRResponse(facetQuery);
}
/**
 * {@inheritDoc}
 * <p/>
 * When a cache is present the lookup goes through it, using a fixed key and <code>CACHE_TTL</code>;
 * otherwise Solr is queried directly.
 */
public SortedMap<String, Long> getTopSubjects() throws ApplicationException {
  if (cache != null) {
    final String cacheKey = "topLevelCategoriesCacheKey".intern();

    // The lookup is only expected to hit Solr when the cache needs the value computed.
    Cache.SynchronizedLookup<SortedMap<String, Long>, ApplicationException> solrLookup =
        new Cache.SynchronizedLookup<SortedMap<String, Long>, ApplicationException>(cacheKey) {
          @Override
          public SortedMap<String, Long> lookup() throws ApplicationException {
            return getTopSubjectsFromSOLR();
          }
        };

    return cache.get(cacheKey, CACHE_TTL, solrLookup);
  }

  return getTopSubjectsFromSOLR();
}
/**
 * Queries Solr for the <code>subject_level_1</code> facet over all full documents.
 *
 * @return facet value name mapped to its count, sorted by name; empty when the response carries no
 *         <code>subject_level_1</code> facet or the facet has no values
 * @throws ApplicationException Thrown during failed interactions with the Solr Server
 */
private SortedMap<String, Long> getTopSubjectsFromSOLR() throws ApplicationException {
  SolrQuery query = createQuery("*:*", 0, 0, false);

  // We don't care about results, just facet counts.
  query.setRows(0);

  // We only care about full documents
  query.addFilterQuery(createFilterFullDocuments());

  // Remove facets we don't use in this case.
  query.removeFacetField("author_facet");
  query.removeFacetField("editor_facet");
  query.removeFacetField("affiliate_facet");
  query.removeFacetField("subject_facet");

  // Add the one we do want.
  query.addFacetField("subject_level_1");
  query.setFacetLimit(-1); // unlimited

  QueryResponse queryResponse = getSOLRResponse(query);
  FacetField facet = queryResponse.getFacetField("subject_level_1");

  SortedMap<String, Long> results = new TreeMap<String, Long>();

  //If there is no facet. Should never happen outside a unit test.
  //Both the facet itself and its value list can be missing, so both are checked.
  if (facet == null || facet.getValues() == null) {
    log.warn("No subject_level_1 facet");
  } else {
    for (FacetField.Count count : facet.getValues()) {
      results.put(count.getName(), count.getCount());
    }
  }

  return results;
}
/**
 * Add <i>sort</i> clauses to the <code>query</code> parameter.
 * <p/>
 * The sort expression is either the explicit <code>SearchParameters.sortValue</code>, when one is set,
 * or the expression configured for <code>SearchParameters.sortKey</code>. The expression is split into
 * options on every comma that is not inside parentheses. Within an option, the text after the last
 * space is taken as the <i>sort direction</i> and the text before it as the field (or function) name;
 * an option without a space is a field name only.
 * <p/>
 * The direction is <strong>asc</strong> only when it equals "asc" (ignoring case); in every other case,
 * including a missing direction, it defaults to <strong>desc</strong>.
 * <p/>
 * When the query ends up without any sort, it is sorted by score, then publication date, then id,
 * all descending.
 *
 * @param query The SolrQuery which will have a <i>sort</i> clause attached
 * @param sp    The SearchParameters DTO which contains the sort key / sort value used by this method
 * @throws ApplicationException when no explicit sort value is given and the sort key is not configured
 */
private void setSort(SolrQuery query, SearchParameters sp) throws ApplicationException {
  if (log.isDebugEnabled()) {
    log.debug("SearchParameters.sort = " + sp.getSortKey());
  }

  final String requestedKey = sp.getSortKey();
  final String explicitSort = sp.getSortValue();
  final boolean hasExplicitSort = explicitSort != null && explicitSort.length() > 0;

  if (requestedKey.length() > 0 || hasExplicitSort) {
    final String configuredSort = (String) validSorts.get(requestedKey);

    // An explicit sort value lets a caller bypass the configured keys altogether.
    final String sortExpression;
    if (hasExplicitSort) {
      sortExpression = explicitSort;
    } else if (configuredSort != null) {
      sortExpression = configuredSort;
    } else {
      throw new ApplicationException("Invalid sort key of '" + sp.getSortKey() + "' specified.");
    }

    for (String rawOption : SORT_OPTION_PATTERN.split(sortExpression)) {
      final String option = rawOption.trim();
      final int lastSpace = option.lastIndexOf(" ");

      final String fieldName;
      final String direction;
      if (lastSpace == -1) {
        fieldName = option;
        direction = null;
      } else {
        fieldName = option.substring(0, lastSpace);
        direction = option.substring(lastSpace + 1).trim();
      }

      final boolean ascending = direction != null && direction.toLowerCase().equals("asc");
      query.addSortField(fieldName, ascending ? SolrQuery.ORDER.asc : SolrQuery.ORDER.desc);
    }
  }

  final String appliedSort = query.getSortField();
  if (appliedSort == null || appliedSort.length() == 0) {
    // Fall back to relevance when nothing else was requested ...
    query.addSortField("score", SolrQuery.ORDER.desc);
    // ... breaking ties in favour of the more recently published article ...
    query.addSortField("publication_date", SolrQuery.ORDER.desc);
    // ... and finally by id.
    query.addSortField("id", SolrQuery.ORDER.desc);
  }
}
/**
 * Execute a Solr search composed from the contents of the <i>Find An Article</i> search block including the
 * properties: <code>volume</code>, <code>eNumber</code>, and/or <code>id</code> (DOI).
 * <p/>
 * When an <code>id</code> is given the search is made on the id alone and its result is returned
 * directly; volume, eLocationId and the journal filter are not applied in that case. Otherwise the
 * query is built from <code>volume</code> and <code>eLocationId</code> and filtered by the
 * <code>SearchParameters.filterJournals</code> property.
 * <p/>
 * No filter is created for date ranges or subject categories.
 *
 * @param searchParameters Contains all the parameters necessary to execute a search against the Solr query engine
 * @return A subset (determined by <code>SearchParameters.startPage</code> and <code>SearchParameters.pageSize</code>
 *         of the results of the Solr query generated from the contents of the <code>searchParameters</code>
 *         parameter
 * @throws ApplicationException Thrown when not exactly one journal is selected, or during failed
 *                              interactions with the Solr Server
 */
public SearchResultSinglePage findAnArticleSearch(SearchParameters searchParameters) throws ApplicationException {
  SearchParameters sp = cleanStrings(searchParameters); // Does not impact unformattedQuery field.

  if (log.isDebugEnabled()) {
    log.debug("Solr Search performed on the following selection of the SearchParameters properties: "
        + "{ filterJournals=" + (sp.getFilterJournals() == null ? null : Arrays.asList(sp.getFilterJournals()))
        + "\', volume = " + sp.getVolume()
        + "\', eLocationId = " + sp.getELocationId()
        + "\', id = " + sp.getId() + "\' }");
  }

  // We should always have exactly one journal. A missing journal list is reported the same way
  // as a list of the wrong size instead of failing with a NullPointerException.
  String[] journals = sp.getFilterJournals();
  if (journals == null || journals.length != 1) {
    throw new ApplicationException("Please select exactly one journal.");
  }

  SolrQuery query = createQuery(null, sp.getStartPage(), sp.getPageSize(), false);

  // If ID exists, search on it alone, ignoring all the other fields, and return whatever is found.
  if (sp.getId().length() > 0) {
    query.setQuery("id:\"" + sp.getId() + "\"");
    return search(query);
  }

  // No ID: attempt a query based on the other submitted fields, if those fields exist.
  int volume = 0;
  try {
    volume = Integer.parseInt(sp.getVolume());
  } catch (NumberFormatException e) {
    // A missing or non-numeric volume simply means "no volume criterion".
    if (log.isDebugEnabled()) {
      log.debug("Unable to create an integer from the String volume = " + sp.getVolume());
    }
  }

  StringBuilder q = new StringBuilder(); // The Query which will be submitted to Solr.

  if (volume > 0) {
    q.append(" volume:").append(volume);
  }
  if (sp.getELocationId().length() > 0) {
    if (q.length() > 0) {
      q.append(" AND ");
    }
    q.append(" elocation_id:").append(sp.getELocationId());
  }

  if (log.isDebugEnabled()) {
    log.debug("findAnArticleSearch: query = " + q.toString());
  }

  query.setQuery(q.toString());

  // Form field description: "Journals". Query Filter.
  query.addFilterQuery(createFilterLimitForJournals(journals));

  return search(query);
}
/**
 * Reads the search settings from the application configuration: the query timeout, the page sizes,
 * the sort options and the keyword fields.
 * <p/>
 * The sort options and keyword fields are read through <code>HierarchicalConfiguration</code>, so the
 * supplied configuration is cast to that type when those keys are present.
 *
 * @param config the application configuration
 * @throws ApplicationException when the page sizes, the sort options or the keyword fields are not
 *                              defined in the configuration
 */
public void setConfiguration(Configuration config) throws ApplicationException {
  queryTimeout = config.getInt("ambra.services.search.timeout", 60000); // default to 1 min

  // getList returns an empty list, not null, for a key that is not defined, so both cases
  // have to be treated as "not configured".
  List sizes = config.getList("ambra.services.search.pageSizes.size");
  if (sizes == null || sizes.isEmpty()) {
    throw new ApplicationException("ambra.services.search.pageSizes not defined " +
        "in configuration.");
  }
  pageSizes = sizes;

  if (config.containsKey("ambra.services.search.sortOptions.option")) {
    // Built locally and published only once complete. The list keeps the configured order.
    Map<String, String> sortsByKey = new HashMap<String, String>();
    List<String> sortKeys = new ArrayList<String>();

    HierarchicalConfiguration hc = (HierarchicalConfiguration) config;
    List<HierarchicalConfiguration> sorts =
        hc.configurationsAt("ambra.services.search.sortOptions.option");

    for (HierarchicalConfiguration s : sorts) {
      String key = s.getString("[@displayName]");
      String value = s.getString("");
      sortsByKey.put(key, value);
      sortKeys.add(key);
    }

    validSorts = sortsByKey;
    displaySorts = sortKeys;

    // NOTE(review): this resets the expression engine on the caller's configuration object;
    // the reason is not evident from this method -- kept as is.
    ((HierarchicalConfiguration) config).setExpressionEngine(null);
  } else {
    throw new ApplicationException("ambra.services.search.sortOptions.option not defined " +
        "in configuration.");
  }

  if (config.containsKey("ambra.services.search.keywordFields.field")) {
    Map<String, String> keywordsByKey = new HashMap<String, String>();

    HierarchicalConfiguration hc = (HierarchicalConfiguration) config;
    List<HierarchicalConfiguration> fields =
        hc.configurationsAt("ambra.services.search.keywordFields.field");

    for (HierarchicalConfiguration field : fields) {
      String key = field.getString("[@displayName]");
      String value = field.getString("");
      keywordsByKey.put(key, value);
    }

    validKeywords = keywordsByKey;
  } else {
    throw new ApplicationException("ambra.services.search.keywordFields.field not defined " +
        "in configuration.");
  }
}
/**
 * Sets the factory from which the Solr server used to execute queries is obtained.
 *
 * @param serverFactory the factory to use
 */
public void setServerFactory(SolrServerFactory serverFactory) {
this.serverFactory = serverFactory;
}
/**
 * Adds the filter queries derived from the search parameters to <code>query</code>.
 * <p/>
 * The "no issue images" filter is always added. Journal, article type, subject, subject disjunction
 * and author filters are added when the corresponding array is non-null and non-empty; the
 * publication date filter is added only when both a start and an end date are set.
 *
 * @param query              the query receiving the filters
 * @param sp                 the search parameters holding the filter values
 * @param ignoreJournals     when true the journal filter is skipped
 * @param ignoreArticleTypes when true the article type filter is skipped
 */
private void setFilters(SolrQuery query, SearchParameters sp, boolean ignoreJournals, boolean ignoreArticleTypes) {
  // Related to JO: http://joborder.plos.org/view.php?id=17480
  // (for now) Issue Images are never searched.
  query.addFilterQuery(createFilterNoIssueImageDocuments());

  // Form field "Journals".
  if (!ignoreJournals && sp.getFilterJournals() != null && sp.getFilterJournals().length > 0) {
    query.addFilterQuery(createFilterLimitForJournals(sp.getFilterJournals()));
  }

  // Form field "Article Types".
  if (!ignoreArticleTypes && sp.getFilterArticleTypes() != null && sp.getFilterArticleTypes().length > 0) {
    query.addFilterQuery(createFilterLimitForArticleTypes(sp.getFilterArticleTypes()));
  }

  // Form field "Subject Categories".
  final boolean noSubjects = sp.getFilterSubjects() == null || sp.getFilterSubjects().length == 0;
  if (!noSubjects) {
    query.addFilterQuery(createFilterLimitForSubject(sp.getFilterSubjects()));
  }

  // Not a form field; used by savedSearch alerts.
  final boolean noSubjectDisjunction =
      sp.getFilterSubjectsDisjunction() == null || sp.getFilterSubjectsDisjunction().length == 0;
  if (!noSubjectDisjunction) {
    query.addFilterQuery(createFilterLimitForSubjectDisjunction(sp.getFilterSubjectsDisjunction()));
  }

  // Form field "Authors".
  final boolean noAuthors = sp.getFilterAuthors() == null || sp.getFilterAuthors().length == 0;
  if (!noAuthors) {
    query.addFilterQuery(createFilterLimitForAuthor(sp.getFilterAuthors()));
  }

  // A date range needs both ends.
  final boolean dateRangeMissing = sp.getFilterStartDate() == null || sp.getFilterEndDate() == null;
  if (!dateRangeMissing) {
    query.addFilterQuery(createFilterLimitForPublishDate(sp.getFilterStartDate(), sp.getFilterEndDate()));
  }
}
/**
 * Builds the <code>publication_date</code> range filter for the given dates.
 * <p/>
 * Each date is rendered as its UTC calendar day followed by <code>T00:00:00Z</code>.
 *
 * @param startDate lower bound of the range
 * @param endDate   upper bound of the range
 * @return the filter query string
 */
private String createFilterLimitForPublishDate(Date startDate, Date endDate) {
  // A fresh formatter per call: SimpleDateFormat instances must not be shared between threads.
  SimpleDateFormat utcDay = new SimpleDateFormat("yyyy-MM-dd");
  utcDay.setTimeZone(TimeZone.getTimeZone("UTC"));

  final String rangeStart = utcDay.format(startDate) + "T00:00:00Z";
  final String rangeEnd = utcDay.format(endDate) + "T00:00:00Z";

  return "publication_date:[" + rangeStart + " TO " + rangeEnd + "]";
}
/**
 * Builds a filter matching documents cross-published in ANY of the given journals.
 * <p/>
 * The journals are sorted so that the same selection always yields the same filter string and is
 * therefore only cached once. Sorting is done on a copy; the caller's array is left untouched.
 *
 * @param journals journal keys to match
 * @return <code>cross_published_journal_key:</code> clauses joined with <code>OR</code>; an empty
 *         string for an empty array
 */
private String createFilterLimitForJournals(String[] journals) {
  String[] sorted = journals.clone();
  Arrays.sort(sorted); // Consistent order so that each filter will only be cached once.

  StringBuilder fq = new StringBuilder();
  for (String journal : sorted) {
    // Separator goes between clauses, so nothing has to be trimmed off the end afterwards.
    if (fq.length() > 0) {
      fq.append(" OR ");
    }
    fq.append("cross_published_journal_key:").append(journal);
  }
  return fq.toString();
}
/**
 * Builds a filter matching documents that have ALL of the given authors.
 * <p/>
 * The authors are sorted so that the same selection always yields the same filter string and is
 * therefore only cached once. Sorting is done on a copy; the caller's array is left untouched.
 *
 * @param authors author names to match; each is wrapped in double quotes as is
 * @return <code>author:"..."</code> clauses joined with <code>AND</code>; an empty string for an
 *         empty array
 */
private String createFilterLimitForAuthor(String[] authors) {
  String[] sorted = authors.clone();
  Arrays.sort(sorted); // Consistent order so that each filter will only be cached once.

  StringBuilder fq = new StringBuilder();
  for (String author : sorted) {
    // Separator goes between clauses, so nothing has to be trimmed off the end afterwards.
    if (fq.length() > 0) {
      fq.append(" AND ");
    }
    fq.append("author:\"").append(author).append("\"");
  }
  return fq.toString();
}
/**
 * Builds a filter matching documents that have ALL of the given subjects.
 * <p/>
 * The subjects are sorted so that the same selection always yields the same filter string and is
 * therefore only cached once. Sorting is done on a copy; the caller's array is left untouched.
 *
 * @param subjects subject names to match; each is wrapped in double quotes as is
 * @return <code>subject:"..."</code> clauses joined with <code>AND</code>; an empty string for an
 *         empty array
 */
private String createFilterLimitForSubject(String[] subjects) {
  String[] sorted = subjects.clone();
  Arrays.sort(sorted); // Consistent order so that each filter will only be cached once.

  StringBuilder fq = new StringBuilder();
  for (String category : sorted) {
    // Separator goes between clauses, so nothing has to be trimmed off the end afterwards.
    if (fq.length() > 0) {
      fq.append(" AND ");
    }
    fq.append("subject:\"").append(category).append("\"");
  }
  return fq.toString();
}
/**
 * Builds a filter matching documents that have ANY of the given subjects.
 * <p/>
 * The subjects are sorted so that the same selection always yields the same filter string and is
 * therefore only cached once. Sorting is done on a copy; the caller's array is left untouched.
 *
 * @param subjects subject names to match; each is wrapped in double quotes as is
 * @return <code>subject:"..."</code> clauses joined with <code>OR</code>; an empty string for an
 *         empty array
 */
private String createFilterLimitForSubjectDisjunction(String[] subjects) {
  String[] sorted = subjects.clone();
  Arrays.sort(sorted); // Consistent order so that each filter will only be cached once.

  StringBuilder fq = new StringBuilder();
  for (String category : sorted) {
    // Separator goes between clauses, so nothing has to be trimmed off the end afterwards.
    if (fq.length() > 0) {
      fq.append(" OR ");
    }
    fq.append("subject:\"").append(category).append("\"");
  }
  return fq.toString();
}
/**
 * Builds a filter matching documents whose article type is ANY of the given types.
 * <p/>
 * The types are sorted so that the same selection always yields the same filter string and is
 * therefore only cached once. Sorting is done on a copy; the caller's array is left untouched.
 *
 * @param articleTypes article type names to match; each is wrapped in double quotes as is
 * @return <code>article_type:"..."</code> clauses joined with <code>OR</code>; an empty string for
 *         an empty array
 */
private String createFilterLimitForArticleTypes(String[] articleTypes) {
  String[] sorted = articleTypes.clone();
  Arrays.sort(sorted); // Consistent order so that each filter will only be cached once.

  StringBuilder fq = new StringBuilder();
  for (String articleType : sorted) {
    // Separator goes between clauses, so nothing has to be trimmed off the end afterwards.
    if (fq.length() > 0) {
      fq.append(" OR ");
    }
    fq.append("article_type:\"").append(articleType).append("\"");
  }
  return fq.toString();
}
/**
 * Filter that limits results to only the complete documents, excluding partial documents.
 * <p/>
 * The same string is also used by <code>getFilterData</code> to recognise this filter among the
 * filter queries of a query, so that it is kept when the optional filters are removed.
 *
 * @return the filter query <code>doc_type:full</code>
 */
private String createFilterFullDocuments() {
return "doc_type:full";
}
/**
 * Filter that limits results to partial documents; used by the keyword facet query.
 *
 * @return the filter query <code>doc_type:partial</code>
 */
private String createFilterPartialDocuments() {
return "doc_type:partial";
}
/**
 * Filter that excludes documents whose <code>article_type_facet</code> is "Issue Image".
 *
 * @return the negated filter query on <code>article_type_facet</code>
 */
private String createFilterNoIssueImageDocuments() {
return "!article_type_facet:\"Issue Image\"";
}
/**
 * Sends <code>query</code> to the Solr server obtained from the server factory.
 * The query and the elapsed time of the response are logged at info level.
 *
 * @param query the query to execute
 * @return the raw Solr response
 * @throws ApplicationException when the factory provides no server, or when the query fails with a
 *                              <code>SolrServerException</code>
 */
private QueryResponse getSOLRResponse(SolrQuery query) throws ApplicationException {
  if (serverFactory.getServer() == null) {
    throw new ApplicationException("Search server is not configured");
  }

  try {
    log.info("SOLR Query: " + query.toString());

    final QueryResponse response = serverFactory.getServer().query(query);

    log.info("SOLR Query response time(milliseconds): " + response.getElapsedTime());
    return response;
  } catch (SolrServerException e) {
    throw new ApplicationException("Unable to execute a query on the Solr Server.", e);
  }
}
/**
 * Executes <code>query</code> and converts the Solr response into a result page.
 *
 * @param query the query to execute
 * @return the results read from the response by <code>readQueryResults</code>
 * @throws ApplicationException Thrown during failed interactions with the Solr Server
 */
private SearchResultSinglePage search(SolrQuery query) throws ApplicationException {
  return readQueryResults(getSOLRResponse(query), query);
}
/**
 * Executes <code>query</code> and returns the facet called <code>name</code> from the response.
 *
 * @param query the query to execute
 * @param name  name of the facet field expected in the response
 * @return the facet, never <code>null</code>
 * @throws ApplicationException when the response holds no facet of that name, or during failed
 *                              interactions with the Solr Server
 */
private FacetField facetSearch(SolrQuery query, String name) throws ApplicationException {
  final FacetField requested = getSOLRResponse(query).getFacetField(name);

  if (requested != null) {
    return requested;
  }

  throw new ApplicationException("No facet found with name of:" + name);
}
/**
 * Converts the values of a facet into a list of maps, one per facet value, each holding the entries
 * <code>"name"</code> (the value's name) and <code>"count"</code> (its count).
 *
 * @param field the facet to convert; may be <code>null</code> when the response did not contain it
 * @return the converted values in the facet's order, or <code>null</code> when the facet is missing
 *         or carries no value list
 */
private List<Map> facetCountsToHashMap(FacetField field) {
  // Callers pass the result of QueryResponse.getFacetField straight in, which is null when the
  // facet is absent; treat that like a facet without values instead of throwing.
  if (field == null) {
    return null;
  }

  List<FacetField.Count> counts = field.getValues();
  if (counts == null) {
    return null;
  }

  List<Map> result = new ArrayList<Map>(counts.size());
  for (FacetField.Count count : counts) {
    Map<String, Object> entry = new HashMap<String, Object>();
    entry.put("name", count.getName());
    entry.put("count", count.getCount());
    result.add(entry);
  }
  return result;
}
/**
 * Creates the base query used for searches that return documents.
 * <p/>
 * The query carries the configured timeout, score inclusion, no highlighting, paging, the list of
 * returned fields, the default facet fields with their limit and minimum count, and the
 * "full documents only" filter.
 *
 * @param queryString the query text; may be <code>null</code> when the caller sets it afterwards
 * @param startPage   zero-based page index
 * @param pageSize    number of rows per page
 * @param useDismax   when true the <code>dismax</code> query parser is selected
 * @return the prepared query
 */
private SolrQuery createQuery(String queryString, int startPage, int pageSize, boolean useDismax) {
  SolrQuery solrQuery = new SolrQuery(queryString);

  solrQuery.setTimeAllowed(queryTimeout);
  solrQuery.setIncludeScore(true); // relevance of each result to the search terms
  solrQuery.setHighlight(false);

  if (useDismax) {
    solrQuery.set("defType", "dismax");
  }

  //TODO: Put The "options" from the "queryField" picklist into a config file.
  //This list matches the "options" from the "queryField" picklist on unformattedSearch.ftl,
  //without the "date" fields.

  // Paging: index of the first row of the requested page, then the page length.
  final int firstRow = startPage * pageSize;
  solrQuery.setStart(firstRow);
  solrQuery.setRows(pageSize);

  // Only the fields needed for display are requested.
  solrQuery.setFields("id", "score", "title_display", "publication_date", "eissn", "journal", "article_type",
      "author_display", "abstract", "abstract_primary_display", "striking_image", "figure_table_caption",
      "subject", "expression_of_concern", "retraction");

  final String[] defaultFacets = {
      "subject_facet", "author_facet", "editor_facet", "article_type_facet", "affiliate_facet"
  };
  for (String facetField : defaultFacets) {
    solrQuery.addFacetField(facetField);
  }
  solrQuery.set("facet.method", "fc");
  solrQuery.setFacetLimit(MAX_FACET_SIZE);
  solrQuery.setFacetMinCount(MIN_FACET_COUNT);

  // Partial documents must never be returned by this query.
  solrQuery.addFilterQuery(createFilterFullDocuments());

  return solrQuery;
}
/**
 * Creates a query that returns no rows and facets on a single field.
 *
 * @param queryString the query text
 * @param field       the field to facet on
 * @param useDismax   when true the <code>dismax</code> query parser is selected
 * @return the prepared facet-only query, restricted to full documents
 */
private SolrQuery createFacetsQuery(String queryString, String field, boolean useDismax) {
  SolrQuery facetsQuery = new SolrQuery(queryString);

  facetsQuery.setTimeAllowed(queryTimeout);
  // Neither scores nor highlighting nor rows are needed: only the facet counts matter.
  facetsQuery.setIncludeScore(false);
  facetsQuery.setHighlight(false);
  facetsQuery.setRows(0);

  facetsQuery.setFacetLimit(MAX_FACET_SIZE);
  facetsQuery.setFacetMinCount(MIN_FACET_COUNT);

  if (useDismax) {
    facetsQuery.set("defType", "dismax");
  }

  facetsQuery.addFacetField(field);

  // Partial documents must never be counted by this query.
  facetsQuery.addFilterQuery(createFilterFullDocuments());

  return facetsQuery;
}
/**
 * Builds a dismax query against the <code>doc_partial_body</code> field that returns no rows and facets
 * on <code>doc_partial_type</code>.
 *
 * @param queryString the query text; it is applied with <code>setQuery</code> after all other parameters
 * @return the configured keyword facet query
 */
private SolrQuery createKeywordFacetQuery(String queryString) {
  final SolrQuery keywordQuery = new SolrQuery();
  keywordQuery.setTimeAllowed(queryTimeout);

  // No scores, no highlighting and zero rows: only the facet counts are of interest.
  keywordQuery.setIncludeScore(false);
  keywordQuery.setHighlight(false);
  keywordQuery.setRows(0);

  // Always dismax here, searching the partial-document body field.
  keywordQuery.set("defType", "dismax");
  keywordQuery.set("qf", "doc_partial_body");

  keywordQuery.addFacetField("doc_partial_type");
  keywordQuery.setFacetLimit(MAX_FACET_SIZE);
  keywordQuery.setFacetMinCount(MIN_FACET_COUNT);

  // NOTE(review): judging by the helper's name, this filter restricts the query to partial documents
  // (the opposite of createFilterFullDocuments) - confirm against the helper.
  keywordQuery.addFilterQuery(createFilterPartialDocuments());

  keywordQuery.setQuery(queryString);
  return keywordQuery;
}
/**
 * Converts a Solr response into a {@link SearchResultSinglePage}.
 * <p/>
 * Every returned document becomes a {@link SearchHit}; any of the facets <code>subject_facet</code>,
 * <code>author_facet</code>, <code>editor_facet</code>, <code>article_type_facet</code>,
 * <code>affiliate_facet</code> and <code>cross_published_journal_key</code> present in the response are
 * copied onto the result. When INFO logging is enabled, a summary of the query and the spellcheck
 * suggestions are logged.
 *
 * @param queryResponse the response received from Solr
 * @param query         the query that produced the response; used for logging, for error messages and as
 *                      the query string stored on the result
 * @return the page of results; the total number of hits is narrowed to <code>int</code>
 * @throws RuntimeException if the top-level subjects cannot be loaded while filtering the subject facet
 */
@SuppressWarnings("unchecked")
private SearchResultSinglePage readQueryResults(QueryResponse queryResponse, SolrQuery query) {
  SolrDocumentList documentList = queryResponse.getResults();

  if (log.isInfoEnabled()) {
    logQuerySummary(queryResponse, query, documentList);
    logSpellcheckSuggestions(queryResponse);
  }

  List<SearchHit> searchResults = new ArrayList<SearchHit>();
  for (SolrDocument document : documentList) {
    SearchHit hit = buildSearchHit(document, query);

    if (log.isDebugEnabled()) {
      log.debug(hit.toString());
    }

    searchResults.add(hit);
  }

  //here we assume that number of hits is always going to be within range of int
  SearchResultSinglePage results = new SearchResultSinglePage((int) documentList.getNumFound(), -1,
      searchResults, query.getQuery());

  FacetField subjectFacet = queryResponse.getFacetField("subject_facet");
  if (subjectFacet != null) {
    List<Map> subjects = facetCountsToHashMap(subjectFacet);
    if (subjects != null) {
      results.setSubjectFacet(removeTopLevelSubjects(subjects));
    } else {
      results.setSubjectFacet(null);
    }
  }

  FacetField authorFacet = queryResponse.getFacetField("author_facet");
  if (authorFacet != null) {
    results.setAuthorFacet(facetCountsToHashMap(authorFacet));
  }

  FacetField editorFacet = queryResponse.getFacetField("editor_facet");
  if (editorFacet != null) {
    results.setEditorFacet(facetCountsToHashMap(editorFacet));
  }

  FacetField articleTypeFacet = queryResponse.getFacetField("article_type_facet");
  if (articleTypeFacet != null) {
    results.setArticleTypeFacet(facetCountsToHashMap(articleTypeFacet));
  }

  FacetField affiliateFacet = queryResponse.getFacetField("affiliate_facet");
  if (affiliateFacet != null) {
    results.setInstitutionFacet(facetCountsToHashMap(affiliateFacet));
  }

  FacetField journalFacet = queryResponse.getFacetField("cross_published_journal_key");
  if (journalFacet != null) {
    results.setJournalFacet(facetCountsToHashMap(journalFacet));
  }

  return results;
}

/**
 * Logs, at INFO level, the query text, sort fields, filter queries, hit count, start offset, maximum score
 * and query time. When the query carries no filter queries the text "No Filter Queries" is logged in
 * their place.
 */
private void logQuerySummary(QueryResponse queryResponse, SolrQuery query, SolrDocumentList documentList) {
  String[] filterQueries = query.getFilterQueries();
  String filterQueriesForLog = (filterQueries == null || filterQueries.length == 0)
      ? "No Filter Queries"
      : StringUtils.join(filterQueries, " , ");

  log.info("query.getQuery():{ " + query.getQuery() + " }"
      + ", query.getSortFields():{ " + (query.getSortFields() == null ? null : Arrays.asList(query.getSortFields())) + " }"
      + ", query.getFilterQueries():{ " + filterQueriesForLog + " }"
      + ", found:" + documentList.getNumFound()
      + ", start:" + documentList.getStart()
      + ", max_score:" + documentList.getMaxScore()
      + ", QTime:" + queryResponse.getQTime() + "ms");
}

/**
 * Logs, at INFO level, the spellcheck alternatives Solr suggested for each token of the query, or a note
 * that nothing was flagged as misspelled.
 */
private void logSpellcheckSuggestions(QueryResponse queryResponse) {
  // TODO: implement spell-checking in a meaningful manner. This method exists only to generate log output.
  // TODO: Add "spellcheckAlternatives" or something like it to the SearchHits class so it can be displayed to the user like Google's "did you mean..."
  // TODO: Turn off spellchecking for the "author" field.
  if (queryResponse.getSpellCheckResponse() == null
      || queryResponse.getSpellCheckResponse().getSuggestionMap() == null
      || queryResponse.getSpellCheckResponse().getSuggestionMap().isEmpty()) {
    log.info("Solr thinks everything in the query is spelled correctly.");
    return;
  }

  StringBuilder sb = new StringBuilder("Spellcheck alternative suggestions:");
  for (String token : queryResponse.getSpellCheckResponse().getSuggestionMap().keySet()) {
    List<String> alternatives =
        queryResponse.getSpellCheckResponse().getSuggestionMap().get(token).getAlternatives();
    sb.append(" { ").append(token).append(" : ");
    sb.append(alternatives.isEmpty() ? "NO ALTERNATIVES" : StringUtils.join(alternatives, ", "));
    sb.append(" } ,");
  }
  sb.setLength(sb.length() - 2); // Remove the trailing space and comma.
  log.info(sb.toString());
}

/**
 * Reads the display fields of one Solr document and assembles them into a {@link SearchHit}.
 *
 * @param document the Solr document to convert
 * @param query    the originating query; its string form is passed along as the message for field
 *                 lookups when the document has no <code>id</code>
 */
private SearchHit buildSearchHit(SolrDocument document, SolrQuery query) {
  String id = SolrServiceUtil.getFieldValue(document, "id", String.class, query.toString());
  String message = id == null ? query.toString() : id;

  Float score = SolrServiceUtil.getFieldValue(document, "score", Float.class, message);
  String title = SolrServiceUtil.getFieldValue(document, "title_display", String.class, message);
  Date publicationDate = SolrServiceUtil.getFieldValue(document, "publication_date", Date.class, message);
  String eissn = SolrServiceUtil.getFieldValue(document, "eissn", String.class, message);
  String journal = SolrServiceUtil.getFieldValue(document, "journal", String.class, message);
  String articleType = SolrServiceUtil.getFieldValue(document, "article_type", String.class, message);
  String strikingImage = SolrServiceUtil.getFieldValue(document, "striking_image", String.class, message);
  List<String> abstractText = SolrServiceUtil.getFieldMultiValue(document, "abstract", String.class, message);
  List<String> abstractPrimary = SolrServiceUtil.getFieldMultiValue(document, "abstract_primary_display", String.class, message);
  List<String> authorList = SolrServiceUtil.getFieldMultiValue(document, "author_display", String.class, message);
  // TODO create a dedicated field for checking the existence of assets for a given article.
  List<String> figureTableCaptions = SolrServiceUtil.getFieldMultiValue(document, "figure_table_caption", String.class, message);
  List<String> subjects = SolrServiceUtil.getFieldMultiValue(document, "subject", String.class, message);
  List<String> expressionOfConcern = SolrServiceUtil.getFieldMultiValue(document, "expression_of_concern", String.class, message);
  String retraction = SolrServiceUtil.getFieldValue(document, "retraction", String.class, message);

  //Use the primary abstract if it exists, otherwise the plain abstract, otherwise an empty string
  String abstractResult = "";
  if (abstractPrimary.size() > 0) {
    abstractResult = StringUtils.join(abstractPrimary, ", ");
  } else if (abstractText.size() > 0) {
    abstractResult = StringUtils.join(abstractText, ", ");
  }

  //Flatten the list of "/"-separated subject paths to a unique set of their non-blank segments
  Set<String> flattenedSubjects = new HashSet<String>();
  for (String subject : subjects) {
    for (String segment : subject.split("/")) {
      if (segment.trim().length() > 0) {
        flattenedSubjects.add(segment);
      }
    }
  }

  return SearchHit.builder()
      .setHitScore(score)
      .setUri(id)
      .setTitle(title)
      .setListOfCreators(authorList)
      .setDate(publicationDate)
      .setIssn(eissn)
      .setJournalTitle(journal)
      .setArticleTypeForDisplay(articleType)
      .setAbstractText(abstractResult)
      .setStrikingImage(strikingImage)
      .setHasAssets(figureTableCaptions.size() > 0)
      .setSubjects(flattenedSubjects)
      .setSubjectsPolyhierarchy(subjects)
      .setExpressionOfConcern(expressionOfConcern)
      .setRetraction(retraction)
      .build();
}

/**
 * Copies the given subject facet entries, leaving out every entry whose <code>name</code> is a key of the
 * map returned by <code>getTopSubjects()</code> (FEND-805).
 *
 * @throws RuntimeException wrapping the {@link ApplicationException} raised by <code>getTopSubjects()</code>
 */
@SuppressWarnings("unchecked")
private List<Map> removeTopLevelSubjects(List<Map> subjects) {
  SortedMap<String, Long> topSubjects;
  try {
    topSubjects = getTopSubjects();
  } catch (ApplicationException ex) {
    throw new RuntimeException(ex.getMessage(), ex);
  }

  List<Map> subjectResult = new ArrayList<Map>();
  //Remove top level 1 subjects from list, FEND-805
  for (Map<String, Object> m : subjects) {
    if (!topSubjects.containsKey(m.get("name"))) {
      HashMap<String, Object> hm = new HashMap<String, Object>();
      hm.put("name", m.get("name"));
      hm.put("count", m.get("count"));
      subjectResult.add(hm);
    }
  }
  return subjectResult;
}
/**
 * {@inheritDoc}
 */
public List savedSearchAlerts(SearchParameters sParams, Date lastSearchTime, Date currentSearchTime, int resultLimit) throws ApplicationException {
  final String unformattedQuery = sParams.getUnformattedQuery();
  final boolean advancedSearch = unformattedQuery != null && !unformattedQuery.equals("");
  final SolrQuery alertQuery;

  if (advancedSearch) {
    log.debug("Advanced Saved Search performed on the unformattedSearch String: {}",
        unformattedQuery.trim());

    // The unformatted query is used verbatim; only the filters are read from the cleaned copy.
    SearchParameters cleanedParams = cleanStrings(sParams);
    alertQuery = createQuery(null, 0, resultLimit, false);
    alertQuery.setQuery(unformattedQuery);
    setFilters(alertQuery, cleanedParams, false, false);
  } else {
    if (log.isDebugEnabled()) {
      log.debug("Simple Saved Search performed on the unformattedSearch String: "
          + sParams.getQuery().trim());
    }

    alertQuery = createQuery(sParams.getQuery(), 0, resultLimit, false);
    alertQuery.setQuery(sParams.getQuery());

    //If the keywords parameter is specified, we need to change what field we're querying against
    //aka, body, conclusions, materials and methods ... etc ...
    final String keyword = sParams.getFilterKeyword();
    if (keyword.length() > 0) {
      if (!validKeywords.containsKey(keyword)) {
        throw new ApplicationException("Invalid filterKeyword value of " +
            keyword + " specified");
      }
      //Set the field for dismax to use
      alertQuery.set("qf", (String) validKeywords.get(keyword));
    }

    setFilters(alertQuery, sParams, false, false);
  }

  // Both kinds of search are limited to the publish-date window between the two search times.
  alertQuery.addFilterQuery(createFilterLimitForPublishDate(lastSearchTime, currentSearchTime));

  return search(alertQuery).getHits();
}
/**
 * Produces a copy of the given search parameters whose <code>query</code> has been passed through
 * {@link #cleanString(String)}. The parameter object itself is not written to by this method.
 * <p/>
 * <code>SearchParameters.unformattedQuery</code> is deliberately left untouched, for the reason implied by
 * its name.
 *
 * @param searchParameters the parameters whose query needs "cleaning"
 * @return a copy of <code>searchParameters</code> carrying the cleaned query
 */
private SearchParameters cleanStrings(SearchParameters searchParameters) {
  final SearchParameters cleaned = searchParameters.copy();
  final String escapedQuery = cleanString(searchParameters.getQuery());
  cleaned.setQuery(escapedQuery);
  return cleaned;
}
/**
 * The characters that {@link #cleanString(String)} escapes with a backslash. Compiled once instead of on
 * every call.
 */
private static final Pattern SOLR_OPERATOR_CHARACTERS =
    Pattern.compile("[:!&\"\'\\^\\+\\-\\|\\(\\)\\[\\]\\{\\}\\\\]");

/**
 * Change all input to lower case and, in front of each of the characters
 * <code>: ! &amp; " ' ^ + - | ( ) [ ] { } \</code>, place a backslash (i.e., \) so that these characters are
 * "escaped" such that they may be used as normal characters in searches.
 * <p/>
 * Since Solr uses upper case to define the operators <code>AND</code>, <code>OR</code>, <code>NOT</code>, and
 * <code>TO</code>, setting these values to lower case means that they are not seen as operators by Solr.
 * <p/>
 * Lower-casing uses the root locale, so the result does not depend on the JVM's default locale (under a
 * Turkish default locale, for example, <code>I</code> would otherwise become a dotless <code>i</code>).
 *
 * @param toBeCleaned String to escape and lower-case; may be <code>null</code>
 * @return a new String with the listed characters escaped and all letters in lower case, or
 *         <code>null</code> if <code>toBeCleaned</code> is <code>null</code>
 */
private String cleanString(String toBeCleaned) {
  if (toBeCleaned == null) {
    return null;
  }
  // "$0" in the replacement is the matched character itself, so each match becomes backslash + character.
  String escaped = SOLR_OPERATOR_CHARACTERS.matcher(toBeCleaned).replaceAll("\\\\$0");
  return escaped.toLowerCase(java.util.Locale.ROOT);
}
/**
 * The list of sorts that are valid for this provider.
 *
 * @return the <code>displaySorts</code> list held by this service (the same instance, not a copy)
 */
public List getSorts() {
  return displaySorts;
}
/**
 * The page sizes that are valid for this provider.
 *
 * @return the <code>pageSizes</code> list held by this service (the same instance, not a copy)
 */
public List getPageSizes() {
  return this.pageSizes;
}
private static final String DOI_SCHEME = "info:doi/";

/**
 * Looks up a single article by DOI in the search index and returns its abstract text.
 * <p/>
 * A leading <code>info:doi/</code> prefix is removed before the lookup. The DOI is embedded in a quoted
 * <code>id</code> term, so any backslash or double quote it contains is escaped first; otherwise such a
 * character would end the quoted term early and change the meaning of the query.
 *
 * @param articleDoi the DOI of the article, with or without the <code>info:doi/</code> prefix; must not be
 *                   <code>null</code>
 * @return the abstract text of the matching search hit
 * @throws ApplicationException if the lookup does not return exactly one hit, or if the hit carries no
 *                              abstract value at all
 */
@Override
public String fetchAbstractText(String articleDoi) throws ApplicationException {
  String doi = articleDoi.startsWith(DOI_SCHEME)
      ? articleDoi.substring(DOI_SCHEME.length())
      : articleDoi;

  // Escape the backslash first so the backslashes added for the quotes are not escaped again.
  String escapedDoi = doi.replace("\\", "\\\\").replace("\"", "\\\"");

  SolrQuery query = new SolrQuery("id:\"" + escapedDoi + "\"");
  query.setFields("abstract", "abstract_primary_display");

  List<SearchHit> hits = search(query).getHits();
  if (hits.size() != 1) {
    String message = (hits.isEmpty()) ? "Article not found" : "Non-unique ID";
    throw new ApplicationException(message + ": " + doi);
  }

  String abstractText = hits.get(0).getAbstract();
  if (abstractText == null) {
    // Even an article with no abstract should have produced an empty (non-null) string
    throw new ApplicationException("Abstract not found for article: " + doi);
  }
  return abstractText;
}
/**
 * Sets the cache held by this service.
 *
 * @param cache the {@link Cache} instance to store in this service's <code>cache</code> field
 */
public void setCache(Cache cache) {
this.cache = cache;
}
}