package edu.harvard.iq.dataverse.api; import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.search.SearchFields; import edu.harvard.iq.dataverse.DataverseServiceBean; import edu.harvard.iq.dataverse.DvObjectServiceBean; import edu.harvard.iq.dataverse.search.FacetCategory; import edu.harvard.iq.dataverse.search.FacetLabel; import edu.harvard.iq.dataverse.search.SolrSearchResult; import edu.harvard.iq.dataverse.search.SearchServiceBean; import edu.harvard.iq.dataverse.search.SolrQueryResponse; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.authorization.users.GuestUser; import edu.harvard.iq.dataverse.authorization.users.User; import edu.harvard.iq.dataverse.search.SearchConstants; import edu.harvard.iq.dataverse.search.SearchException; import edu.harvard.iq.dataverse.search.SearchUtil; import edu.harvard.iq.dataverse.search.SolrIndexServiceBean; import edu.harvard.iq.dataverse.search.SortBy; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.logging.Logger; import javax.ejb.EJB; import javax.json.Json; import javax.json.JsonArrayBuilder; import javax.json.JsonObjectBuilder; import javax.servlet.http.HttpServletResponse; import javax.ws.rs.GET; import javax.ws.rs.Path; import javax.ws.rs.QueryParam; import javax.ws.rs.core.Context; import javax.ws.rs.core.Response; import org.apache.commons.lang.StringUtils; /** * User-facing documentation: * <a href="http://guides.dataverse.org/en/latest/api/search.html">http://guides.dataverse.org/en/latest/api/search.html</a> */ @Path("search") public class Search extends AbstractApiBean { private static final Logger logger = Logger.getLogger(Search.class.getCanonicalName()); @EJB SearchServiceBean searchService; @EJB DataverseServiceBean dataverseService; @EJB DvObjectServiceBean dvObjectService; @EJB SolrIndexServiceBean SolrIndexService; @GET public Response search( @QueryParam("q") String query, @QueryParam("type") final List<String> types, @QueryParam("subtree") String subtreeRequested, @QueryParam("sort") String sortField, @QueryParam("order") String sortOrder, @QueryParam("per_page") final int numResultsPerPageRequested, @QueryParam("start") final int paginationStart, @QueryParam("show_relevance") boolean showRelevance, @QueryParam("show_facets") boolean showFacets, @QueryParam("fq") final List<String> filterQueries, @QueryParam("show_entity_ids") boolean showEntityIds, @QueryParam("show_api_urls") boolean showApiUrls, @QueryParam("show_my_data") boolean showMyData, @Context HttpServletResponse response ) { User user; try { user = getUser(); } catch (WrappedResponse ex) { return ex.getResponse(); } if (query != null) { // sanity checking on user-supplied arguments SortBy sortBy; int numResultsPerPage; Dataverse subtree; try { if (!types.isEmpty()) { filterQueries.add(getFilterQueryFromTypes(types)); } sortBy = SearchUtil.getSortBy(sortField, sortOrder); numResultsPerPage = getNumberOfResultsPerPage(numResultsPerPageRequested); subtree = getSubtree(subtreeRequested); if (!subtree.equals(dataverseService.findRootDataverse())) { String dataversePath = dataverseService.determineDataversePath(subtree); String filterDownToSubtree = SearchFields.SUBTREE + ":\"" + dataversePath + "\""; /** * @todo Should filterDownToSubtree logic be centralized in * SearchServiceBean? */ filterQueries.add(filterDownToSubtree); } } catch (Exception ex) { return error(Response.Status.BAD_REQUEST, ex.getLocalizedMessage()); } // users can't change these (yet anyway) boolean dataRelatedToMe = showMyData; //getDataRelatedToMe(); SolrQueryResponse solrQueryResponse; try { solrQueryResponse = searchService.search( createDataverseRequest(user), subtree, query, filterQueries, sortBy.getField(), sortBy.getOrder(), paginationStart, dataRelatedToMe, numResultsPerPage ); } catch (SearchException ex) { Throwable cause = ex; StringBuilder sb = new StringBuilder(); sb.append(cause + " "); while (cause.getCause() != null) { cause = cause.getCause(); sb.append(cause.getClass().getCanonicalName() + " "); sb.append(cause + " "); // if you search for a colon you see RemoteSolrException: org.apache.solr.search.SyntaxError: Cannot parse ':' } String message = "Exception running search for [" + query + "] with filterQueries " + filterQueries + " and paginationStart [" + paginationStart + "]: " + sb.toString(); logger.info(message); return error(Response.Status.INTERNAL_SERVER_ERROR, message); } JsonArrayBuilder itemsArrayBuilder = Json.createArrayBuilder(); List<SolrSearchResult> solrSearchResults = solrQueryResponse.getSolrSearchResults(); for (SolrSearchResult solrSearchResult : solrSearchResults) { itemsArrayBuilder.add(solrSearchResult.toJsonObject(showRelevance, showEntityIds, showApiUrls)); } JsonObjectBuilder spelling_alternatives = Json.createObjectBuilder(); for (Map.Entry<String, List<String>> entry : solrQueryResponse.getSpellingSuggestionsByToken().entrySet()) { spelling_alternatives.add(entry.getKey(), entry.getValue().toString()); } JsonArrayBuilder facets = Json.createArrayBuilder(); JsonObjectBuilder facetCategoryBuilder = Json.createObjectBuilder(); for (FacetCategory facetCategory : solrQueryResponse.getFacetCategoryList()) { JsonObjectBuilder facetCategoryBuilderFriendlyPlusData = Json.createObjectBuilder(); JsonArrayBuilder facetLabelBuilderData = Json.createArrayBuilder(); for (FacetLabel facetLabel : facetCategory.getFacetLabel()) { JsonObjectBuilder countBuilder = Json.createObjectBuilder(); countBuilder.add(facetLabel.getName(), facetLabel.getCount()); facetLabelBuilderData.add(countBuilder); } facetCategoryBuilderFriendlyPlusData.add("friendly", facetCategory.getFriendlyName()); facetCategoryBuilderFriendlyPlusData.add("labels", facetLabelBuilderData); facetCategoryBuilder.add(facetCategory.getName(), facetCategoryBuilderFriendlyPlusData); } facets.add(facetCategoryBuilder); JsonObjectBuilder value = Json.createObjectBuilder() .add("q", query) .add("total_count", solrQueryResponse.getNumResultsFound()) .add("start", solrQueryResponse.getResultsStart()) .add("spelling_alternatives", spelling_alternatives) .add("items", itemsArrayBuilder.build()); if (showFacets) { value.add("facets", facets); } value.add("count_in_response", solrSearchResults.size()); /** * @todo Returning the fq might be useful as a troubleshooting aid * but we don't want to expose the raw dataverse database ids in * "subtree_ss" path like "/2/3". */ // value.add("fq_provided", filterQueries.toString()); if (solrQueryResponse.getError() != null) { /** * @todo You get here if you pass only ":" as a query, for * example. Should we return more or better information? */ return error(Response.Status.BAD_REQUEST, solrQueryResponse.getError()); } response.setHeader("Access-Control-Allow-Origin", "*"); return ok(value); } else { return error(Response.Status.BAD_REQUEST, "q parameter is missing"); } } private User getUser() throws WrappedResponse { /** * @todo support searching as non-guest: * https://github.com/IQSS/dataverse/issues/1299 * * Note that superusers can't currently use the Search API because they * see permission documents (all Solr documents, really) and we get a * NPE when trying to determine the DvObject type if their query matches * a permission document. * * @todo Check back on https://github.com/IQSS/dataverse/issues/1838 for * when/if the Search API is opened up to not require a key. */ AuthenticatedUser authenticatedUser = findAuthenticatedUserOrDie(); if (nonPublicSearchAllowed()) { return authenticatedUser; } else { return GuestUser.get(); } } public boolean nonPublicSearchAllowed() { boolean safeDefaultIfKeyNotFound = false; return settingsSvc.isTrueForKey(SettingsServiceBean.Key.SearchApiNonPublicAllowed, safeDefaultIfKeyNotFound); } private boolean getDataRelatedToMe() { /** * @todo support Data Related To Me: * https://github.com/IQSS/dataverse/issues/1299 */ boolean dataRelatedToMe = false; return dataRelatedToMe; } private int getNumberOfResultsPerPage(int numResultsPerPage) { /** * @todo should maxLimit be configurable? */ int maxLimit = 1000; if (numResultsPerPage == 0) { /** * @todo should defaultLimit be configurable? */ int defaultLimit = 10; return defaultLimit; } else if (numResultsPerPage < 0) { throw new IllegalArgumentException(numResultsPerPage + " results per page requested but can not be less than zero."); } else if (numResultsPerPage > maxLimit) { /** * @todo numbers higher than 2147483647 emit HTML rather than the * expected JSON response below. * * It also returns a 404 but * http://docs.oracle.com/javaee/7/tutorial/jaxrs002.htm says 'an * HTTP 400 ("Client Error") response is returned' if an int "cannot * be parsed as a 32-bit signed integer". * * Is this perhaps due a change to web.xml and all the prettyfaces * stuff in https://github.com/IQSS/dataverse/issues/958 ? * */ throw new IllegalArgumentException(numResultsPerPage + " results per page requested but max limit is " + maxLimit + "."); } else { // ok, fine, you get what you asked for return numResultsPerPage; } } private String getFilterQueryFromTypes(List<String> types) throws Exception { String filterQuery = null; List<String> typeRequested = new ArrayList<>(); List<String> validTypes = Arrays.asList(SearchConstants.DATAVERSE, SearchConstants.DATASET, SearchConstants.FILE); for (String type : types) { if (validTypes.contains(type)) { if (type.equals(SearchConstants.DATAVERSE)) { typeRequested.add(SearchConstants.DATAVERSES); } else if (type.equals(SearchConstants.DATASET)) { typeRequested.add(SearchConstants.DATASETS); } else if (type.equals(SearchConstants.FILE)) { typeRequested.add(SearchConstants.FILES); } } else { throw new Exception("Invalid type '" + type + "'. Must be one of " + validTypes); } } filterQuery = SearchFields.TYPE + ":(" + StringUtils.join(typeRequested, " OR ") + ")"; return filterQuery; } private Dataverse getSubtree(String alias) throws Exception { if (StringUtils.isBlank(alias)) { return dataverseService.findRootDataverse(); } else { Dataverse subtree = dataverseService.findByAlias(alias); if (subtree != null) { return subtree; } else { throw new Exception("Could not find dataverse with alias " + alias); } } } }