package edu.harvard.iq.dataverse.search;
import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.DataFileServiceBean;
import edu.harvard.iq.dataverse.DatasetFieldConstant;
import edu.harvard.iq.dataverse.DatasetFieldServiceBean;
import edu.harvard.iq.dataverse.DatasetFieldType;
import edu.harvard.iq.dataverse.DatasetServiceBean;
import edu.harvard.iq.dataverse.DatasetVersionServiceBean;
import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.DataverseFacet;
import edu.harvard.iq.dataverse.DataverseServiceBean;
import edu.harvard.iq.dataverse.DvObjectServiceBean;
import edu.harvard.iq.dataverse.authorization.groups.Group;
import edu.harvard.iq.dataverse.authorization.groups.GroupServiceBean;
import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
import edu.harvard.iq.dataverse.authorization.users.PrivateUrlUser;
import edu.harvard.iq.dataverse.authorization.users.GuestUser;
import edu.harvard.iq.dataverse.authorization.users.User;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
import edu.harvard.iq.dataverse.util.JsfHelper;
import edu.harvard.iq.dataverse.util.SystemConfig;
import java.lang.reflect.Field;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.Set;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.annotation.PostConstruct;
import javax.annotation.PreDestroy;
import javax.ejb.EJB;
import javax.ejb.EJBTransactionRolledbackException;
import javax.ejb.Stateless;
import javax.ejb.TransactionRolledbackLocalException;
import javax.inject.Named;
import javax.persistence.NoResultException;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrQuery.SortClause;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.impl.HttpSolrServer.RemoteSolrException;
import org.apache.solr.client.solrj.response.FacetField;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.response.RangeFacet;
import org.apache.solr.client.solrj.response.SpellCheckResponse;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
@Stateless
@Named
public class SearchServiceBean {
// Class-wide logger, named after this class.
private static final Logger logger = Logger.getLogger(SearchServiceBean.class.getCanonicalName());
// Design note: we're trying to keep the SearchServiceBean lean, mean, and
// fast, with as few injections of EJBs as possible.
// @todo Can we do without the DatasetFieldServiceBean?

// Used by search() to resolve the DvObject entity behind a hit when the
// caller asks for entities to be retrieved.
@EJB
DvObjectServiceBean dvObjectService;
// NOTE(review): not referenced in the part of the class reviewed here -
// confirm it is still needed.
@EJB
DataverseServiceBean dataverseService;
// NOTE(review): not referenced in the part of the class reviewed here -
// confirm it is still needed.
@EJB
DatasetServiceBean datasetService;
// NOTE(review): not referenced in the part of the class reviewed here -
// confirm it is still needed.
@EJB
DatasetVersionServiceBean datasetVersionService;
// NOTE(review): not referenced in the part of the class reviewed here -
// confirm it is still needed.
@EJB
DataFileServiceBean dataFileService;
// Supplies the dataset field types used by search() for highlighting and
// facet friendly names, and for looking up the title field.
@EJB
DatasetFieldServiceBean datasetFieldService;
// Used by getPermissionFilterQuery() to find the groups a request belongs to.
@EJB
GroupServiceBean groupService;
// Source of the Solr host:port, the highlight fragment size and the site URL.
@EJB
SystemConfig systemConfig;
// Shared helper used by localize() to resolve resource bundle keys.
public static final JsfHelper JH = new JsfHelper();
// Solr client: created in init(), queried by search(), shut down in close().
private SolrServer solrServer;
/**
 * Creates the Solr client once the bean's dependencies have been injected.
 * The endpoint is built from the host and port reported by
 * {@code systemConfig}.
 */
@PostConstruct
public void init() {
    final String solrUrl = "http://" + systemConfig.getSolrHostColonPort() + "/solr";
    solrServer = new HttpSolrServer(solrUrl);
}
/**
 * Shuts the Solr client down before the bean is destroyed and clears the
 * reference. Does nothing when no client is present.
 */
@PreDestroy
public void close() {
    if (solrServer == null) {
        // nothing to release
        return;
    }
    solrServer.shutdown();
    solrServer = null;
}
/**
 * Convenience overload of the ten-argument {@code search} method that always
 * asks for the DvObject entity behind each hit to be looked up.
 *
 * Important note (from the original authors): "onlyDatatRelatedToMe" relies
 * on filterQueries for providing access to private data for the correct
 * user. In other words, "onlyDatatRelatedToMe" negates other filter queries
 * related to permissions.
 *
 * @param dataverseRequest the request on whose behalf the search is run
 * @param dataverse the dataverse the search is run in (forwarded unchanged)
 * @param query the search query
 * @param filterQueries Solr filter queries to apply
 * @param sortField the field to sort on
 * @param sortOrder the sort direction
 * @param paginationStart offset of the first result to return
 * @param onlyDatatRelatedToMe see the note above
 * @param numResultsPerPage number of results to return
 * @return whatever the ten-argument overload returns
 * @throws SearchException propagated from the ten-argument overload
 */
public SolrQueryResponse search(DataverseRequest dataverseRequest, Dataverse dataverse, String query, List<String> filterQueries, String sortField, String sortOrder, int paginationStart, boolean onlyDatatRelatedToMe, int numResultsPerPage) throws SearchException {
    // This overload always resolves the entity behind every hit.
    final boolean retrieveEntities = true;
    return search(dataverseRequest, dataverse, query, filterQueries, sortField, sortOrder, paginationStart, onlyDatatRelatedToMe, numResultsPerPage, retrieveEntities);
}
/**
 * Runs a query against Solr and converts the raw response into a
 * {@link SolrQueryResponse}: one {@link SolrSearchResult} per document
 * (with highlights), the facet categories, and spelling suggestions.
 *
 * Important note (from the original authors): "onlyDatatRelatedToMe" relies
 * on filterQueries for providing access to private data for the correct
 * user. In other words, "onlyDatatRelatedToMe" negates other filter queries
 * related to permissions.
 *
 * A {@code RemoteSolrException} from Solr is not rethrown. It is reported as
 * a "Search Syntax Error" on an otherwise empty response.
 *
 * @param dataverseRequest the request; passed to the permission filter
 * query builder
 * @param dataverse when not null, the facets configured on this dataverse
 * are requested from Solr; also passed to the permission filter query builder
 * @param query the search query; it is run through
 * {@code SearchUtil.sanitizeQuery} before being sent to Solr
 * @param filterQueries Solr filter queries to apply; {@code null} is treated
 * as "no filter queries"
 * @param sortField the field to sort on
 * @param sortOrder the sort direction, handed to Solr's {@code SortClause}
 * @param paginationStart offset of the first result; must be 0 or greater
 * @param onlyDatatRelatedToMe passed to the permission filter query builder,
 * see the note above
 * @param numResultsPerPage number of rows to request; must be 1 or greater
 * @param retrieveEntities - look up dvobject entities with .find()
 * (potentially expensive!)
 * @return the populated response, or an empty response carrying an error
 * message when Solr rejected the query
 * @throws SearchException when the Solr client fails with a
 * {@code SolrServerException}
 * @throws IllegalArgumentException when {@code paginationStart} or
 * {@code numResultsPerPage} is out of range
 */
public SolrQueryResponse search(DataverseRequest dataverseRequest, Dataverse dataverse, String query, List<String> filterQueries, String sortField, String sortOrder, int paginationStart, boolean onlyDatatRelatedToMe, int numResultsPerPage, boolean retrieveEntities) throws SearchException {
    if (paginationStart < 0) {
        throw new IllegalArgumentException("paginationStart must be 0 or greater");
    }
    if (numResultsPerPage < 1) {
        throw new IllegalArgumentException("numResultsPerPage must be 1 or greater");
    }

    // -----------------------------------
    // Query, sorting and highlighting
    // -----------------------------------
    SolrQuery solrQuery = new SolrQuery();
    query = SearchUtil.sanitizeQuery(query);
    solrQuery.setQuery(query);
    solrQuery.setSort(new SortClause(sortField, sortOrder));
    solrQuery.setHighlight(true).setHighlightSnippets(1);
    Integer fragSize = systemConfig.getSearchHighlightFragmentSize();
    if (fragSize != null) {
        solrQuery.setHighlightFragsize(fragSize);
    }
    solrQuery.setHighlightSimplePre("<span class=\"search-term-match\">");
    solrQuery.setHighlightSimplePost("</span>");

    // Solr field name -> display name shown next to a highlight snippet.
    Map<String, String> solrFieldsToHightlightOnMap = new HashMap<>();
    solrFieldsToHightlightOnMap.put(SearchFields.NAME, "Name");
    solrFieldsToHightlightOnMap.put(SearchFields.AFFILIATION, "Affiliation");
    solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_FRIENDLY, "File Type");
    solrFieldsToHightlightOnMap.put(SearchFields.DESCRIPTION, "Description");
    solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_NAME, "Variable Name");
    solrFieldsToHightlightOnMap.put(SearchFields.VARIABLE_LABEL, "Variable Label");
    solrFieldsToHightlightOnMap.put(SearchFields.FILE_TYPE_SEARCHABLE, "File Type");
    solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PUBLICATION_DATE, "Publication Date");
    solrFieldsToHightlightOnMap.put(SearchFields.DATASET_PERSISTENT_ID, localize("advanced.search.datasets.persistentId"));
    /**
     * @todo Dataverse subject and affiliation should be highlighted but
     * this is commented out right now because the "friendly" names are not
     * being shown on the dataverse cards. See also
     * https://github.com/IQSS/dataverse/issues/1431
     */
    // solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_SUBJECT, "Subject");
    // solrFieldsToHightlightOnMap.put(SearchFields.DATAVERSE_AFFILIATION, "Affiliation");
    /**
     * @todo: show highlight on file card?
     * https://redmine.hmdc.harvard.edu/issues/3848
     */
    solrFieldsToHightlightOnMap.put(SearchFields.FILENAME_WITHOUT_EXTENSION, "Filename Without Extension");
    solrFieldsToHightlightOnMap.put(SearchFields.FILE_TAG_SEARCHABLE, "File Tag");
    List<DatasetFieldType> datasetFields = datasetFieldService.findAllOrderedById();
    for (DatasetFieldType datasetFieldType : datasetFields) {
        String solrField = datasetFieldType.getSolrField().getNameSearchable();
        String displayName = datasetFieldType.getDisplayName();
        solrFieldsToHightlightOnMap.put(solrField, displayName);
    }
    for (String solrField : solrFieldsToHightlightOnMap.keySet()) {
        solrQuery.addHighlightField(solrField);
    }

    solrQuery.setParam("fl", "*,score");
    solrQuery.setParam("qt", "/spell");
    solrQuery.setParam("facet", "true");
    /**
     * @todo: do we need facet.query?
     */
    solrQuery.setParam("facet.query", "*");

    // A null list simply means the caller has no filter queries to add.
    if (filterQueries != null) {
        for (String filterQuery : filterQueries) {
            solrQuery.addFilterQuery(filterQuery);
        }
    }

    // -----------------------------------
    // PERMISSION FILTER QUERY
    // -----------------------------------
    String permissionFilterQuery = this.getPermissionFilterQuery(dataverseRequest, solrQuery, dataverse, onlyDatatRelatedToMe);
    if (permissionFilterQuery != null) {
        solrQuery.addFilterQuery(permissionFilterQuery);
    }

    // -----------------------------------
    // Facets to Retrieve
    // -----------------------------------
    solrQuery.addFacetField(SearchFields.DATAVERSE_CATEGORY);
    solrQuery.addFacetField(SearchFields.METADATA_SOURCE);
    solrQuery.addFacetField(SearchFields.PUBLICATION_DATE);
    /**
     * @todo when a new method on datasetFieldService is available
     * (retrieveFacetsByDataverse?) only show the facets that the dataverse
     * in question wants to show (and in the right order):
     * https://redmine.hmdc.harvard.edu/issues/3490
     *
     * also, findAll only returns advancedSearchField = true... we should
     * probably introduce the "isFacetable" boolean rather than caring about
     * if advancedSearchField is true or false
     */
    if (dataverse != null) {
        for (DataverseFacet dataverseFacet : dataverse.getDataverseFacets()) {
            DatasetFieldType datasetField = dataverseFacet.getDatasetFieldType();
            solrQuery.addFacetField(datasetField.getSolrField().getNameFacetable());
        }
    }
    solrQuery.addFacetField(SearchFields.FILE_TYPE);
    /**
     * @todo: hide the extra line this shows in the GUI... at least it's
     * last...
     */
    solrQuery.addFacetField(SearchFields.TYPE);
    solrQuery.addFacetField(SearchFields.FILE_TAG);
    solrQuery.addFacetField(SearchFields.ACCESS);
    /**
     * @todo: decide if year CITATION_YEAR is good enough or if we should
     * support CITATION_DATE. Date fields (production date, distribution
     * date) are dates and should be "range facets" not "field facets";
     * no range facet is requested at the moment.
     */

    // paginationStart and numResultsPerPage were validated on entry.
    solrQuery.setStart(paginationStart);
    solrQuery.setRows(numResultsPerPage);
    logger.fine("Solr query:" + solrQuery);

    // -----------------------------------
    // Make the solr query
    // -----------------------------------
    QueryResponse queryResponse;
    try {
        queryResponse = solrServer.query(solrQuery);
    } catch (RemoteSolrException ex) {
        String messageFromSolr = ex.getLocalizedMessage();
        String error = "Search Syntax Error: ";
        String stringToHide = "org.apache.solr.search.SyntaxError: ";
        // The exception message may be null, so guard before inspecting it.
        if (messageFromSolr != null && messageFromSolr.startsWith(stringToHide)) {
            // hide "org.apache.solr..."
            error += messageFromSolr.substring(stringToHide.length());
        } else {
            error += messageFromSolr;
        }
        logger.info(error);
        // we can't show anything because of the search syntax error
        SolrQueryResponse exceptionSolrQueryResponse = new SolrQueryResponse(solrQuery);
        exceptionSolrQueryResponse.setError(error);
        List<SolrSearchResult> emptySolrSearchResults = new ArrayList<>();
        List<FacetCategory> exceptionFacetCategoryList = new ArrayList<>();
        Map<String, List<String>> emptySpellingSuggestion = new HashMap<>();
        exceptionSolrQueryResponse.setNumResultsFound(0L);
        exceptionSolrQueryResponse.setResultsStart(0L);
        exceptionSolrQueryResponse.setSolrSearchResults(emptySolrSearchResults);
        exceptionSolrQueryResponse.setFacetCategoryList(exceptionFacetCategoryList);
        exceptionSolrQueryResponse.setTypeFacetCategories(exceptionFacetCategoryList);
        exceptionSolrQueryResponse.setSpellingSuggestionsByToken(emptySpellingSuggestion);
        return exceptionSolrQueryResponse;
    } catch (SolrServerException ex) {
        throw new SearchException("Internal Dataverse Search Engine Error", ex);
    }

    // -----------------------------------
    // Convert documents into search results
    // -----------------------------------
    SolrDocumentList docs = queryResponse.getResults();
    List<SolrSearchResult> solrSearchResults = new ArrayList<>();
    /**
     * @todo refactor SearchFields to a hashmap (or something? put in
     * database? internationalize?) to avoid the crazy reflection and string
     * manipulation below
     */
    Object searchFieldsObject = new SearchFields();
    Field[] staticSearchFields = searchFieldsObject.getClass().getDeclaredFields();
    String titleSolrField = null;
    try {
        DatasetFieldType titleDatasetField = datasetFieldService.findByName(DatasetFieldConstant.title);
        if (titleDatasetField != null) {
            titleSolrField = titleDatasetField.getSolrField().getNameSearchable();
        }
    } catch (EJBTransactionRolledbackException ex) {
        logger.info("Couldn't find " + DatasetFieldConstant.title);
        if (ex.getCause() instanceof TransactionRolledbackLocalException) {
            if (ex.getCause().getCause() instanceof NoResultException) {
                logger.info("Caught NoResultException");
            }
        }
    }
    Map<String, String> datasetfieldFriendlyNamesBySolrField = new HashMap<>();
    Map<String, String> staticSolrFieldFriendlyNamesBySolrField = new HashMap<>();
    String baseUrl = systemConfig.getDataverseSiteUrl();
    // Fetched once; keyed by document id, then by Solr field name.
    Map<String, Map<String, List<String>>> highlighting = queryResponse.getHighlighting();

    for (SolrDocument solrDocument : docs) {
        String id = (String) solrDocument.getFieldValue(SearchFields.ID);
        Long entityid = (Long) solrDocument.getFieldValue(SearchFields.ENTITY_ID);
        String type = (String) solrDocument.getFieldValue(SearchFields.TYPE);
        float score = (Float) solrDocument.getFieldValue(SearchFields.RELEVANCE);
        logger.fine("score for " + id + ": " + score);
        String identifier = (String) solrDocument.getFieldValue(SearchFields.IDENTIFIER);
        String citation = (String) solrDocument.getFieldValue(SearchFields.DATASET_CITATION);
        String citationPlainHtml = (String) solrDocument.getFieldValue(SearchFields.DATASET_CITATION_HTML);
        String persistentUrl = (String) solrDocument.getFieldValue(SearchFields.PERSISTENT_URL);
        String name = (String) solrDocument.getFieldValue(SearchFields.NAME);
        String nameSort = (String) solrDocument.getFieldValue(SearchFields.NAME_SORT);
        String title = (String) solrDocument.getFieldValue(titleSolrField);
        Long datasetVersionId = (Long) solrDocument.getFieldValue(SearchFields.DATASET_VERSION_ID);
        String deaccessionReason = (String) solrDocument.getFieldValue(SearchFields.DATASET_DEACCESSION_REASON);
        String filetype = (String) solrDocument.getFieldValue(SearchFields.FILE_TYPE_FRIENDLY);
        String fileContentType = (String) solrDocument.getFieldValue(SearchFields.FILE_CONTENT_TYPE);
        Date release_or_create_date = (Date) solrDocument.getFieldValue(SearchFields.RELEASE_OR_CREATE_DATE);
        String dateToDisplayOnCard = (String) solrDocument.getFirstValue(SearchFields.RELEASE_OR_CREATE_DATE_SEARCHABLE_TEXT);
        String dvTree = (String) solrDocument.getFirstValue(SearchFields.SUBTREE);

        List<String> matchedFields = new ArrayList<>();
        List<Highlight> highlights = new ArrayList<>();
        Map<SolrField, Highlight> highlightsMap = new HashMap<>();
        Map<String, Highlight> highlightsMap3 = new HashMap<>();
        Map<String, List<String>> highlightsForDocument = (highlighting == null) ? null : highlighting.get(id);
        if (highlightsForDocument != null) {
            for (Map.Entry<String, String> entry : solrFieldsToHightlightOnMap.entrySet()) {
                String field = entry.getKey();
                String displayName = entry.getValue();
                List<String> highlightSnippets = highlightsForDocument.get(field);
                if (highlightSnippets != null) {
                    matchedFields.add(field);
                    /**
                     * @todo only SolrField.SolrType.STRING? that's not
                     * right... knit the SolrField object more into the
                     * highlighting stuff
                     */
                    SolrField solrField = new SolrField(field, SolrField.SolrType.STRING, true, true);
                    Highlight highlight = new Highlight(solrField, highlightSnippets, displayName);
                    highlights.add(highlight);
                    highlightsMap.put(solrField, highlight);
                    highlightsMap3.put(field, highlight);
                }
            }
        }

        SolrSearchResult solrSearchResult = new SolrSearchResult(query, name);
        /**
         * @todo put all this in the constructor?
         */
        List<String> states = (ArrayList<String>) solrDocument.getFieldValue(SearchFields.PUBLICATION_STATUS);
        if (states != null) {
            // set list of all statuses
            // this method also sets booleans for individual statuses
            solrSearchResult.setPublicationStatuses(states);
        }
        solrSearchResult.setId(id);
        solrSearchResult.setEntityId(entityid);
        if (retrieveEntities) {
            solrSearchResult.setEntity(dvObjectService.findDvObject(entityid));
        }
        solrSearchResult.setIdentifier(identifier);
        solrSearchResult.setPersistentUrl(persistentUrl);
        solrSearchResult.setType(type);
        solrSearchResult.setScore(score);
        solrSearchResult.setNameSort(nameSort);
        solrSearchResult.setReleaseOrCreateDate(release_or_create_date);
        solrSearchResult.setDateToDisplayOnCard(dateToDisplayOnCard);
        solrSearchResult.setMatchedFields(matchedFields);
        solrSearchResult.setHighlightsAsList(highlights);
        solrSearchResult.setHighlightsMap(highlightsMap);
        solrSearchResult.setHighlightsAsMap(highlightsMap3);
        Map<String, String> parent = new HashMap<>();
        String description = (String) solrDocument.getFieldValue(SearchFields.DESCRIPTION);
        solrSearchResult.setDescriptionNoSnippet(description);
        solrSearchResult.setDeaccessionReason(deaccessionReason);
        solrSearchResult.setDvTree(dvTree);
        String originSource = (String) solrDocument.getFieldValue(SearchFields.METADATA_SOURCE);
        if (IndexServiceBean.HARVESTED.equals(originSource)) {
            solrSearchResult.setHarvested(true);
        }

        /**
         * @todo start using SearchConstants class here
         */
        // Literal-first comparisons: a document without a type value no
        // longer aborts the whole search with a NullPointerException.
        if ("dataverses".equals(type)) {
            solrSearchResult.setName(name);
            solrSearchResult.setHtmlUrl(baseUrl + "/dataverse/" + identifier);
            solrSearchResult.setImageUrl(baseUrl + "/api/access/dvCardImage/" + entityid);
            /**
             * @todo Expose this API URL after "dvs" is changed to
             * "dataverses". Also, is an API token required for published
             * dataverses? Michael: url changed.
             */
            // solrSearchResult.setApiUrl(baseUrl + "/api/dataverses/" + entityid);
        } else if ("datasets".equals(type)) {
            solrSearchResult.setHtmlUrl(baseUrl + "/dataset.xhtml?globalId=" + identifier);
            solrSearchResult.setApiUrl(baseUrl + "/api/datasets/" + entityid);
            solrSearchResult.setImageUrl(baseUrl + "/api/access/dsCardImage/" + datasetVersionId);
            /**
             * @todo Could use getFieldValues (plural) here.
             */
            ArrayList<String> datasetDescriptions = (ArrayList<String>) solrDocument.getFieldValue(SearchFields.DATASET_DESCRIPTION);
            if (datasetDescriptions != null && !datasetDescriptions.isEmpty()) {
                String firstDatasetDescription = datasetDescriptions.get(0);
                if (firstDatasetDescription != null) {
                    solrSearchResult.setDescriptionNoSnippet(firstDatasetDescription);
                }
            }
            solrSearchResult.setDatasetVersionId(datasetVersionId);
            solrSearchResult.setCitation(citation);
            solrSearchResult.setCitationHtml(citationPlainHtml);
            if (title != null) {
                solrSearchResult.setTitle(title);
            } else {
                logger.fine("No title indexed. Setting to empty string to prevent NPE. Dataset id " + entityid + " and version id " + datasetVersionId);
                solrSearchResult.setTitle("");
            }
            List<String> authors = (ArrayList) solrDocument.getFieldValues(DatasetFieldConstant.authorName);
            if (authors != null) {
                solrSearchResult.setDatasetAuthors(authors);
            }
        } else if ("files".equals(type)) {
            String parentGlobalId = null;
            Object parentGlobalIdObject = solrDocument.getFieldValue(SearchFields.PARENT_IDENTIFIER);
            if (parentGlobalIdObject != null) {
                parentGlobalId = (String) parentGlobalIdObject;
                parent.put(SolrSearchResult.PARENT_IDENTIFIER, parentGlobalId);
            }
            solrSearchResult.setHtmlUrl(baseUrl + "/dataset.xhtml?persistentId=" + parentGlobalId);
            solrSearchResult.setDownloadUrl(baseUrl + "/api/access/datafile/" + entityid);
            /**
             * @todo We are not yet setting the API URL for files because
             * not all files have metadata. Only subsettable files (those
             * with a datatable) seem to have metadata. Furthermore, the
             * response is in XML whereas the rest of the Search API returns
             * JSON.
             */
            // solrSearchResult.setApiUrl(baseUrl + "/api/meta/datafile/" + entityid);
            solrSearchResult.setImageUrl(baseUrl + "/api/access/fileCardImage/" + entityid);
            solrSearchResult.setName(name);
            solrSearchResult.setFiletype(filetype);
            solrSearchResult.setFileContentType(fileContentType);
            Object fileSizeInBytesObject = solrDocument.getFieldValue(SearchFields.FILE_SIZE_IN_BYTES);
            if (fileSizeInBytesObject != null) {
                try {
                    long fileSizeInBytesLong = (long) fileSizeInBytesObject;
                    solrSearchResult.setFileSizeInBytes(fileSizeInBytesLong);
                } catch (ClassCastException ex) {
                    logger.info("Could not cast file " + entityid + " to long for " + SearchFields.FILE_SIZE_IN_BYTES + ": " + ex.getLocalizedMessage());
                }
            }
            solrSearchResult.setFileMd5((String) solrDocument.getFieldValue(SearchFields.FILE_MD5));
            try {
                solrSearchResult.setFileChecksumType((DataFile.ChecksumType) DataFile.ChecksumType.fromString((String) solrDocument.getFieldValue(SearchFields.FILE_CHECKSUM_TYPE)));
            } catch (IllegalArgumentException ex) {
                logger.info("Exception setting setFileChecksumType: " + ex);
            }
            solrSearchResult.setFileChecksumValue((String) solrDocument.getFieldValue(SearchFields.FILE_CHECKSUM_VALUE));
            solrSearchResult.setUnf((String) solrDocument.getFieldValue(SearchFields.UNF));
            solrSearchResult.setDatasetVersionId(datasetVersionId);
            List<String> fileCategories = (ArrayList) solrDocument.getFieldValues(SearchFields.FILE_TAG);
            if (fileCategories != null) {
                solrSearchResult.setFileCategories(fileCategories);
            }
            List<String> tabularDataTags = (ArrayList) solrDocument.getFieldValues(SearchFields.TABDATA_TAG);
            if (tabularDataTags != null) {
                Collections.sort(tabularDataTags);
                solrSearchResult.setTabularDataTags(tabularDataTags);
            }
        }
        /**
         * @todo store PARENT_ID as a long instead and cast as such
         */
        parent.put("id", (String) solrDocument.getFieldValue(SearchFields.PARENT_ID));
        parent.put("name", (String) solrDocument.getFieldValue(SearchFields.PARENT_NAME));
        parent.put("citation", (String) solrDocument.getFieldValue(SearchFields.PARENT_CITATION));
        solrSearchResult.setParent(parent);
        solrSearchResults.add(solrSearchResult);
    }

    // -----------------------------------
    // Spelling suggestions
    // -----------------------------------
    Map<String, List<String>> spellingSuggestionsByToken = new HashMap<>();
    SpellCheckResponse spellCheckResponse = queryResponse.getSpellCheckResponse();
    if (spellCheckResponse != null) {
        List<SpellCheckResponse.Suggestion> suggestions = spellCheckResponse.getSuggestions();
        for (SpellCheckResponse.Suggestion suggestion : suggestions) {
            spellingSuggestionsByToken.put(suggestion.getToken(), suggestion.getAlternatives());
        }
    }

    // -----------------------------------
    // Field facets
    // -----------------------------------
    List<FacetCategory> facetCategoryList = new ArrayList<>();
    List<FacetCategory> typeFacetCategories = new ArrayList<>();
    boolean hidePublicationStatusFacet = true;
    boolean draftsAvailable = false;
    boolean unpublishedAvailable = false;
    boolean deaccessionedAvailable = false;
    boolean hideMetadataSourceFacet = true;
    List<FacetField> facetFields = queryResponse.getFacetFields();
    if (facetFields == null) {
        // Solr sent no field facets at all; treat as an empty list.
        facetFields = Collections.emptyList();
    }
    for (FacetField facetField : facetFields) {
        String facetName = facetField.getName();
        FacetCategory facetCategory = new FacetCategory();
        List<FacetLabel> facetLabelList = new ArrayList<>();
        int numMetadataSources = 0;
        for (FacetField.Count facetFieldCount : facetField.getValues()) {
            /**
             * @todo we do want to show the count for each facet
             */
            if (facetFieldCount.getCount() > 0) {
                FacetLabel facetLabel = new FacetLabel(facetFieldCount.getName(), facetFieldCount.getCount());
                // quote field facets
                facetLabel.setFilterQuery(facetName + ":\"" + facetFieldCount.getName() + "\"");
                facetLabelList.add(facetLabel);
                if (facetName.equals(SearchFields.PUBLICATION_STATUS)) {
                    if (facetLabel.getName().equals(IndexServiceBean.getUNPUBLISHED_STRING())) {
                        unpublishedAvailable = true;
                    } else if (facetLabel.getName().equals(IndexServiceBean.getDRAFT_STRING())) {
                        draftsAvailable = true;
                    } else if (facetLabel.getName().equals(IndexServiceBean.getDEACCESSIONED_STRING())) {
                        deaccessionedAvailable = true;
                    }
                }
                if (facetName.equals(SearchFields.METADATA_SOURCE)) {
                    numMetadataSources++;
                }
            }
        }
        // The metadata source facet is only shown when it offers a choice.
        if (numMetadataSources > 1) {
            hideMetadataSourceFacet = false;
        }
        facetCategory.setName(facetName);
        // hopefully people will never see the raw facetField.getName() because it may well have an _s at the end
        facetCategory.setFriendlyName(facetName);
        // try to find a friendlier name to display as a facet
        /**
         * @todo hmm, we thought we wanted the datasetFields array to go
         * away once we have more granularity than findAll() available per
         * the todo above but we need a way to lookup by Solr field, so
         * we'll build a hashmap
         */
        for (DatasetFieldType datasetField : datasetFields) {
            String solrFieldNameForDataset = datasetField.getSolrField().getNameFacetable();
            String friendlyName = datasetField.getDisplayName();
            if (solrFieldNameForDataset != null && facetName.endsWith(datasetField.getTmpNullFieldTypeIdentifier())) {
                // give it the non-friendly name so we remember to update the reference data script for datasets
                facetCategory.setName(facetName);
            } else if (solrFieldNameForDataset != null && facetName.equals(solrFieldNameForDataset)) {
                if (friendlyName != null && !friendlyName.isEmpty()) {
                    facetCategory.setFriendlyName(friendlyName);
                    // stop examining available dataset fields. we found a match
                    break;
                }
            }
            datasetfieldFriendlyNamesBySolrField.put(solrFieldNameForDataset, friendlyName);
        }
        /**
         * @todo get rid of this crazy reflection, per todo above... or
         * should we... let's put into a hash the friendly names of facet
         * categories, indexed by Solr field
         */
        for (Field fieldObject : staticSearchFields) {
            String constantName = fieldObject.getName();
            String staticSearchField = null;
            try {
                staticSearchField = (String) fieldObject.get(searchFieldsObject);
            } catch (IllegalArgumentException | IllegalAccessException ex) {
                logger.log(Level.SEVERE, null, ex);
            }
            if (staticSearchField != null && facetName.equals(staticSearchField)) {
                // e.g. a constant named FILE_TYPE becomes "File Type"
                String[] parts = constantName.split("_");
                StringBuilder stringBuilder = new StringBuilder();
                for (String part : parts) {
                    stringBuilder.append(getCapitalizedName(part.toLowerCase())).append(" ");
                }
                String friendlyName = stringBuilder.toString().replaceAll(" $", "");
                facetCategory.setFriendlyName(friendlyName);
                staticSolrFieldFriendlyNamesBySolrField.put(staticSearchField, friendlyName);
                // stop examining the declared/static fields in the SearchFields object. we found a match
                break;
            }
        }
        facetCategory.setFacetLabel(facetLabelList);
        if (!facetLabelList.isEmpty()) {
            if (facetCategory.getName().equals(SearchFields.TYPE)) {
                // the "type" facet is special, these are not
                typeFacetCategories.add(facetCategory);
            } else if (facetCategory.getName().equals(SearchFields.PUBLICATION_STATUS)) {
                if (unpublishedAvailable || draftsAvailable || deaccessionedAvailable) {
                    hidePublicationStatusFacet = false;
                }
                if (!hidePublicationStatusFacet) {
                    facetCategoryList.add(facetCategory);
                }
            } else if (facetCategory.getName().equals(SearchFields.METADATA_SOURCE)) {
                if (!hideMetadataSourceFacet) {
                    facetCategoryList.add(facetCategory);
                }
            } else {
                facetCategoryList.add(facetCategory);
            }
        }
    }

    // -----------------------------------
    // Range facets
    // -----------------------------------
    // for now the only range facet is citation year
    List<RangeFacet> rangeFacets = queryResponse.getFacetRanges();
    if (rangeFacets == null) {
        // no range facets in the response; treat as an empty list
        rangeFacets = Collections.emptyList();
    }
    for (RangeFacet rangeFacet : rangeFacets) {
        FacetCategory facetCategory = new FacetCategory();
        List<FacetLabel> facetLabelList = new ArrayList<>();
        for (Object rfObj : rangeFacet.getCounts()) {
            RangeFacet.Count rangeFacetCount = (RangeFacet.Count) rfObj;
            int start = Integer.parseInt(rangeFacetCount.getValue());
            // subtract one to avoid overlapping dates
            int end = start + Integer.parseInt(rangeFacet.getGap().toString()) - 1;
            if (rangeFacetCount.getCount() > 0) {
                FacetLabel facetLabel = new FacetLabel(start + "-" + end, Long.valueOf(rangeFacetCount.getCount()));
                // special [12 TO 34] syntax for range facets
                facetLabel.setFilterQuery(rangeFacet.getName() + ":" + "[" + start + " TO " + end + "]");
                facetLabelList.add(facetLabel);
            }
        }
        facetCategory.setName(rangeFacet.getName());
        // reverse to show the newest citation year range at the top
        List<FacetLabel> facetLabelListReversed = new ArrayList<>(facetLabelList);
        Collections.reverse(facetLabelListReversed);
        facetCategory.setFacetLabel(facetLabelListReversed);
        if (!facetLabelList.isEmpty()) {
            facetCategoryList.add(facetCategory);
        }
    }

    // -----------------------------------
    // Assemble the response
    // -----------------------------------
    SolrQueryResponse solrQueryResponse = new SolrQueryResponse(solrQuery);
    solrQueryResponse.setSolrSearchResults(solrSearchResults);
    solrQueryResponse.setSpellingSuggestionsByToken(spellingSuggestionsByToken);
    solrQueryResponse.setFacetCategoryList(facetCategoryList);
    solrQueryResponse.setTypeFacetCategories(typeFacetCategories);
    solrQueryResponse.setNumResultsFound(queryResponse.getResults().getNumFound());
    solrQueryResponse.setResultsStart(queryResponse.getResults().getStart());
    solrQueryResponse.setDatasetfieldFriendlyNamesBySolrField(datasetfieldFriendlyNamesBySolrField);
    solrQueryResponse.setStaticSolrFieldFriendlyNamesBySolrField(staticSolrFieldFriendlyNamesBySolrField);
    String[] filterQueriesArray = solrQuery.getFilterQueries();
    if (filterQueriesArray != null) {
        // null check added because these tests were failing: mvn test -Dtest=SearchIT
        List<String> actualFilterQueries = Arrays.asList(filterQueriesArray);
        logger.fine("actual filter queries: " + actualFilterQueries);
        solrQueryResponse.setFilterQueriesActual(actualFilterQueries);
    } else {
        // how often is this null?
        logger.info("solrQuery.getFilterQueries() was null");
    }
    solrQueryResponse.setDvObjectCounts(queryResponse.getFacetField("dvObjectType"));
    solrQueryResponse.setPublicationStatusCounts(queryResponse.getFacetField("publicationStatus"));
    return solrQueryResponse;
}
/**
 * Looks up the text for a resource bundle key through the shared
 * {@code JH} helper.
 *
 * @param bundleKey the key to look up
 * @return the text found for the key, or the literal "Match" when the
 * lookup throws
 */
private static String localize(String bundleKey) {
    String localized;
    try {
        localized = JH.localize(bundleKey);
    } catch (Exception e) {
        // can throw MissingResourceException
        localized = "Match";
    }
    return localized;
}
/**
 * Returns the given text with its first character converted to upper case;
 * the remaining characters are left untouched.
 *
 * @param name the text to capitalize; may be null or empty
 * @return the capitalized text, or {@code name} itself when it is null or
 * empty (there is no first character to change)
 */
public String getCapitalizedName(String name) {
    if (name == null || name.isEmpty()) {
        // charAt(0) would throw on an empty string
        return name;
    }
    return Character.toUpperCase(name.charAt(0)) + name.substring(1);
}
/**
 * Builds the Solr filter query that restricts search results to documents
 * the requesting user is allowed to discover. (This logic was moved out of
 * the "search" function.)
 *
 * The result depends on the kind of user making the request:
 * <ol>
 * <li>A {@link GuestUser} (a {@link PrivateUrlUser} is treated as a guest):
 * a join on the public group plus any groups resolved for the request, such
 * as IP groups.</li>
 * <li>A superuser: {@code null}, i.e. no permission join.</li>
 * <li>An authenticated user asking only for data related to them while
 * {@code systemConfig.myDataDoesNotUsePermissionDocs()} is true:
 * {@code null}, i.e. no permission join.</li>
 * <li>Any other authenticated user: a join on the public group, the user's
 * own per-user group and any groups resolved for the request.</li>
 * </ol>
 *
 * As a side effect, the publication status facet field is added to
 * {@code solrQuery} for every authenticated user.
 *
 * @todo For people who are not logged in, should we show stuff indexed with
 * the "AllUsers" group or not? Guests currently get the public group plus
 * whatever groups are resolved for the request.
 * @todo all this code needs cleanup and clarification.
 *
 * @param dataverseRequest the request whose user and groups determine visibility
 * @param solrQuery the query being assembled; must not be null
 * @param dataverse not used by this method
 * @param onlyDatatRelatedToMe true when the search is limited to data related to the user
 * @return the permission filter query, or {@code null} when no permission
 * join is to be applied
 * @throws NullPointerException if the request has no user or {@code solrQuery} is null
 * @throws IllegalStateException if the user is neither a guest nor an {@link AuthenticatedUser}
 */
private String getPermissionFilterQuery(DataverseRequest dataverseRequest, SolrQuery solrQuery, Dataverse dataverse, boolean onlyDatatRelatedToMe) {
    User user = dataverseRequest.getUser();
    if (user == null) {
        throw new NullPointerException("user cannot be null");
    }
    if (solrQuery == null) {
        throw new NullPointerException("solrQuery cannot be null");
    }

    // Returned when no Solr join is applied. Dangerous: such a user will be
    // able to see EVERYTHING in Solr with no regard to permissions!
    final String dangerZoneNoSolrJoin = null;

    if (user instanceof PrivateUrlUser) {
        user = GuestUser.get();
    }

    // ----------------------------------------------------
    // (1) Is this a GuestUser?
    // Yes, see if GuestUser is part of any groups such as IP Groups.
    // ----------------------------------------------------
    if (user instanceof GuestUser) {
        String groupsFromProviders = buildGroupsFromProvidersClause(dataverseRequest);
        logger.fine("groupsFromProviders:" + groupsFromProviders);
        String guestWithGroups = buildDiscoverableByJoin(IndexServiceBean.getPublicGroupString() + groupsFromProviders);
        logger.fine(guestWithGroups);
        return guestWithGroups;
    }

    // ----------------------------------------------------
    // (2) Retrieve Authenticated User
    // ----------------------------------------------------
    if (!(user instanceof AuthenticatedUser)) {
        logger.severe("Should never reach here. A User must be an AuthenticatedUser or a Guest");
        throw new IllegalStateException("A User must be an AuthenticatedUser or a Guest");
    }
    AuthenticatedUser au = (AuthenticatedUser) user;

    // Logged in user, has publication status facet
    solrQuery.addFacetField(SearchFields.PUBLICATION_STATUS);

    // ----------------------------------------------------
    // (3) Is this a Super User?
    // Yes, give back everything
    // ----------------------------------------------------
    if (au.isSuperuser()) {
        return dangerZoneNoSolrJoin;
    }

    // ----------------------------------------------------
    // (4) User is logged in AND onlyDatatRelatedToMe == true
    // Yes, give back everything -> the settings will be in
    // the filterqueries given to search
    // ----------------------------------------------------
    if (onlyDatatRelatedToMe) {
        if (systemConfig.myDataDoesNotUsePermissionDocs()) {
            logger.fine("old 4.2 behavior: MyData is not using Solr permission docs");
            return dangerZoneNoSolrJoin;
        }
        logger.fine("new post-4.2 behavior: MyData is using Solr permission docs");
    }

    // ----------------------------------------------------
    // (5) Work with Authenticated User who is not a Superuser
    // ----------------------------------------------------
    /**
     * Every AuthenticatedUser is part of a "User Private Group" (UGP), a
     * concept we borrow from RHEL:
     * https://access.redhat.com/site/documentation/en-US/Red_Hat_Enterprise_Linux/6/html/Deployment_Guide/ch-Managing_Users_and_Groups.html#s2-users-groups-private-groups
     *
     * From a search perspective, we don't care about if the group was
     * created within one dataverse or another. We just want a list of *all*
     * the groups the user is part of. We are greedy. We want all BuiltIn
     * Groups, Shibboleth Groups, IP Groups, "system" groups, everything.
     *
     * A JOIN on "permission documents" will determine if the user can find
     * a given "content document" (dataset version, etc) in Solr.
     */
    String groupsFromProviders = buildGroupsFromProvidersClause(dataverseRequest);
    logger.fine(groupsFromProviders);
    String publicPlusUserPrivateGroup = buildDiscoverableByJoin(
            IndexServiceBean.getPublicGroupString() + " OR " + IndexServiceBean.getGroupPerUserPrefix() + au.getId() + groupsFromProviders);
    logger.fine(publicPlusUserPrivateGroup);
    return publicPlusUserPrivateGroup;
}

/**
 * Collects every group (including ancestors) resolved for the request and
 * renders each one that has a non-empty alias as {@code " OR <prefix><alias>"}.
 * Examples of rendered group names noted by the original authors:
 * group_builtIn/all-users, group_builtIn/authenticated-users,
 * group_1-explictGroup1, group_shib/2, ip/ipGroup3.
 *
 * @param dataverseRequest the request whose groups are looked up
 * @return the concatenated clauses; empty when no group has a usable alias
 */
private String buildGroupsFromProvidersClause(DataverseRequest dataverseRequest) {
    Set<Group> groups = groupService.collectAncestors(groupService.groupsFor(dataverseRequest));
    StringBuilder clauses = new StringBuilder();
    for (Group group : groups) {
        logger.fine("found group " + group.getIdentifier() + " with alias " + group.getAlias());
        String groupAlias = group.getAlias();
        if (groupAlias != null && !groupAlias.isEmpty()) {
            clauses.append(" OR ").append(IndexServiceBean.getGroupPrefix()).append(groupAlias);
        }
    }
    return clauses.toString();
}

/**
 * Wraps a list of group identifiers in the Solr join used to match content
 * documents against the "discoverable by" field of their definition point.
 *
 * @param discoverableByMembers the already OR-separated group identifiers
 * @return the complete join filter query
 */
private static String buildDiscoverableByJoin(String discoverableByMembers) {
    return "{!join from=" + SearchFields.DEFINITION_POINT + " to=id}" + SearchFields.DISCOVERABLE_BY + ":(" + discoverableByMembers + ")";
}
}