package edu.harvard.iq.dataverse.search.savedsearch;

import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetLinkingDataverse;
import edu.harvard.iq.dataverse.DatasetLinkingServiceBean;
import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.DataverseLinkingDataverse;
import edu.harvard.iq.dataverse.DataverseLinkingServiceBean;
import edu.harvard.iq.dataverse.DvObject;
import edu.harvard.iq.dataverse.DvObjectServiceBean;
import edu.harvard.iq.dataverse.EjbDataverseEngine;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
import edu.harvard.iq.dataverse.search.SearchServiceBean;
import edu.harvard.iq.dataverse.search.SolrQueryResponse;
import edu.harvard.iq.dataverse.search.SolrSearchResult;
import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
import edu.harvard.iq.dataverse.engine.command.impl.LinkDatasetCommand;
import edu.harvard.iq.dataverse.engine.command.impl.LinkDataverseCommand;
import edu.harvard.iq.dataverse.search.SearchException;
import edu.harvard.iq.dataverse.search.SearchFields;
import edu.harvard.iq.dataverse.search.SortBy;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.ejb.EJB;
import javax.ejb.Stateless;
import javax.inject.Named;
import javax.json.Json;
import javax.json.JsonArrayBuilder;
import javax.json.JsonObjectBuilder;
import javax.persistence.EntityManager;
import javax.persistence.NoResultException;
import javax.persistence.NonUniqueResultException;
import javax.persistence.PersistenceContext;
import javax.persistence.TypedQuery;
import javax.servlet.http.HttpServletRequest;

/**
 * Persistence and execution service for {@link SavedSearch} entities.
 *
 * Besides basic CRUD operations, this bean re-runs saved searches and links
 * the dataverses and datasets they match to each saved search's definition
 * point, reporting what happened for every hit as JSON.
 */
@Stateless
@Named
public class SavedSearchServiceBean {

    private static final Logger logger = Logger.getLogger(SavedSearchServiceBean.class.getCanonicalName());

    /** JSON key under which the outcome for each search hit is reported. */
    private static final String RESULT_KEY = "result";

    @EJB
    SearchServiceBean searchService;
    @EJB
    DvObjectServiceBean dvObjectService;
    @EJB
    DatasetLinkingServiceBean datasetLinkingService;
    @EJB
    DataverseLinkingServiceBean dataverseLinkingService;
    @EJB
    EjbDataverseEngine commandEngine;

    @PersistenceContext(unitName = "VDCNet-ejbPU")
    private EntityManager em;

    /**
     * Looks up a saved search by its database id.
     *
     * @param id the id of the saved search
     * @return the matching saved search, or {@code null} if the query yields
     * no result or more than one result
     */
    public SavedSearch find(long id) {
        TypedQuery<SavedSearch> typedQuery = em.createQuery("SELECT OBJECT(o) FROM SavedSearch AS o WHERE o.id = :id", SavedSearch.class);
        typedQuery.setParameter("id", id);
        try {
            return typedQuery.getSingleResult();
        } catch (NoResultException | NonUniqueResultException ex) {
            return null;
        }
    }

    /**
     * @return all saved searches, ordered by id
     */
    public List<SavedSearch> findAll() {
        TypedQuery<SavedSearch> typedQuery = em.createQuery("SELECT OBJECT(o) FROM SavedSearch AS o ORDER BY o.id", SavedSearch.class);
        return typedQuery.getResultList();
    }

    /**
     * Merges the given saved search into the persistence context on a
     * best-effort basis.
     *
     * @param toPersist the saved search to store
     * @return the managed instance, or {@code null} if the merge threw an
     * exception (the failure is logged, not rethrown)
     */
    public SavedSearch add(SavedSearch toPersist) {
        /**
         * @todo Don't let anyone persist the same saved search twice. What does
         * "same" mean. For the first cut we'll check for a String match of both
         * query and filterQueries.
         *
         * @todo Don't allow wildcard queries.
         */
        SavedSearch persisted = null;
        try {
            persisted = em.merge(toPersist);
        } catch (Exception ex) {
            // Deliberately best-effort: callers get null rather than an
            // exception, but the cause is kept in the log.
            logger.log(Level.WARNING, "exception while adding saved search: " + ex, ex);
        }
        return persisted;
    }

    /**
     * Deletes the saved search with the given id, if it exists.
     *
     * @param id the id of the saved search to delete
     * @return {@code true} if a saved search was found and removed,
     * {@code false} otherwise
     */
    public boolean delete(long id) {
        SavedSearch doomed = find(id);
        if (doomed == null) {
            logger.warning("problem deleting saved search id " + id);
            return false;
        }
        logger.info("deleting saved search id " + doomed.getId());
        em.remove(doomed);
        em.flush();
        return true;
    }

    /**
     * Persists a new saved search (one without an id) or merges an existing
     * one.
     *
     * @param savedSearch the saved search to store
     * @return the instance passed in when it was newly persisted, otherwise
     * the managed instance returned by the merge
     */
    public SavedSearch save(SavedSearch savedSearch) {
        if (savedSearch.getId() == null) {
            em.persist(savedSearch);
            return savedSearch;
        }
        return em.merge(savedSearch);
    }

    /**
     * Runs {@link #makeLinksForSingleSavedSearch} for every saved search,
     * each time on behalf of that saved search's creator.
     *
     * @param debugFlag whether to include debug details for each saved search
     * @return a JSON object with one array entry per saved search under the
     * key "hits by saved search"
     * @throws SearchException if executing a search fails
     * @throws CommandException if a link command fails
     */
    public JsonObjectBuilder makeLinksForAllSavedSearches(boolean debugFlag) throws SearchException, CommandException {
        JsonObjectBuilder response = Json.createObjectBuilder();
        JsonArrayBuilder savedSearchArrayBuilder = Json.createArrayBuilder();
        for (SavedSearch savedSearch : findAll()) {
            DataverseRequest dataverseRequest = new DataverseRequest(savedSearch.getCreator(), getHttpServletRequest());
            JsonObjectBuilder perSavedSearchResponse = makeLinksForSingleSavedSearch(dataverseRequest, savedSearch, debugFlag);
            savedSearchArrayBuilder.add(perSavedSearchResponse);
        }
        response.add("hits by saved search", savedSearchArrayBuilder);
        return response;
    }

    /**
     * The "Saved Search" and highly related "Linked Dataverses and Linked
     * Datasets" features can be thought of as periodic execution of the
     * LinkDataverseCommand and LinkDatasetCommand. As of this writing that
     * periodic execution can be triggered via a cron job but we'd like to put
     * it on an EJB timer as part of
     * https://github.com/IQSS/dataverse/issues/2543 .
     *
     * The commands are executed by the creator of the SavedSearch. What happens
     * if the user loses the permission that the command requires? Should the
     * commands continue to be executed periodically as some "system" user?
     *
     * A hit whose DvObject cannot be found is reported in the response and
     * skipped; the remaining hits are still processed.
     *
     * @param dvReq the request under which link commands are submitted
     * @param savedSearch the saved search to execute
     * @param debugFlag whether to add a "debug" object (creator id, query and
     * filter queries) to the response
     * @return Debug information in the form of a JSON object, which is much
     * more structured than a simple String.
     * @throws SearchException if executing the search fails
     * @throws CommandException if a link command fails
     */
    public JsonObjectBuilder makeLinksForSingleSavedSearch(DataverseRequest dvReq, SavedSearch savedSearch, boolean debugFlag) throws SearchException, CommandException {
        JsonObjectBuilder response = Json.createObjectBuilder();
        JsonArrayBuilder savedSearchArrayBuilder = Json.createArrayBuilder();
        JsonArrayBuilder infoPerHit = Json.createArrayBuilder();
        Dataverse definitionPoint = savedSearch.getDefinitionPoint();
        SolrQueryResponse queryResponse = findHits(savedSearch);
        for (SolrSearchResult solrSearchResult : queryResponse.getSolrSearchResults()) {
            JsonObjectBuilder hitInfo = Json.createObjectBuilder();
            hitInfo.add("name", solrSearchResult.getNameSort());
            hitInfo.add("dvObjectId", solrSearchResult.getEntityId());
            DvObject dvObjectThatDefinitionPointWillLinkTo = dvObjectService.findDvObject(solrSearchResult.getEntityId());
            if (dvObjectThatDefinitionPointWillLinkTo == null) {
                hitInfo.add(RESULT_KEY, "Could not find DvObject with id " + solrSearchResult.getEntityId());
                infoPerHit.add(hitInfo);
                // One unresolvable hit must not prevent the remaining hits
                // from being linked, so move on instead of leaving the loop.
                continue;
            }
            String outcome;
            if (dvObjectThatDefinitionPointWillLinkTo.isInstanceofDataverse()) {
                outcome = linkDataverseIfAppropriate(dvReq, definitionPoint, (Dataverse) dvObjectThatDefinitionPointWillLinkTo);
            } else if (dvObjectThatDefinitionPointWillLinkTo.isInstanceofDataset()) {
                outcome = linkDatasetIfAppropriate(dvReq, definitionPoint, (Dataset) dvObjectThatDefinitionPointWillLinkTo);
            } else if (dvObjectThatDefinitionPointWillLinkTo.isInstanceofDataFile()) {
                outcome = "Skipping because the search matched a file. The matched file id was " + dvObjectThatDefinitionPointWillLinkTo.getId() + ".";
            } else {
                outcome = "Unexpected DvObject type.";
            }
            hitInfo.add(RESULT_KEY, outcome);
            infoPerHit.add(hitInfo);
        }
        JsonObjectBuilder info = getInfo(savedSearch, infoPerHit);
        if (debugFlag) {
            info.add("debug", getDebugInfo(savedSearch));
        }
        savedSearchArrayBuilder.add(info);
        response.add("hits for saved search id " + savedSearch.getId(), savedSearchArrayBuilder);
        return response;
    }

    /**
     * Links a matched dataverse to the definition point unless the link is
     * redundant, and returns the message describing what was done.
     */
    private String linkDataverseIfAppropriate(DataverseRequest dvReq, Dataverse definitionPoint, Dataverse dataverseToLinkTo) throws CommandException {
        if (wouldResultInLinkingToItself(definitionPoint, dataverseToLinkTo)) {
            return "Skipping because dataverse id " + dataverseToLinkTo.getId() + " would link to itself.";
        }
        if (alreadyLinkedToTheDataverse(definitionPoint, dataverseToLinkTo)) {
            return "Skipping because dataverse " + definitionPoint.getId() + " already links to dataverse " + dataverseToLinkTo.getId() + ".";
        }
        if (dataverseToLinkToIsAlreadyPartOfTheSubtree(definitionPoint, dataverseToLinkTo)) {
            return "Skipping because " + dataverseToLinkTo + " is already part of the subtree for " + definitionPoint;
        }
        DataverseLinkingDataverse link = commandEngine.submitInNewTransaction(new LinkDataverseCommand(dvReq, definitionPoint, dataverseToLinkTo));
        return "Persisted DataverseLinkingDataverse id " + link.getId() + " link of " + dataverseToLinkTo + " to " + definitionPoint;
    }

    /**
     * Links a matched dataset to the definition point unless the link is
     * redundant, and returns the message describing what was done.
     */
    private String linkDatasetIfAppropriate(DataverseRequest dvReq, Dataverse definitionPoint, Dataset datasetToLinkTo) throws CommandException {
        if (alreadyLinkedToTheDataset(definitionPoint, datasetToLinkTo)) {
            return "Skipping because dataverse " + definitionPoint + " already links to dataset " + datasetToLinkTo + ".";
        }
        if (datasetToLinkToIsAlreadyPartOfTheSubtree(definitionPoint, datasetToLinkTo)) {
            // already there from normal search/browse
            return "Skipping because dataset " + datasetToLinkTo.getId() + " is already part of the subtree for " + definitionPoint.getAlias();
        }
        if (datasetAncestorAlreadyLinked(definitionPoint, datasetToLinkTo)) {
            return "FIXME: implement this?";
        }
        DatasetLinkingDataverse link = commandEngine.submitInNewTransaction(new LinkDatasetCommand(dvReq, definitionPoint, datasetToLinkTo));
        return "Persisted DatasetLinkingDataverse id " + link.getId() + " link of " + link.getDataset() + " to " + link.getLinkingDataverse();
    }

    /**
     * Executes the saved search as its creator, sorted by relevance
     * (descending), requesting all results in a single page.
     */
    private SolrQueryResponse findHits(SavedSearch savedSearch) throws SearchException {
        SortBy sortBy = new SortBy(SearchFields.RELEVANCE, SortBy.DESCENDING);
        int paginationStart = 0;
        boolean dataRelatedToMe = false;
        int numResultsPerPage = Integer.MAX_VALUE;
        return searchService.search(
                new DataverseRequest(savedSearch.getCreator(), getHttpServletRequest()),
                savedSearch.getDefinitionPoint(),
                savedSearch.getQuery(),
                savedSearch.getFilterQueriesAsStrings(),
                sortBy.getField(),
                sortBy.getOrder(),
                paginationStart,
                dataRelatedToMe,
                numResultsPerPage
        );
    }

    /**
     * Builds the per-saved-search summary: definition point alias, saved
     * search id and the per-hit outcomes.
     */
    private JsonObjectBuilder getInfo(SavedSearch savedSearch, JsonArrayBuilder infoPerHit) {
        JsonObjectBuilder info = Json.createObjectBuilder();
        info.add("definitionPointAlias", savedSearch.getDefinitionPoint().getAlias());
        info.add("savedSearchId", savedSearch.getId());
        info.add("hitInfo", infoPerHit);
        return info;
    }

    /**
     * Builds the optional debug object: creator id, query and filter queries.
     */
    private JsonObjectBuilder getDebugInfo(SavedSearch savedSearch) {
        JsonObjectBuilder debug = Json.createObjectBuilder();
        debug.add("creatorId", savedSearch.getCreator().getId());
        debug.add("query", savedSearch.getQuery());
        debug.add("filterQueries", getFilterQueries(savedSearch));
        return debug;
    }

    /**
     * Copies the saved search's filter queries into a JSON array.
     */
    private JsonArrayBuilder getFilterQueries(SavedSearch savedSearch) {
        JsonArrayBuilder filterQueriesArrayBuilder = Json.createArrayBuilder();
        for (String filterQueryToAdd : savedSearch.getFilterQueriesAsStrings()) {
            filterQueriesArrayBuilder.add(filterQueryToAdd);
        }
        return filterQueriesArrayBuilder;
    }

    private boolean alreadyLinkedToTheDataverse(Dataverse definitionPoint, Dataverse dataverseToLinkTo) {
        return dataverseLinkingService.alreadyLinked(definitionPoint, dataverseToLinkTo);
    }

    private boolean alreadyLinkedToTheDataset(Dataverse definitionPoint, Dataset linkToThisDataset) {
        return datasetLinkingService.alreadyLinked(definitionPoint, linkToThisDataset);
    }

    private static boolean wouldResultInLinkingToItself(Dataverse savedSearchDefinitionPoint, Dataverse dataverseToLinkTo) {
        return savedSearchDefinitionPoint.equals(dataverseToLinkTo);
    }

    /**
     * Walks up the dataset's owner chain and reports whether the definition
     * point is one of its ancestors.
     */
    private boolean datasetToLinkToIsAlreadyPartOfTheSubtree(Dataverse definitionPoint, Dataset datasetWeMayLinkTo) {
        for (Dataverse ancestor = datasetWeMayLinkTo.getOwner(); ancestor != null; ancestor = ancestor.getOwner()) {
            if (ancestor.equals(definitionPoint)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Walks from the candidate dataverse up its owner chain and reports
     * whether the definition point is the candidate itself or one of its
     * ancestors.
     */
    private boolean dataverseToLinkToIsAlreadyPartOfTheSubtree(Dataverse definitionPoint, Dataverse dataverseWeMayLinkTo) {
        StringBuilder aliasesSeen = new StringBuilder();
        Dataverse current = dataverseWeMayLinkTo;
        while (current != null) {
            String alias = current.getAlias();
            logger.fine("definitionPoint " + definitionPoint.getAlias() + " may link to " + alias);
            aliasesSeen.append(alias).append(" ");
            if (current.equals(definitionPoint)) {
                return true;
            }
            current = current.getOwner();
        }
        logger.fine("dataverse aliases seen on the way to root: " + aliasesSeen);
        return false;
    }

    /**
     * @todo Should we implement this? If so, also do the check at the files
     * level if there is a match on a file.
     */
    private boolean datasetAncestorAlreadyLinked(Dataverse definitionPoint, Dataset datasetToLinkTo) {
        return false;
    }

    /**
     * Supplies the HTTP request used when building a DataverseRequest for a
     * saved search; always {@code null}.
     *
     * This HttpServletRequest object is purposefully set to null. "There's
     * another issue here, though - the IP address. The request is sent from
     * a cron job - I assume localhost? - and it's source IP address is
     * different from the one the user may have, and quite possibly more
     * privileged. It maybe safest to pass in a null http request at this
     * stage." -- michbarsinai
     *
     * When Saved Search was designed, there was no DataverseRequest object
     * so what is persisted is the id of the AuthenticatedUser. When a Saved
     * Search is later re-executed via cron, the AuthenticatedUser is used
     * but Saved Search has no memory of which IP address was used when the
     * Saved Search was created. The default IP address in the
     * DataverseRequest constructor is used instead, which as of this
     * writing is 0.0.0.0 to mean "undefined". Is this a feature or a bug?
     * What is the expected interplay between Saved Search and IP Groups?
     * Users might be surprised to see certain DvObjects in the results of
     * their query when creating the Saved Search and later find that those
     * DvObjects, which are only visible due to an IP Groups membership, are
     * not found by Saved Search when executed by cron, for example. As of
     * this writing Saved Search is a superuser-only feature so perhaps IP
     * Groups are irrelevant because all DvObjects are discoverable to
     * superusers.
     *
     * @return always {@code null}
     */
    public static HttpServletRequest getHttpServletRequest() {
        return null;
    }

}