/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package edu.harvard.iq.dataverse.harvest.server; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetServiceBean; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.export.ExportException; import edu.harvard.iq.dataverse.export.ExportService; import edu.harvard.iq.dataverse.search.IndexServiceBean; import java.io.File; import java.io.IOException; import java.sql.Timestamp; import java.text.SimpleDateFormat; import java.util.Collection; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.logging.FileHandler; import java.util.logging.Level; import java.util.logging.Logger; import javax.ejb.EJB; import javax.ejb.Stateless; import javax.ejb.TransactionAttribute; import static javax.ejb.TransactionAttributeType.REQUIRES_NEW; import javax.inject.Named; import javax.persistence.EntityManager; import javax.persistence.PersistenceContext; import javax.persistence.Query; import javax.persistence.TemporalType; /** * * @author Leonid Andreev * based on the implementation of "HarvestStudyServiceBean" from * DVN 3*, by Gustavo Durand. */ @Stateless @Named public class OAIRecordServiceBean implements java.io.Serializable { @EJB OAISetServiceBean oaiSetService; @EJB IndexServiceBean indexService; @EJB DatasetServiceBean datasetService; //@EJB //ExportService exportService; @PersistenceContext(unitName = "VDCNet-ejbPU") EntityManager em; private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.server.OAIRecordServiceBean"); /* public void updateOaiRecords() { Date updateTime = new Date(); List<OAISet> sets = oaiSetService.findAll(); for (OAISet oaiSet : sets) { List<Long> studyIds = indexService.query(oaiSet.getDefinition()); studyIds = studyService.getVisibleStudies(studyIds, null); studyIds = studyService.getViewableStudies(studyIds); updateOaiRecords( oaiSet.getSpec(), studyIds, updateTime ); } // also do noset membet List<Long> studyIds = studyService.getAllNonHarvestedStudyIds(); studyIds = studyService.getVisibleStudies(studyIds, null); studyIds = studyService.getViewableStudies(studyIds); updateOaiRecords( null, studyIds, updateTime ); } */ public void updateOaiRecords(String setName, List<Long> datasetIds, Date updateTime, boolean doExport) { updateOaiRecords(setName, datasetIds, updateTime, doExport, logger); } public void updateOaiRecords(String setName, List<Long> datasetIds, Date updateTime, boolean doExport, Logger setUpdateLogger) { // create Map of OaiRecords List<OAIRecord> oaiRecords = findOaiRecordsBySetName( setName ); Map<String,OAIRecord> recordMap = new HashMap(); if (oaiRecords != null) { for (OAIRecord record : oaiRecords) { // look for duplicates here? delete? recordMap.put(record.getGlobalId(), record); } } else { setUpdateLogger.fine("Null returned - no records found."); } if (!recordMap.isEmpty()) { setUpdateLogger.fine("Found "+recordMap.size()+" existing records"); } else { setUpdateLogger.fine("No records in the set yet."); } if (datasetIds != null) { for (Long datasetId : datasetIds) { setUpdateLogger.fine("processing dataset id=" + datasetId); Dataset dataset = datasetService.find(datasetId); if (dataset == null) { setUpdateLogger.fine("failed to find dataset!"); } else { setUpdateLogger.fine("found dataset."); // TODO: option to *force* export? if (doExport) { // TODO: // Review this logic - specifically for handling of // deaccessioned datasets. -- L.A. 4.5 // OK, it looks like we can't rely on .getPublicationDate() - // as it is essentially the *first publication* date; // and we are interested in the *last* DatasetVersion releasedVersion = dataset.getReleasedVersion(); Date publicationDate = releasedVersion == null ? null : releasedVersion.getReleaseTime(); //if (dataset.getPublicationDate() != null // && (dataset.getLastExportTime() == null // || dataset.getLastExportTime().before(dataset.getPublicationDate()))) { if (publicationDate != null && (dataset.getLastExportTime() == null || dataset.getLastExportTime().before(publicationDate))) { setUpdateLogger.fine("Attempting to run export on dataset " + dataset.getGlobalId()); exportAllFormats(dataset); } } setUpdateLogger.fine("\"last exported\" timestamp: " + dataset.getLastExportTime()); em.refresh(dataset); setUpdateLogger.fine("\"last exported\" timestamp, after db refresh: " + dataset.getLastExportTime()); updateOaiRecordForDataset(dataset, setName, recordMap, setUpdateLogger); } } } // anything left in the map should be marked as removed! markOaiRecordsAsRemoved( recordMap.values(), updateTime, setUpdateLogger); } // This method updates - creates/refreshes/un-marks-as-deleted - one OAI // record at a time. It does so inside its own transaction, to ensure that // the changes take place immediately. (except the method is called from // right here, in this EJB - so the attribute does not do anything! (TODO:!) @TransactionAttribute(REQUIRES_NEW) public void updateOaiRecordForDataset(Dataset dataset, String setName, Map<String, OAIRecord> recordMap, Logger setUpdateLogger) { // TODO: review .isReleased() logic // Answer: no, we can't trust isReleased()! It's a dvobject method that // simply returns (publicationDate != null). And the publication date // stays in place even if all the released versions have been deaccessioned. boolean isReleased = dataset.getReleasedVersion() != null; if (isReleased && dataset.getLastExportTime() != null) { OAIRecord record = recordMap.get(dataset.getGlobalId()); if (record == null) { setUpdateLogger.info("creating a new OAI Record for " + dataset.getGlobalId()); record = new OAIRecord(setName, dataset.getGlobalId(), new Date()); em.persist(record); } else { if (record.isRemoved()) { setUpdateLogger.info("\"un-deleting\" an existing OAI Record for " + dataset.getGlobalId()); record.setRemoved(false); record.setLastUpdateTime(new Date()); } else if (dataset.getLastExportTime().after(record.getLastUpdateTime())) { setUpdateLogger.info("updating the timestamp on an existing record."); record.setLastUpdateTime(new Date()); } recordMap.remove(record.getGlobalId()); } } } // Updates any existing OAI records for this dataset // Should be called whenever there's a change in the release status of the Dataset // (i.e., when it's published or deaccessioned), so that the timestamps and // on the records could be freshened before the next reexport of the corresponding // sets. // *Note* that the method assumes that a full metadata reexport has already // been attempted on the dataset. (Meaning that if getLastExportTime is null, // we'll just assume that the exports failed and the OAI records must be marked // as "deleted". @TransactionAttribute(REQUIRES_NEW) public void updateOaiRecordsForDataset(Dataset dataset) { // create Map of OaiRecords List<OAIRecord> oaiRecords = findOaiRecordsByGlobalId(dataset.getGlobalId()); if (oaiRecords != null) { DatasetVersion releasedVersion = dataset.getReleasedVersion(); if (releasedVersion == null || dataset.getLastExportTime() == null) { // Datast must have been deaccessioned. markOaiRecordsAsRemoved(oaiRecords, new Date(), logger); return; } for (OAIRecord record : oaiRecords) { if (record.isRemoved()) { logger.fine("\"un-deleting\" an existing OAI Record for " + dataset.getGlobalId()); record.setRemoved(false); record.setLastUpdateTime(new Date()); } else if (dataset.getLastExportTime().after(record.getLastUpdateTime())) { record.setLastUpdateTime(new Date()); } } } else { logger.fine("Null returned - no records found."); } } public void markOaiRecordsAsRemoved(Collection<OAIRecord> records, Date updateTime, Logger setUpdateLogger) { for (OAIRecord oaiRecord : records) { if ( !oaiRecord.isRemoved() ) { setUpdateLogger.fine("marking OAI record "+oaiRecord.getGlobalId()+" as removed"); oaiRecord.setRemoved(true); oaiRecord.setLastUpdateTime(updateTime); } else { setUpdateLogger.fine("OAI record "+oaiRecord.getGlobalId()+" is already marked as removed."); } } } // TODO: // Export functionality probably deserves its own EJB ServiceBean - // so maybe create ExportServiceBean, and move these methods there? // (why these need to be in an EJB bean at all, what's wrong with keeping // them in the loadable ExportService? - since we need to modify the // "last export" timestamp on the dataset, being able to do that in the // @EJB context is convenient. public void exportAllFormats(Dataset dataset) { try { ExportService exportServiceInstance = ExportService.getInstance(); logger.fine("Attempting to run export on dataset "+dataset.getGlobalId()); exportServiceInstance.exportAllFormats(dataset); datasetService.updateLastExportTimeStamp(dataset.getId()); } catch (ExportException ee) {logger.fine("Caught export exception while trying to export. (ignoring)");} catch (Exception e) {logger.fine("Caught unknown exception while trying to export (ignoring)");} } @TransactionAttribute(REQUIRES_NEW) public void exportAllFormatsInNewTransaction(Dataset dataset) throws ExportException { try { ExportService exportServiceInstance = ExportService.getInstance(); exportServiceInstance.exportAllFormats(dataset); datasetService.updateLastExportTimeStamp(dataset.getId()); } catch (Exception e) { logger.fine("Caught unknown exception while trying to export"); throw new ExportException(e.getMessage()); } } public OAIRecord findOAIRecordBySetNameandGlobalId(String setName, String globalId) { OAIRecord oaiRecord = null; String queryString = "SELECT object(h) from OAIRecord h where h.globalId = :globalId"; queryString += setName != null ? " and h.setName = :setName" : ""; // and h.setName is null"; logger.fine("findOAIRecordBySetNameandGlobalId; query: "+queryString+"; globalId: "+globalId+"; setName: "+setName); Query query = em.createQuery(queryString).setParameter("globalId",globalId); if (setName != null) { query.setParameter("setName",setName); } try { oaiRecord = (OAIRecord) query.setMaxResults(1).getSingleResult(); } catch (javax.persistence.NoResultException e) { // Do nothing, just return null. } logger.fine("returning oai record."); return oaiRecord; } public List<OAIRecord> findOaiRecordsByGlobalId(String globalId) { String query="SELECT h from OAIRecord as h where h.globalId = :globalId"; List<OAIRecord> oaiRecords = null; try { oaiRecords = em.createQuery(query).setParameter("globalId",globalId).getResultList(); } catch (Exception ex) { // Do nothing, return null. } return oaiRecords; } public List<OAIRecord> findOaiRecordsBySetName(String setName) { return findOaiRecordsBySetName(setName, null, null); } public List<OAIRecord> findOaiRecordsBySetName(String setName, Date from, Date until) { String queryString ="SELECT object(h) from OAIRecord as h where h.id is not null"; queryString += setName != null ? " and h.setName = :setName" : ""; // where h.setName is null"; queryString += from != null ? " and h.lastUpdateTime >= :from" : ""; queryString += until != null ? " and h.lastUpdateTime<=:until" : ""; logger.fine("Query: "+queryString); Query query = em.createQuery(queryString); if (setName != null) { query.setParameter("setName",setName); } if (from != null) { query.setParameter("from",from,TemporalType.TIMESTAMP); } // In order to achieve inclusivity on the "until" matching, we need to do // the following (if the "until" parameter is supplied): // 1) if the supplied "until" parameter has the time portion (and is not just // a date), we'll increment it by one second. This is because the time stamps we // keep in the database also have fractional thousands of a second. // So, a record may be shown as "T17:35:45", but in the database it is // actually "17:35:45.356", so "<= 17:35:45" isn't going to work on this // time stamp! - So we want to try "<= 17:35:45" instead. // 2) if it's just a date, we'll increment it by a *full day*. Otherwise // our database time stamp of 2016-10-23T17:35:45.123Z is NOT going to // match " <= 2016-10-23" - which is really going to be interpreted as // "2016-10-23T00:00:00.000". // -- L.A. 4.6 if (until != null) { // 24 * 3600 * 1000 = number of milliseconds in a day. if (until.getTime() % (24 * 3600 * 1000) == 0) { // The supplied "until" parameter is a date, with no time // portion. logger.fine("plain date. incrementing by one day"); until.setTime(until.getTime()+(24 * 3600 * 1000)); } else { logger.fine("date and time. incrementing by one second"); until.setTime(until.getTime()+1000); } query.setParameter("until",until,TemporalType.TIMESTAMP); } try { return query.getResultList(); } catch (Exception ex) { logger.fine("Caught exception; returning null."); return null; } } // This method is to only get the records NOT marked as "deleted": public List<OAIRecord> findActiveOaiRecordsBySetName(String setName) { String queryString ="SELECT object(h) from OAIRecord as h WHERE (h.removed != true)"; queryString += setName != null ? " and (h.setName = :setName)" : "and (h.setName is null)"; logger.fine("Query: "+queryString); Query query = em.createQuery(queryString); if (setName != null) { query.setParameter("setName",setName); } try { return query.getResultList(); } catch (Exception ex) { logger.fine("Caught exception; returning null."); return null; } } // This method is to only get the records marked as "deleted": public List<OAIRecord> findDeletedOaiRecordsBySetName(String setName) { String queryString ="SELECT object(h) from OAIRecord as h WHERE (h.removed = true)"; queryString += setName != null ? " and (h.setName = :setName)" : "and (h.setName is null)"; logger.fine("Query: "+queryString); Query query = em.createQuery(queryString); if (setName != null) { query.setParameter("setName",setName); } try { return query.getResultList(); } catch (Exception ex) { logger.fine("Caught exception; returning null."); return null; } } }