package edu.harvard.iq.dataverse.harvest.client;
import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.DataFileServiceBean;
import edu.harvard.iq.dataverse.DataverseRequestServiceBean;
import edu.harvard.iq.dataverse.DataverseServiceBean;
import edu.harvard.iq.dataverse.EjbDataverseEngine;
import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
import edu.harvard.iq.dataverse.engine.command.impl.DeleteHarvestingClientCommand;
import edu.harvard.iq.dataverse.search.IndexServiceBean;
import edu.harvard.iq.dataverse.timer.DataverseTimerServiceBean;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.ejb.Asynchronous;
import javax.ejb.EJB;
import javax.ejb.Stateless;
import javax.ejb.TransactionAttribute;
import javax.ejb.TransactionAttributeType;
import javax.inject.Inject;
import javax.inject.Named;
import javax.persistence.EntityManager;
import javax.persistence.NoResultException;
import javax.persistence.NonUniqueResultException;
import javax.persistence.PersistenceContext;
/**
 * Dedicated service for managing Harvesting Client Configurations.
 *
 * <p>Provides lookups of {@link HarvestingClient} entities, bookkeeping of
 * harvest run state (in progress / success / failure), and asynchronous
 * deletion of a client together with its harvested content.
 *
 * @author Leonid Andreev
 */
@Stateless
@Named
public class HarvestingClientServiceBean implements java.io.Serializable {

    @EJB
    DataverseServiceBean dataverseService;
    @EJB
    EjbDataverseEngine engineService;
    @EJB
    DataFileServiceBean dataFileService;
    @Inject
    DataverseRequestServiceBean dvRequestService;
    @EJB
    IndexServiceBean indexService;
    @EJB
    DataverseTimerServiceBean dataverseTimerService;

    @PersistenceContext(unitName = "VDCNet-ejbPU")
    private EntityManager em;

    // Named after the class itself; the previous hard-coded name was
    // misspelled ("HarvestingClinetServiceBean").
    private static final Logger logger = Logger.getLogger(HarvestingClientServiceBean.class.getName());

    /**
     * Looks up a harvesting client by primary key.
     *
     * @param pk the primary key of the client
     * @return the matching client, or {@code null} if none exists
     */
    public HarvestingClient find(Object pk) {
        return em.find(HarvestingClient.class, pk);
    }

    /**
     * Looks up a harvesting client by nickname, using the
     * {@code HarvestingClient.findByNickname} named query. The nickname is
     * lower-cased (locale-independently) before being bound to the query.
     *
     * @param nickName the nickname to search for; may be {@code null}
     * @return the single matching client, or {@code null} if the nickname is
     *         {@code null}, or if zero or more than one client matches
     */
    public HarvestingClient findByNickname(String nickName) {
        if (nickName == null) {
            return null;
        }
        try {
            return em.createNamedQuery("HarvestingClient.findByNickname", HarvestingClient.class)
                    .setParameter("nickName", nickName.toLowerCase(Locale.ROOT))
                    .getSingleResult();
        } catch (NoResultException | NonUniqueResultException ex) {
            logger.fine("Unable to find a single harvesting client by nickname \"" + nickName + "\": " + ex);
            return null;
        }
    }

    /**
     * Returns all configured harvesting clients whose harvest type is
     * {@code 'oai'}, ordered by name.
     *
     * @return the list of clients; an empty list (never {@code null}) if
     *         there are none or if the lookup fails
     */
    public List<HarvestingClient> getAllHarvestingClients() {
        try {
            return em.createQuery(
                    "SELECT object(c) FROM HarvestingClient AS c WHERE c.harvestType='oai' ORDER BY c.name",
                    HarvestingClient.class).getResultList();
        } catch (Exception ex) {
            logger.log(Level.WARNING,
                    "Unknown exception caught while looking up configured Harvesting Clients: " + ex.getMessage(),
                    ex);
            return new ArrayList<>();
        }
    }

    /**
     * Clears the "harvesting now" flag on the client, in a new transaction.
     * If the client's last run is still marked as in progress, that run is
     * marked as failed.
     *
     * @param hcId id of the harvesting client; nothing happens if no such
     *             client exists
     */
    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
    public void resetHarvestInProgress(Long hcId) {
        HarvestingClient harvestingClient = findAndRefresh(hcId);
        if (harvestingClient == null) {
            return;
        }
        harvestingClient.setHarvestingNow(false);

        // And if there is an unfinished RunResult object, we'll
        // just mark it as a failure:
        ClientHarvestRun lastRun = harvestingClient.getLastRun();
        if (lastRun != null && lastRun.isInProgress()) {
            lastRun.setFailed();
        }
    }

    /**
     * Marks the client as currently harvesting and appends a new in-progress
     * run, started at {@code startTime}, to its run history. Runs in a new
     * transaction.
     *
     * @param hcId      id of the harvesting client; nothing happens if no
     *                  such client exists
     * @param startTime start time to record on the new run
     */
    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
    public void setHarvestInProgress(Long hcId, Date startTime) {
        HarvestingClient harvestingClient = findAndRefresh(hcId);
        if (harvestingClient == null) {
            return;
        }
        harvestingClient.setHarvestingNow(true);
        if (harvestingClient.getRunHistory() == null) {
            harvestingClient.setRunHistory(new ArrayList<ClientHarvestRun>());
        }

        ClientHarvestRun currentRun = new ClientHarvestRun();
        currentRun.setHarvestingClient(harvestingClient);
        currentRun.setStartTime(startTime);
        currentRun.setInProgress();
        harvestingClient.getRunHistory().add(currentRun);
    }

    /**
     * Flags the client as being deleted, in a new transaction.
     *
     * @param hcId id of the harvesting client; nothing happens if no such
     *             client exists
     */
    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
    public void setDeleteInProgress(Long hcId) {
        HarvestingClient harvestingClient = findAndRefresh(hcId);
        if (harvestingClient == null) {
            return;
        }
        harvestingClient.setDeleteInProgress(true);
    }

    /**
     * Deletes a harvesting client and its associated content.
     *
     * <p>Deleting a client, with all the associated content, can take a
     * while - hence it's an asynchronous action. Failures are logged (with
     * the stack trace) rather than propagated to the caller.
     *
     * <p>TOFIGUREOUT (note from the original author): for whatever reason
     * the DeleteHarvestingClientCommand cannot be called from inside this
     * method - something to do with it being asynchronous? - so the deletion
     * is carried out inline here instead of via the command engine.
     *
     * @param clientId id of the harvesting client to delete; nothing happens
     *                 if no such client exists
     */
    @Asynchronous
    public void deleteClient(Long clientId) {
        HarvestingClient victim = find(clientId);
        if (victim == null) {
            return;
        }

        try {
            HarvestingClient merged = em.merge(victim);

            // if this was a scheduled harvester, make sure the timer is deleted:
            dataverseTimerService.removeHarvestTimer(victim);

            // purge indexed objects:
            indexService.deleteHarvestedDocuments(victim);

            // All the datasets harvested by this client will be cleanly deleted
            // through the defined cascade. Cascaded delete does not work for harvested
            // files, however. So they need to be removed explicitly; before we
            // proceed removing the client itself.
            for (DataFile harvestedFile : dataFileService.findHarvestedFilesByClient(merged)) {
                em.remove(em.merge(harvestedFile));
            }

            em.remove(merged);
        } catch (Exception e) {
            // Pass the exception itself to the logger so the stack trace is kept.
            logger.log(Level.WARNING, "Failed to delete client. Unknown exception: " + e.getMessage(), e);
        }
    }

    /**
     * Records a successful completion on the client's last run, in a new
     * transaction. The run is only updated if it is still marked as in
     * progress.
     *
     * @param hcId           id of the harvesting client; nothing happens if
     *                       no such client exists
     * @param currentTime    finish time to record on the run
     * @param harvestedCount number of datasets harvested
     * @param failedCount    number of datasets that failed
     * @param deletedCount   number of datasets deleted
     */
    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
    public void setHarvestSuccess(Long hcId, Date currentTime, int harvestedCount, int failedCount, int deletedCount) {
        HarvestingClient harvestingClient = findAndRefresh(hcId);
        if (harvestingClient == null) {
            return;
        }

        ClientHarvestRun currentRun = harvestingClient.getLastRun();
        if (currentRun != null && currentRun.isInProgress()) {
            // TODO: what if there's no current run in progress? should we just
            // give up quietly, or should we make a noise of some kind? -- L.A. 4.4
            currentRun.setSuccess();
            currentRun.setFinishTime(currentTime);
            currentRun.setHarvestedDatasetCount(Long.valueOf(harvestedCount));
            currentRun.setFailedDatasetCount(Long.valueOf(failedCount));
            currentRun.setDeletedDatasetCount(Long.valueOf(deletedCount));
        }
    }

    /**
     * Records a failure on the client's last run, in a new transaction. The
     * run is only updated if it is still marked as in progress.
     *
     * @param hcId        id of the harvesting client; nothing happens if no
     *                    such client exists
     * @param currentTime finish time to record on the run
     */
    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
    public void setHarvestFailure(Long hcId, Date currentTime) {
        HarvestingClient harvestingClient = findAndRefresh(hcId);
        if (harvestingClient == null) {
            return;
        }

        ClientHarvestRun currentRun = harvestingClient.getLastRun();
        if (currentRun != null && currentRun.isInProgress()) {
            // TODO: what if there's no current run in progress? should we just
            // give up quietly, or should we make a noise of some kind? -- L.A. 4.4
            currentRun.setFailed();
            currentRun.setFinishTime(currentTime);
        }
    }

    /**
     * Counts the datasets whose owner is the dataverse of any of the given
     * harvesting clients, using a native SQL query against the
     * {@code dataset} and {@code dvobject} tables.
     *
     * @param clients the harvesting clients to count for; may be
     *                {@code null} or empty
     * @return the number of matching datasets; {@code 0} if {@code clients}
     *         is {@code null} or empty, or if the query fails
     */
    public Long getNumberOfHarvestedDatasetByClients(List<HarvestingClient> clients) {
        if (clients == null || clients.isEmpty()) {
            return 0L;
        }

        // The IN list is assembled from entity ids (Long values), not from
        // free-form text, so concatenating it into the SQL is safe here.
        StringBuilder ownerIds = new StringBuilder();
        for (HarvestingClient client : clients) {
            if (ownerIds.length() > 0) {
                ownerIds.append(',');
            }
            ownerIds.append(client.getDataverse().getId());
        }

        try {
            Object count = em.createNativeQuery("SELECT count(d.id) FROM dataset d, "
                    + " dvobject o WHERE d.id = o.id AND o.owner_id in ("
                    + ownerIds + ")").getSingleResult();
            // Go through Number rather than casting straight to Long, since
            // the concrete numeric type of a native count is driver-specific.
            return ((Number) count).longValue();
        } catch (Exception ex) {
            logger.info("Warning: exception trying to count harvested datasets by clients: " + ex.getMessage());
            return 0L;
        }
    }

    /**
     * Finds the harvesting client with the given id and, if it exists,
     * refreshes it so that its state is reloaded from the database.
     *
     * @param hcId id of the harvesting client
     * @return the refreshed client, or {@code null} if no such client exists
     */
    private HarvestingClient findAndRefresh(Long hcId) {
        HarvestingClient harvestingClient = em.find(HarvestingClient.class, hcId);
        if (harvestingClient != null) {
            em.refresh(harvestingClient);
        }
        return harvestingClient;
    }
}