package edu.harvard.iq.dataverse.search;
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetServiceBean;
import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.DataverseServiceBean;
import edu.harvard.iq.dataverse.DvObjectServiceBean;
import edu.harvard.iq.dataverse.util.SystemConfig;
import java.io.IOException;
import java.util.List;
import java.util.concurrent.Future;
import java.util.logging.Logger;
import javax.ejb.AsyncResult;
import javax.ejb.Asynchronous;
import javax.ejb.EJB;
import javax.ejb.Stateless;
import javax.inject.Named;
import javax.json.Json;
import javax.json.JsonArrayBuilder;
import javax.json.JsonObject;
import javax.json.JsonObjectBuilder;
import org.apache.solr.client.solrj.SolrServerException;
@Named
@Stateless
public class IndexAllServiceBean {

    private static final Logger logger = Logger.getLogger(IndexAllServiceBean.class.getCanonicalName());

    @EJB
    IndexServiceBean indexService;
    @EJB
    SolrIndexServiceBean solrIndexService;
    @EJB
    DataverseServiceBean dataverseService;
    @EJB
    DatasetServiceBean datasetService;
    @EJB
    DvObjectServiceBean dvObjectService;
    @EJB
    SystemConfig systemConfig;

    /**
     * Asynchronously re-indexes all dataverses and datasets, or the subset
     * belonging to one partition of the workload.
     *
     * @param numPartitions how many partitions the full workload is split into
     *        (1 means "index everything")
     * @param partitionId   which partition this invocation should process
     * @param skipIndexed   "continue" mode: skip objects that already have an
     *        index timestamp
     * @param previewOnly   when true, do NOT index; return a description of the
     *        workload this partition would process instead
     * @return a JSON builder reporting either that indexing has begun or, in
     *         preview mode, the partition's workload
     */
    @Asynchronous
    public Future<JsonObjectBuilder> indexAllOrSubset(long numPartitions, long partitionId, boolean skipIndexed, boolean previewOnly) {
        // Honor previewOnly: report the workload without touching the index.
        // (Previously this flag was accepted but ignored, so indexing always
        // started even for a preview request.)
        if (previewOnly) {
            return new AsyncResult<>(indexAllOrSubsetPreview(numPartitions, partitionId, skipIndexed));
        }
        JsonObjectBuilder response = Json.createObjectBuilder();
        // Fire-and-forget: the Future returned by the worker method is
        // intentionally not awaited; this method only reports that the
        // (potentially long-running) indexing pass has been kicked off.
        indexAllOrSubset(numPartitions, partitionId, skipIndexed);
        response.add("responseFromIndexAllOrSubset", "indexAllOrSubset has begun");
        return new AsyncResult<>(response);
    }

    /**
     * Describes, without indexing anything, which dataverses and datasets a
     * given partition of an "index all" run would process.
     *
     * @param numPartitions how many partitions the full workload is split into
     * @param partitionId   which partition to describe
     * @param skipIndexed   "continue" mode: exclude objects already indexed
     * @return a JSON builder listing the dataverse/dataset ids and counts for
     *         the partition
     */
    public JsonObjectBuilder indexAllOrSubsetPreview(long numPartitions, long partitionId, boolean skipIndexed) {
        JsonObjectBuilder response = Json.createObjectBuilder();
        JsonObjectBuilder previewOfWorkload = Json.createObjectBuilder();
        JsonObjectBuilder dvContainerIds = Json.createObjectBuilder();

        JsonArrayBuilder dataverseIds = Json.createArrayBuilder();
        List<Dataverse> dataverses = dataverseService.findAllOrSubset(numPartitions, partitionId, skipIndexed);
        for (Dataverse dataverse : dataverses) {
            dataverseIds.add(dataverse.getId());
        }

        JsonArrayBuilder datasetIds = Json.createArrayBuilder();
        List<Dataset> datasets = datasetService.findAllOrSubset(numPartitions, partitionId, skipIndexed);
        for (Dataset dataset : datasets) {
            datasetIds.add(dataset.getId());
        }

        dvContainerIds.add("dataverses", dataverseIds);
        dvContainerIds.add("datasets", datasetIds);
        previewOfWorkload.add("dvContainerIds", dvContainerIds);
        previewOfWorkload.add("dataverseCount", dataverses.size());
        previewOfWorkload.add("datasetCount", datasets.size());
        previewOfWorkload.add("partitionId", partitionId);
        response.add("previewOfPartitionWorkload", previewOfWorkload);
        return response;
    }

    /**
     * Indexes every dataverse and dataset in the given partition, each in its
     * own transaction, and returns a human-readable status summary.
     *
     * @param numPartitions how many partitions the full workload is split into
     * @param partitionId   which partition this invocation should process
     * @param skipIndexed   "continue" mode: skip objects already indexed
     * @return a completed Future wrapping a summary of how many objects were
     *         indexed and how long the run took
     */
    public Future<String> indexAllOrSubset(long numPartitions, long partitionId, boolean skipIndexed) {
        long indexAllTimeBegin = System.currentTimeMillis();
        String status;
        String resultOfClearingIndexTimes;
        /**
         * @todo Should we allow sysadmins to request that the Solr index and
         * related timestamps in the database be cleared as part of "index all"?
         * If so, we can make this boolean a parameter that's passed into this
         * method. A method to do this clearing has been added as a separate API
         * endpoint.
         */
        boolean clearSolrAndTimestamps = false;
        /**
         * We only allow clearing of Solr and database index timestamps if we
         * are operating on the entire index ("index all") and if we are not
         * running in "continue" mode.
         */
        if (numPartitions == 1 && !skipIndexed && clearSolrAndTimestamps) {
            logger.info("attempting to delete all Solr documents before a complete re-index");
            try {
                JsonObject response = solrIndexService.deleteAllFromSolrAndResetIndexTimes().build();
                String message = response.getString(SolrIndexServiceBean.messageString);
                int numRowsCleared = response.getInt(SolrIndexServiceBean.numRowsClearedByClearAllIndexTimes);
                resultOfClearingIndexTimes = message + " Database rows from which index timestamps were cleared: " + numRowsCleared;
            } catch (SolrServerException | IOException ex) {
                // Best effort: report the failure in the status rather than
                // aborting the whole re-index.
                resultOfClearingIndexTimes = "Solr index and database timestamps were not cleared: " + ex;
            }
        } else {
            resultOfClearingIndexTimes = "Solr index was not cleared before indexing.";
        }

        // Each object is indexed in its own new transaction so one failure
        // does not roll back the whole run; the per-object Futures are
        // deliberately not awaited.
        List<Dataverse> dataverses = dataverseService.findAllOrSubset(numPartitions, partitionId, skipIndexed);
        int dataverseIndexCount = 0;
        for (Dataverse dataverse : dataverses) {
            dataverseIndexCount++;
            logger.info("indexing dataverse " + dataverseIndexCount + " of " + dataverses.size() + " (id=" + dataverse.getId() + ", persistentId=" + dataverse.getAlias() + ")");
            indexService.indexDataverseInNewTransaction(dataverse);
        }

        int datasetIndexCount = 0;
        List<Dataset> datasets = datasetService.findAllOrSubset(numPartitions, partitionId, skipIndexed);
        for (Dataset dataset : datasets) {
            datasetIndexCount++;
            logger.info("indexing dataset " + datasetIndexCount + " of " + datasets.size() + " (id=" + dataset.getId() + ", persistentId=" + dataset.getGlobalId() + ")");
            indexService.indexDatasetInNewTransaction(dataset);
        }
        logger.info("done iterating through all datasets");

        long indexAllTimeEnd = System.currentTimeMillis();
        String timeElapsed = "index all took " + (indexAllTimeEnd - indexAllTimeBegin) + " milliseconds";
        logger.info(timeElapsed);
        status = dataverseIndexCount + " dataverses and " + datasetIndexCount + " datasets indexed. " + timeElapsed + ". " + resultOfClearingIndexTimes + "\n";
        logger.info(status);
        return new AsyncResult<>(status);
    }
}