package org.gbif.occurrence.cli.crawl;
import org.gbif.common.messaging.api.MessagePublisher;
import org.gbif.common.messaging.api.messages.DeleteOccurrenceMessage;
import org.gbif.common.messaging.api.messages.OccurrenceDeletionReason;
import java.io.IOException;
import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.UUID;
import java.util.function.Function;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Service that emits a delete message for every gbifid linked to previous crawls of a dataset.
 * <p>
 * The ids are read from the Hive occurrence table named in the configuration and one
 * {@link DeleteOccurrenceMessage} is published per id.
 */
class DeletePreviousCrawlsService {

  private static final Logger LOG = LoggerFactory.getLogger(DeletePreviousCrawlsService.class);

  // we use a smaller than (<) instead of not equals (<>) to avoid deleting records of a potential new crawl
  // that started
  private static final String SQL_QUERY_GET_OTHER_CRAWL_ID = "SELECT gbifid FROM " +
    " %s WHERE datasetkey = ? AND crawlid < ?";

  // Fills the table name into the query template; the two '?' placeholders are bound later.
  private static final Function<String, String> SQL_COMMAND_FOR_TABLE = (tableName) ->
    String.format(SQL_QUERY_GET_OTHER_CRAWL_ID, tableName);

  // 1-based positions of the bind parameters in SQL_QUERY_GET_OTHER_CRAWL_ID
  private static final int DATASET_KEY_IDX = 1;
  private static final int CRAWL_ID_IDX = 2;

  // Both collaborators are set once by the constructor and never reassigned.
  private final PreviousCrawlsManagerConfiguration config;
  private final MessagePublisher publisher;

  /**
   * @param config    configuration providing the Hive connection and the occurrence table name
   * @param publisher publisher used to send the delete messages
   */
  DeletePreviousCrawlsService(PreviousCrawlsManagerConfiguration config, MessagePublisher publisher) {
    this.config = config;
    this.publisher = publisher;
  }

  /**
   * Publishes a single delete message for the given occurrence.
   *
   * @param occurrenceKey key of the occurrence to delete
   * @throws IOException if the publisher fails to send the message
   */
  private void sendDeleteMessage(int occurrenceKey) throws IOException {
    //Maybe it should use OccurrenceDeletionReason.NOT_SEEN_IN_LAST_CRAWL but it seems OccurrenceDeletionReason is
    //never used
    publisher.send(
      new DeleteOccurrenceMessage(occurrenceKey, OccurrenceDeletionReason.OCCURRENCE_MANUAL, null, null));
  }

  /**
   * Closes the underlying message publisher.
   */
  public void close() {
    publisher.close();
  }

  /**
   * Sends a delete message for all occurrence records that are coming from a crawl before lastSuccessfulCrawl.
   * <p>
   * SQL and messaging errors are logged and not rethrown: processing stops at the first failure and the
   * count of messages emitted up to that point is returned.
   *
   * @param datasetKey          key of the dataset, matched against the {@code datasetkey} column
   * @param lastSuccessfulCrawl crawl id; only records with a strictly lower {@code crawlid} are selected
   * @return the number of delete messages emitted (possibly partial if an error occurred)
   */
  public int deleteOccurrenceInPreviousCrawls(UUID datasetKey, int lastSuccessfulCrawl) {
    int numberOfMessageEmitted = 0;
    String sql = SQL_COMMAND_FOR_TABLE.apply(config.hiveOccurrenceTable);

    try (Connection conn = config.hive.buildHiveConnection();
         PreparedStatement stmt = conn.prepareStatement(sql)) {

      stmt.setString(DATASET_KEY_IDX, datasetKey.toString());
      stmt.setInt(CRAWL_ID_IDX, lastSuccessfulCrawl);

      try (ResultSet rs = stmt.executeQuery()) {
        while (rs.next()) {
          // NOTE(review): gbifid is read as an int; confirm the keys cannot exceed the int range.
          sendDeleteMessage(rs.getInt(1));
          numberOfMessageEmitted++;
        }
      }
    } catch (SQLException | IOException e) {
      // Query failures and publish failures are handled the same way: log and return what was sent so far.
      LOG.error("Error while deleting records for dataset {}", datasetKey, e);
    }
    return numberOfMessageEmitted;
  }
}