package org.gbif.checklistbank.cli.importer;
import org.gbif.api.model.Constants;
import org.gbif.api.model.crawler.FinishReason;
import org.gbif.api.model.crawler.ProcessState;
import org.gbif.api.service.checklistbank.NameUsageService;
import org.gbif.checklistbank.cli.common.RabbitDatasetService;
import org.gbif.checklistbank.cli.common.ZookeeperUtils;
import org.gbif.checklistbank.cli.registry.RegistryService;
import org.gbif.checklistbank.index.guice.RealTimeModule;
import org.gbif.checklistbank.index.guice.Solr;
import org.gbif.checklistbank.service.DatasetImportService;
import org.gbif.checklistbank.service.UsageService;
import org.gbif.checklistbank.service.mybatis.guice.ChecklistBankServiceMyBatisModule;
import org.gbif.checklistbank.service.mybatis.guice.Mybatis;
import org.gbif.common.messaging.api.messages.ChecklistNormalizedMessage;
import org.gbif.common.messaging.api.messages.ChecklistSyncedMessage;
import java.io.IOException;
import java.util.Date;
import java.util.UUID;
import com.google.common.annotations.VisibleForTesting;
import com.google.inject.Key;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Message-driven service that consumes {@link ChecklistNormalizedMessage}s, runs an
 * {@link Importer} for the dataset referenced by each message and afterwards sends a
 * {@link ChecklistSyncedMessage}.
 *
 * <p>If zookeeper is configured, crawl metadata (finished-crawling date, checklist process state,
 * finish reason) is read from and written to it via {@link ZookeeperUtils}; otherwise these steps
 * are skipped.
 */
public class ImporterService extends RabbitDatasetService<ChecklistNormalizedMessage> {

  private static final Logger LOG = LoggerFactory.getLogger(ImporterService.class);

  private final ImporterConfiguration cfg;
  private final DatasetImportService sqlService;
  private final DatasetImportService solrService;
  private final NameUsageService nameUsageService;
  private final UsageService usageService;
  /** Null when zookeeper is not configured; always guard access with {@code cfg.zookeeper.isConfigured()}. */
  private final ZookeeperUtils zkUtils;

  /**
   * Creates the service and resolves all collaborating services from the injector once.
   *
   * @param cfg importer configuration (pool size, messaging, ganglia, clb, solr, zookeeper, neo)
   * @throws RuntimeException wrapping the {@link IOException} raised while creating the zookeeper
   *         curator framework
   */
  public ImporterService(ImporterConfiguration cfg) {
    super("clb-importer", cfg.poolSize, cfg.messaging, cfg.ganglia, "import",
        ChecklistBankServiceMyBatisModule.create(cfg.clb), new RealTimeModule(cfg.solr));
    this.cfg = cfg;
    if (cfg.zookeeper.isConfigured()) {
      try {
        zkUtils = new ZookeeperUtils(cfg.zookeeper.getCuratorFramework());
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    } else {
      LOG.warn("Zookeeper not configured. Crawl metadata will not be managed.");
      zkUtils = null;
    }
    // init mybatis layer and solr once from cfg instance
    sqlService = getInstance(Key.get(DatasetImportService.class, Mybatis.class));
    solrService = getInstance(Key.get(DatasetImportService.class, Solr.class));
    nameUsageService = getInstance(NameUsageService.class);
    usageService = getInstance(UsageService.class);
  }

  /**
   * Imports the dataset referenced by the message, announces the result with a
   * {@link ChecklistSyncedMessage} and optionally removes the dataset's storage files.
   *
   * <p>The checklist process state in zookeeper (if configured) is set to
   * {@link ProcessState#FINISHED} in a {@code finally} block, i.e. also when the import throws.
   *
   * @param msg the normalized message carrying the dataset key
   * @throws Exception anything raised by the importer, messaging or file cleanup
   */
  @Override
  protected void process(ChecklistNormalizedMessage msg) throws Exception {
    final UUID datasetKey = msg.getDatasetUuid();
    try {
      Importer importer = Importer.create(cfg, datasetKey, nameUsageService, usageService, sqlService, solrService);
      importer.run();

      // notify rabbit, using the crawl-finished date stored in zookeeper when one is available
      Date crawlFinished = null;
      if (cfg.zookeeper.isConfigured()) {
        crawlFinished = zkUtils.getDate(datasetKey, ZookeeperUtils.FINISHED_CRAWLING);
        if (crawlFinished == null) {
          LOG.warn("No crawlFinished date found in zookeeper, use current date instead for dataset {}", datasetKey);
        }
      }
      if (crawlFinished == null) {
        // either zookeeper is not configured or it holds no date for this dataset
        crawlFinished = new Date();
      }
      send(new ChecklistSyncedMessage(datasetKey, crawlFinished, importer.getSyncCounter(), importer.getDelCounter()));

      // finally delete artifacts unless configured not to or it is the nub!
      if (cfg.deleteNeo && !Constants.NUB_DATASET_KEY.equals(datasetKey)) {
        RegistryService.deleteStorageFiles(cfg.neo, datasetKey);
      }
    } finally {
      if (cfg.zookeeper.isConfigured()) {
        zkUtils.createOrUpdate(datasetKey, ZookeeperUtils.PROCESS_STATE_CHECKLIST, ProcessState.FINISHED);
      }
    }
  }

  /**
   * Records {@link FinishReason#ABORT} as the finish reason for the dataset in zookeeper, if
   * zookeeper is configured.
   * NOTE(review): presumably invoked by the base class when processing a message failed - confirm.
   *
   * @param datasetKey key of the dataset concerned
   */
  @Override
  protected void failed(UUID datasetKey) {
    if (cfg.zookeeper.isConfigured()) {
      zkUtils.createOrUpdate(datasetKey, ZookeeperUtils.FINISHED_REASON, FinishReason.ABORT);
    }
  }

  /**
   * Delegates to the superclass; overridden only to flag the method as used by tests.
   */
  @Override
  @VisibleForTesting
  protected void startUp() throws Exception {
    super.startUp();
  }

  /**
   * Closes the sql and solr import services and then shuts down the superclass.
   *
   * <p>Each step runs in its own {@code finally} so that a failure while closing one resource
   * does not prevent the remaining ones from being released. If several steps throw, the
   * exception of the last failing step is the one propagated.
   */
  @Override
  protected void shutDown() throws Exception {
    try {
      sqlService.close();
    } finally {
      try {
        solrService.close();
      } finally {
        super.shutDown();
      }
    }
  }

  /**
   * @return the message class this service consumes
   */
  @Override
  public Class<ChecklistNormalizedMessage> getMessageClass() {
    return ChecklistNormalizedMessage.class;
  }
}