package org.gbif.checklistbank.cli.normalizer;
import org.gbif.api.model.Constants;
import org.gbif.api.model.crawler.FinishReason;
import org.gbif.api.model.crawler.ProcessState;
import org.gbif.api.vocabulary.DatasetType;
import org.gbif.checklistbank.cli.common.Metrics;
import org.gbif.checklistbank.cli.common.RabbitDatasetService;
import org.gbif.checklistbank.cli.common.ZookeeperUtils;
import org.gbif.common.messaging.api.messages.ChecklistNormalizedMessage;
import org.gbif.common.messaging.api.messages.DwcaMetasyncFinishedMessage;
import org.gbif.nub.lookup.straight.IdLookup;
import org.gbif.nub.lookup.straight.IdLookupImpl;
import java.io.IOException;
import java.util.UUID;
import com.codahale.metrics.MetricRegistry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Message-driven service that normalizes checklist datasets.
 *
 * <p>It consumes {@link DwcaMetasyncFinishedMessage}s from the {@code clb-normalizer} queue, runs a
 * {@link Normalizer} for every checklist dataset it accepts and, once normalization has finished,
 * sends a {@link ChecklistNormalizedMessage} for that dataset. When Zookeeper is configured the
 * outcome of each run is additionally written to Zookeeper through {@link ZookeeperUtils}.
 */
public class NormalizerService extends RabbitDatasetService<DwcaMetasyncFinishedMessage> {

  private static final Logger LOG = LoggerFactory.getLogger(NormalizerService.class);
  private static final String QUEUE = "clb-normalizer";

  private final NormalizerConfiguration cfg;
  /** Zookeeper helper; {@code null} when Zookeeper is not configured. */
  private final ZookeeperUtils zkUtils;
  /** Backbone id lookup, populated in {@link #startUpBeforeListening()}. */
  private IdLookup lookup;

  /**
   * Creates the service and, if configured, the Zookeeper helper.
   *
   * @param cfg the normalizer configuration
   * @throws RuntimeException wrapping the {@link IOException} raised while obtaining the curator
   *         framework
   */
  public NormalizerService(NormalizerConfiguration cfg) {
    super(QUEUE, cfg.poolSize, cfg.messaging, cfg.ganglia, "normalize");
    this.cfg = cfg;

    ZookeeperUtils zk = null;
    if (cfg.zookeeper.isConfigured()) {
      try {
        zk = new ZookeeperUtils(cfg.zookeeper.getCuratorFramework());
      } catch (IOException e) {
        throw new RuntimeException(e);
      }
    } else {
      LOG.warn("Zookeeper not configured. Crawl metadata will not be managed.");
    }
    this.zkUtils = zk;
  }

  /**
   * Registers the insert, relation, metrics and denormed meters in addition to whatever the
   * superclass registers.
   *
   * @param registry the metric registry to register the meters with
   */
  @Override
  protected void initMetrics(MetricRegistry registry) {
    super.initMetrics(registry);
    registry.meter(Metrics.INSERT_METER);
    registry.meter(Metrics.RELATION_METER);
    registry.meter(Metrics.METRICS_METER);
    registry.meter(Metrics.DENORMED_METER);
  }

  /**
   * Builds the id lookup that is later handed to every {@link Normalizer}.
   *
   * @throws Exception if loading the lookup fails
   */
  @Override
  protected void startUpBeforeListening() throws Exception {
    // loads all nub usages directly from clb postgres - this can take a few minutes
    lookup = IdLookupImpl.temp().load(cfg.clb, false);
  }

  /**
   * Decides whether a message should be skipped: anything that is not a checklist is rejected.
   *
   * @param msg the incoming message
   * @return {@code true} if the dataset type is not {@link DatasetType#CHECKLIST}
   */
  @Override
  protected boolean ignore(DwcaMetasyncFinishedMessage msg) {
    final DatasetType type = msg.getDatasetType();
    if (type == DatasetType.CHECKLIST) {
      return false;
    }
    LOG.info("Rejected dataset {} of type {}", msg.getDatasetUuid(), type);
    return true;
  }

  /**
   * Normalizes the dataset referenced by the message.
   *
   * <p>The GBIF backbone itself is never normalized; such a request is logged and reported via
   * {@link #failed(UUID)}. For every other dataset the normalizer is run, the Zookeeper success
   * counter is updated (if Zookeeper is configured) and a {@link ChecklistNormalizedMessage} is
   * sent.
   *
   * @param msg the message identifying the dataset to normalize
   * @throws Exception if normalization, the Zookeeper update or sending the message fails
   */
  @Override
  protected void process(DwcaMetasyncFinishedMessage msg) throws Exception {
    final UUID datasetKey = msg.getDatasetUuid();

    if (Constants.NUB_DATASET_KEY.equals(datasetKey)) {
      LOG.warn("Refuse to normalize the GBIF backbone");
      failed(datasetKey);
      return;
    }

    Normalizer.create(cfg, datasetKey, getRegistry(), msg.getConstituents(), lookup).run();

    if (cfg.zookeeper.isConfigured()) {
      zkUtils.updateCounter(datasetKey, ZookeeperUtils.PAGES_FRAGMENTED_SUCCESSFUL, 1L);
    }
    send(new ChecklistNormalizedMessage(datasetKey));
  }

  /**
   * Records a failed run in Zookeeper: finish reason {@link FinishReason#ABORT}, checklist process
   * state {@link ProcessState#FINISHED}, and an update of the error counter. Does nothing when
   * Zookeeper is not configured.
   *
   * @param datasetKey key of the dataset whose processing failed
   */
  @Override
  protected void failed(UUID datasetKey) {
    if (!cfg.zookeeper.isConfigured()) {
      return;
    }
    zkUtils.createOrUpdate(datasetKey, ZookeeperUtils.FINISHED_REASON, FinishReason.ABORT);
    zkUtils.createOrUpdate(datasetKey, ZookeeperUtils.PROCESS_STATE_CHECKLIST, ProcessState.FINISHED);
    zkUtils.updateCounter(datasetKey, ZookeeperUtils.PAGES_FRAGMENTED_ERROR, 1L);
  }

  /**
   * @return the message class this service listens for
   */
  @Override
  public Class<DwcaMetasyncFinishedMessage> getMessageClass() {
    return DwcaMetasyncFinishedMessage.class;
  }
}