package org.gbif.occurrence.processor;
import org.gbif.api.model.occurrence.VerbatimOccurrence;
import org.gbif.api.vocabulary.OccurrencePersistenceStatus;
import org.gbif.common.messaging.api.MessagePublisher;
import org.gbif.common.messaging.api.messages.VerbatimPersistedMessage;
import org.gbif.occurrence.persistence.api.Fragment;
import org.gbif.occurrence.persistence.api.FragmentPersistenceService;
import org.gbif.occurrence.persistence.api.OccurrencePersistenceService;
import org.gbif.occurrence.processor.parsing.FragmentParser;
import org.gbif.occurrence.processor.zookeeper.ZookeeperConnector;
import java.io.IOException;
import java.util.UUID;
import java.util.concurrent.TimeUnit;
import javax.annotation.Nullable;
import com.google.inject.Inject;
import com.google.inject.Singleton;
import com.yammer.metrics.Metrics;
import com.yammer.metrics.core.Meter;
import com.yammer.metrics.core.Timer;
import com.yammer.metrics.core.TimerContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
/**
 * Parses stored occurrence Fragments into VerbatimOccurrence objects and persists the result.
 * <p>
 * When invoked as part of a crawl, success and error counters are pushed to zookeeper. After a successful persist a
 * VerbatimPersistedMessage is published so that downstream consumers can continue processing.
 */
@Singleton
public class VerbatimProcessor {

  private static final Logger LOG = LoggerFactory.getLogger(VerbatimProcessor.class);

  private final FragmentPersistenceService fragmentPersister;
  private final OccurrencePersistenceService occurrencePersister;
  private final MessagePublisher messagePublisher;
  private final ZookeeperConnector zookeeperConnector;

  // marked once for every verbatim occurrence that makes it through buildVerbatim
  private final Meter verbProcessed = Metrics.newMeter(VerbatimProcessor.class, "verbs", "verbs", TimeUnit.SECONDS);
  // measures how long publishing the VerbatimPersistedMessage takes
  private final Timer msgTimer =
    Metrics.newTimer(VerbatimProcessor.class, "msg send time", TimeUnit.MILLISECONDS, TimeUnit.SECONDS);

  /**
   * Creates a processor from its collaborators, none of which may be null.
   *
   * @param fragmentPersister used to look up the Fragment for an occurrence key
   * @param occurrencePersister used to store the parsed VerbatimOccurrence
   * @param messagePublisher used to announce successfully persisted verbatim occurrences
   * @param zookeeperConnector used to update success/error counters during crawls
   */
  @Inject
  public VerbatimProcessor(FragmentPersistenceService fragmentPersister,
    OccurrencePersistenceService occurrencePersister, MessagePublisher messagePublisher,
    ZookeeperConnector zookeeperConnector) {
    checkNotNull(fragmentPersister, "fragmentPersister can't be null");
    checkNotNull(occurrencePersister, "occurrencePersister can't be null");
    checkNotNull(messagePublisher, "messagePublisher can't be null");
    checkNotNull(zookeeperConnector, "zookeeperConnector can't be null");

    this.fragmentPersister = fragmentPersister;
    this.occurrencePersister = occurrencePersister;
    this.messagePublisher = messagePublisher;
    this.zookeeperConnector = zookeeperConnector;
  }

  /**
   * Looks up the Fragment stored under the given occurrenceKey, parses it into a VerbatimOccurrence and persists it.
   * On success a VerbatimPersistedMessage is sent; when called from a crawl, zookeeper success/error counters are
   * updated as well. Fragments with status UNCHANGED are ignored. A missing or unparseable Fragment is logged (and
   * counted as an error during a crawl) and then skipped.
   *
   * @param occurrenceKey the key of the existing Fragment to be parsed, must be greater than 0
   * @param status whether the Fragment is NEW, UPDATED, or UNCHANGED, must not be null
   * @param fromCrawl true if this method is called as part of a crawl
   * @param attemptId the crawl attempt id, passed along in logs and subsequent messages (must be greater than 0 if
   *        fromCrawl is true, otherwise the crawl id stored on the Fragment is used instead)
   * @param datasetKey the dataset that this occurrence belongs to (must not be null if fromCrawl is true)
   */
  public void buildVerbatim(int occurrenceKey, OccurrencePersistenceStatus status, boolean fromCrawl,
    @Nullable Integer attemptId, @Nullable UUID datasetKey) {
    validateArguments(occurrenceKey, status, fromCrawl, attemptId, datasetKey);

    if (OccurrencePersistenceStatus.UNCHANGED == status) {
      LOG.debug("Ignoring fragment of status UNCHANGED.");
      return;
    }

    final Fragment source = fragmentPersister.get(occurrenceKey);
    if (source == null) {
      logError("Could not find", occurrenceKey, datasetKey, fromCrawl);
      return;
    }

    // During a crawl the caller supplies the attempt; otherwise fall back to what the fragment recorded.
    // NOTE(review): if Fragment.getCrawlId() can return a null Integer this unboxing would throw - confirm.
    final int crawlAttempt;
    if (fromCrawl) {
      crawlAttempt = attemptId;
    } else {
      crawlAttempt = source.getCrawlId();
    }
    LOG.debug("Fragment for key [{}] and UUID [{}] crawl [{}] is [{}]", occurrenceKey, source.getDatasetKey(),
      crawlAttempt, status);

    final VerbatimOccurrence parsed = FragmentParser.parse(source);
    if (parsed == null) {
      // a null parse result means parsing failed, so there is nothing to persist for this fragment
      logError("Could not parse", occurrenceKey, datasetKey, fromCrawl);
      return;
    }

    occurrencePersister.update(parsed);
    if (fromCrawl) {
      LOG.debug("Updating zookeeper for VerbatimOccurrencePersistedSuccess");
      zookeeperConnector.addCounter(datasetKey, ZookeeperConnector.CounterName.VERBATIM_OCCURRENCE_PERSISTED_SUCCESS);
    }

    publishPersisted(
      new VerbatimPersistedMessage(parsed.getDatasetKey(), crawlAttempt, status, parsed.getKey()), status);
    verbProcessed.mark();
  }

  /**
   * Enforces the preconditions of {@link #buildVerbatim}: a positive occurrenceKey, a non-null status and, when
   * called from a crawl, a non-null datasetKey together with a positive attemptId.
   */
  private static void validateArguments(int occurrenceKey, OccurrencePersistenceStatus status, boolean fromCrawl,
    @Nullable Integer attemptId, @Nullable UUID datasetKey) {
    checkArgument(occurrenceKey > 0, "occurrenceKey must be greater than 0");
    checkNotNull(status, "status can't be null");
    if (!fromCrawl) {
      return;
    }
    checkNotNull(datasetKey, "datasetKey can't be null if fromCrawl is true");
    checkArgument(attemptId != null && attemptId > 0, "attemptId must be greater than 0 if fromCrawl is true");
  }

  /**
   * Sends the given message while timing the send. An IOException from the publisher is logged and not rethrown.
   */
  private void publishPersisted(VerbatimPersistedMessage outgoing, OccurrencePersistenceStatus status) {
    final TimerContext sendTiming = msgTimer.time();
    try {
      messagePublisher.send(outgoing);
    } catch (IOException e) {
      LOG.warn("Could not send VerbatimPersistedMessage for successful [{}]", status, e);
    } finally {
      sendTiming.stop();
    }
  }

  /**
   * Logs a warning about a fragment that is being skipped and, during a crawl, bumps the zookeeper error counter for
   * the dataset.
   *
   * @param message prefix describing what went wrong, e.g. "Could not find"
   * @param occurrenceKey key of the fragment that is skipped
   * @param datasetKey dataset whose error counter is incremented (only used if fromCrawl is true)
   * @param fromCrawl true if the failure happened as part of a crawl
   */
  private void logError(String message, int occurrenceKey, UUID datasetKey, boolean fromCrawl) {
    // TODO: send msg?
    final String template = message + " fragment with key [{}] - skipping.";
    LOG.warn(template, occurrenceKey);
    if (!fromCrawl) {
      return;
    }
    LOG.debug("Updating zookeeper for VerbatimOccurrencePersistedError");
    zookeeperConnector.addCounter(datasetKey, ZookeeperConnector.CounterName.VERBATIM_OCCURRENCE_PERSISTED_ERROR);
  }
}