package org.gbif.checklistbank.index.backfill; import org.gbif.api.model.checklistbank.Description; import org.gbif.api.model.checklistbank.Distribution; import org.gbif.api.model.checklistbank.NameUsage; import org.gbif.api.model.checklistbank.SpeciesProfile; import org.gbif.api.model.checklistbank.VernacularName; import org.gbif.checklistbank.index.NameUsageDocConverter; import org.gbif.checklistbank.model.UsageExtensions; import org.gbif.checklistbank.service.UsageService; import org.gbif.checklistbank.service.mybatis.DescriptionServiceMyBatis; import org.gbif.checklistbank.service.mybatis.DistributionServiceMyBatis; import org.gbif.checklistbank.service.mybatis.SpeciesProfileServiceMyBatis; import org.gbif.checklistbank.service.mybatis.VernacularNameServiceMyBatis; import java.util.List; import java.util.Map; import java.util.concurrent.Callable; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import org.apache.commons.lang3.time.StopWatch; import org.apache.solr.client.solrj.SolrClient; import org.apache.solr.common.SolrInputDocument; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Executable job that creates a list of {@link SolrInputDocument} using a list of {@link NameUsage} objects. */ public class NameUsageIndexingJob implements Callable<Integer> { private final Logger log = LoggerFactory.getLogger(getClass()); private static final int batchSize = 100; /** * SolrServer instance. */ private final SolrClient solrClient; /** * Minimum usage key, inclusive, to process. */ private final int startKey; /** * Maximum usage key, inclusive, to process. */ private final int endKey; /** * Service layer. */ private final UsageService nameUsageService; private final VernacularNameServiceMyBatis vernacularNameService; private final DescriptionServiceMyBatis descriptionService; private final DistributionServiceMyBatis distributionService; private final SpeciesProfileServiceMyBatis speciesProfileService; private StopWatch stopWatch = new StopWatch(); /** * {@link NameUsage}/{@link SolrInputDocument} converter. */ private final NameUsageDocConverter solrDocumentConverter; /** * Default constructor. */ public NameUsageIndexingJob(final SolrClient solrClient, final UsageService nameUsageService, final int startKey, final int endKey, final NameUsageDocConverter solrDocumentConverter, final VernacularNameServiceMyBatis vernacularNameService, final DescriptionServiceMyBatis descriptionService, final DistributionServiceMyBatis distributionService, final SpeciesProfileServiceMyBatis speciesProfileService) { this.nameUsageService = nameUsageService; this.vernacularNameService = vernacularNameService; this.descriptionService = descriptionService; this.distributionService = distributionService; this.speciesProfileService = speciesProfileService; this.startKey = startKey; this.endKey = endKey; this.solrDocumentConverter = solrDocumentConverter; this.solrClient = solrClient; } /** * Iterates over the assigned {@link NameUsage} objects to insert the corresponding {@link SolrInputDocument} * objects. * * @return the total number of documents added by this Thread. */ @Override public Integer call() throws Exception { // Timing information initialization stopWatch.start(); log.info("Adding usages from id {} to {}", startKey, endKey); int docCount = 0; // Get all usages List<NameUsage> usages = nameUsageService.listRange(startKey, endKey); // get all component maps into memory first Map<Integer, List<VernacularName>> vernacularNameMap = vernacularNameService.listRange(startKey, endKey); Map<Integer, List<Description>> descriptionMap = descriptionService.listRange(startKey, endKey); Map<Integer, List<Distribution>> distributionMap = distributionService.listRange(startKey, endKey); Map<Integer, List<SpeciesProfile>> speciesProfileMap = speciesProfileService.listRange(startKey, endKey); // now we're ready to build the solr indices quicky! for (Iterable<NameUsage> batch : Iterables.partition(usages, batchSize)) { final List<SolrInputDocument> docs = Lists.newArrayList(); try { for (NameUsage usage : batch) { if (usage==null) { log.warn("Unexpected null usage found in range {}-{}, docCount={}", startKey, endKey, docCount); continue; } UsageExtensions ext = new UsageExtensions(); ext.speciesProfiles = speciesProfileMap.get(usage.getKey()); ext.vernacularNames = vernacularNameMap.get(usage.getKey()); ext.descriptions = descriptionMap.get(usage.getKey()); ext.distributions = distributionMap.get(usage.getKey()); List<Integer> parents = nameUsageService.listParents(usage.getKey()); docs.add(solrDocumentConverter.toDoc(usage, parents, ext)); docCount++; } solrClient.add(docs); NameUsageBatchProcessor.counter.addAndGet(docs.size()); } catch (Exception e) { log.error("Error indexing document for usage batch", e); } } // job finished notice stopWatch.stop(); log.info("Finished indexing of usages in range {}-{}. Total time: {}", startKey, endKey, stopWatch.toString()); return docCount; } }