package org.gbif.checklistbank.index.backfill;

import org.gbif.api.service.checklistbank.DescriptionService;
import org.gbif.api.service.checklistbank.DistributionService;
import org.gbif.api.service.checklistbank.SpeciesProfileService;
import org.gbif.api.service.checklistbank.VernacularNameService;
import org.gbif.checklistbank.index.guice.AvroIndexingModule;
import org.gbif.checklistbank.service.UsageService;
import org.gbif.checklistbank.service.mybatis.DescriptionServiceMyBatis;
import org.gbif.checklistbank.service.mybatis.DistributionServiceMyBatis;
import org.gbif.checklistbank.service.mybatis.SpeciesProfileServiceMyBatis;
import org.gbif.checklistbank.service.mybatis.VernacularNameServiceMyBatis;

import java.util.Properties;
import java.util.concurrent.Callable;

import com.google.inject.Guice;
import com.google.inject.Inject;
import com.google.inject.Injector;
import com.google.inject.name.Named;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Checklist Bank multithreaded name usage exporter that writes Avro files to HDFS
 * (consumed downstream by the solr name usage indexing process).
 * This class creates a pool of configurable <i>threads</i> that concurrently execute a number of jobs,
 * each processing a configurable number of name usages (<i>batchSize</i>).
 * The exporter makes direct use of the mybatis layer and requires a checklist bank datasource to be configured.
*/ public class AvroExporter extends NameUsageBatchProcessor { private static final Logger LOG = LoggerFactory.getLogger(AvroExporter.class); private String nameNode; private String targetHdfsDir; @Inject public AvroExporter(@Named(IndexingConfigKeys.THREADS) Integer threads, @Named(IndexingConfigKeys.NAME_NODE) String nameNode, @Named(IndexingConfigKeys.TARGET_HDFS_DIR) String targetHdfsDir, @Named(IndexingConfigKeys.BATCH_SIZE) Integer batchSize, @Named(IndexingConfigKeys.LOG_INTERVAL) Integer logInterval, UsageService nameUsageService, VernacularNameService vernacularNameService, DescriptionService descriptionService, DistributionService distributionService, SpeciesProfileService speciesProfileService) { super(threads, batchSize, logInterval, nameUsageService, vernacularNameService, descriptionService, distributionService, speciesProfileService); this.nameNode = nameNode; this.targetHdfsDir = targetHdfsDir; } /** * Entry point for execution. * Commandline arguments are: * 0: required path to property file */ public static void main(String[] args) throws Exception { if (args.length == 0) { throw new IllegalArgumentException("Path to property file required"); } // Creates the injector Properties props = loadProperties(args[0]); Injector injector = Guice.createInjector(new AvroIndexingModule(props)); // Gets the indexer instance AvroExporter nameUsageIndexer = injector.getInstance(AvroExporter.class); nameUsageIndexer.run(); // This statement is used because the Guice container is not stopped inside the threadpool. LOG.info("Indexing done. 
Time to exit."); System.exit(0); } @Override protected Callable<Integer> newBatchJob(int startKey, int endKey, UsageService nameUsageService, VernacularNameServiceMyBatis vernacularNameService, DescriptionServiceMyBatis descriptionService, DistributionServiceMyBatis distributionService, SpeciesProfileServiceMyBatis speciesProfileService) { return new AvroExportJob(nameUsageService, startKey, endKey, vernacularNameService, descriptionService, distributionService, speciesProfileService, nameNode, targetHdfsDir); } @Override protected void init() throws Exception { // nothing to do } @Override protected void postprocess() throws Exception { // nothing to do } }