package org.gbif.checklistbank.cli;

import org.gbif.api.service.checklistbank.NameUsageService;
import org.gbif.checklistbank.cli.common.NeoConfiguration;
import org.gbif.checklistbank.cli.importer.Importer;
import org.gbif.checklistbank.cli.importer.ImporterConfiguration;
import org.gbif.checklistbank.cli.normalizer.Normalizer;
import org.gbif.checklistbank.cli.normalizer.NormalizerConfiguration;
import org.gbif.checklistbank.cli.normalizer.NormalizerStats;
import org.gbif.checklistbank.index.guice.RealTimeModule;
import org.gbif.checklistbank.index.guice.Solr;
import org.gbif.checklistbank.service.DatasetImportService;
import org.gbif.checklistbank.service.UsageService;
import org.gbif.checklistbank.service.mybatis.guice.ChecklistBankServiceMyBatisModule;
import org.gbif.checklistbank.service.mybatis.guice.InternalChecklistBankServiceMyBatisModule;
import org.gbif.checklistbank.service.mybatis.guice.Mybatis;
import org.gbif.common.search.solr.SolrServerType;
import org.gbif.nub.lookup.straight.IdLookupPassThru;
import org.gbif.utils.HttpUtil;
import org.gbif.utils.file.CompressionUtil;

import java.io.File;
import java.io.IOException;
import java.sql.SQLException;
import java.util.UUID;

import com.codahale.metrics.MetricRegistry;
import com.google.common.collect.Maps;
import com.google.inject.Guice;
import com.google.inject.Injector;
import com.google.inject.Key;
import com.zaxxer.hikari.HikariDataSource;

/**
 * Utility to manually index an external checklist: it downloads, normalizes and imports the archive.
 * ATTENTION: this class is mainly for debugging and, for simplicity, all configs live in the code!!!
 */
public class IndexerApp {
  UUID datasetKey;
  private NormalizerConfiguration nCfg;
  private ImporterConfiguration iCfg;
  private HikariDataSource hds;

  public void index(String repo, String url, UUID datasetKey) throws Exception {
    this.datasetKey = datasetKey;
    init(repo);
    download(url);
    normalize();
    sync();
  }

  private void init(String repo) throws IOException, SQLException {
    System.out.println("Init environment for dataset " + datasetKey);
    File tmp = new File(repo);
    File dwca = new File(tmp, "dwca");
    File neo = new File(tmp, "neo");

    // normalizer config: where the decompressed dwc archives and the neo4j repository live
    nCfg = new NormalizerConfiguration();
    nCfg.neo = new NeoConfiguration();
    nCfg.neo.neoRepository = neo;
    nCfg.archiveRepository = dwca;

    // importer config: reuse the neo repository and point to a local postgres and the dev solr cloud
    iCfg = new ImporterConfiguration();
    iCfg.neo = nCfg.neo;
    iCfg.deleteNeo = false;
    iCfg.clb.serverName = "localhost";
    iCfg.clb.databaseName = "clb";
    iCfg.clb.user = "postgres";
    iCfg.clb.password = "pogo";
    iCfg.solr.serverType = SolrServerType.CLOUD;
    iCfg.solr.serverHome = "c1n1.gbif.org:2181,c1n2.gbif.org:2181,c1n3.gbif.org:2181/solrdev";
    iCfg.solr.collection = "dev_checklistbank";
    //iCfg.solr.serverType = SolrServerType.HTTP;
    //iCfg.solr.serverHome = "http://apps2.gbif-dev.org:8082/checklistbank-solr";
  }

  private void download(String url) throws IOException {
    // download the raw archive to a temp file, then decompress it into the dataset's archive folder
    HttpUtil hutil = new HttpUtil(HttpUtil.newMultithreadedClient(2000, 5, 2));
    File dwca = File.createTempFile("clb", ".dwca");
    hutil.download(url, dwca);
    System.out.println("Downloaded raw archive to " + dwca.getAbsolutePath());
    CompressionUtil.decompressFile(nCfg.archiveDir(datasetKey), dwca);
    System.out.println("Decompressed archive to " + nCfg.archiveDir(datasetKey));
  }

  private void normalize() {
    // build the normalized neo4j tree from the archive; IdLookupPassThru skips real nub id lookups
    MetricRegistry registry = new MetricRegistry();
    Normalizer norm = Normalizer.create(nCfg, datasetKey, registry, Maps.<String, UUID>newHashMap(), new IdLookupPassThru());
    norm.run();
    NormalizerStats stats = norm.getStats();
    System.out.println(stats);
  }

  private void sync() throws Exception {
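    // The importer needs both flavors of DatasetImportService: the mybatis one writing to
    // postgres and the solr one updating the search index. The binding annotations
    // @Mybatis and @Solr distinguish the two implementations wired up below.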
    // init mybatis layer and solr from cfg instance
    Injector inj = Guice.createInjector(ChecklistBankServiceMyBatisModule.create(iCfg.clb), new RealTimeModule(iCfg.solr));
    hds = (HikariDataSource) inj.getInstance(InternalChecklistBankServiceMyBatisModule.DATASOURCE_KEY);
    NameUsageService nameUsageService = inj.getInstance(NameUsageService.class);
    UsageService usageService = inj.getInstance(UsageService.class);
    DatasetImportService sqlService = inj.getInstance(Key.get(DatasetImportService.class, Mybatis.class));
    DatasetImportService solrService = inj.getInstance(Key.get(DatasetImportService.class, Solr.class));
    try {
      Importer importer = Importer.create(iCfg, datasetKey, nameUsageService, usageService, sqlService, solrService);
      importer.run();
    } finally {
      // release the import services and the underlying hikari connection pool
      sqlService.close();
      solrService.close();
      hds.close();
    }
  }

  public static void main(String[] args) throws Exception {
    IndexerApp imp = new IndexerApp();
    //imp.index("/Users/markus/nub-repo", "", Constants.NUB_DATASET_KEY);
    imp.index("/Users/markus/Desktop/repo",
        "http://services.snsb.info/DTNtaxonlists/rest/v0.1/lists/DiversityTaxonNames_Fossils/1154/dwc",
        UUID.fromString("f096326f-8f98-4301-886b-d715e87e1d4e"));
    // imp.index("/Users/markus/Desktop/repo",
    //     "http://bdj.pensoft.net/lib/ajax_srv/archive_download.php?archive_type=2&document_id=4878",
    //     UUID.fromString("e18d65e8-3e8e-4cce-b309-256346f99de3"));
    // imp.index("/Users/markus/Desktop/repo",
    //     "http://data.canadensys.net/ipt/archive.do?r=vascan",
    //     UUID.fromString("3f8a1297-3259-4700-91fc-acc4170b27ce"));
    // imp.index("/Users/markus/Desktop/repo",
    //     "http://www.catalogueoflife.org/DCA_Export/zip/archive-complete.zip",
    //     UUID.fromString("7ddf754f-d193-4cc9-b351-99906754a03b"));
  }
}