package org.gbif.checklistbank.cli.importer; import org.gbif.checklistbank.cli.common.NeoConfiguration; import org.gbif.checklistbank.cli.normalizer.Normalizer; import org.gbif.checklistbank.cli.normalizer.NormalizerConfiguration; import org.gbif.checklistbank.cli.normalizer.NormalizerStats; import org.gbif.nub.lookup.straight.IdLookupPassThru; import org.gbif.utils.HttpUtil; import org.gbif.utils.file.CompressionUtil; import java.io.File; import java.io.IOException; import java.sql.SQLException; import java.sql.Statement; import java.util.UUID; import com.beust.jcommander.internal.Maps; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.dataformat.yaml.YAMLFactory; import com.google.common.collect.Lists; import com.google.common.io.Resources; import com.codahale.metrics.MetricRegistry; import org.junit.Ignore; import org.postgresql.core.BaseConnection; @Ignore("A manual test class") /** * Test to manually index an external checklist and download, normalize and import it. * For importing it uses the test resource yaml config file where you can turn on solr if needed! */ public class ManualImport { UUID datasetKey; private NormalizerConfiguration nCfg; private ImporterConfiguration iCfg; public void index(String repo, String url, UUID datasetKey) throws Exception { this.datasetKey = datasetKey; init(repo, false); download(url); normalize(); sync(); } private void init(String repo, boolean truncate) throws IOException, SQLException { System.out.println("Init environment for dataset " + datasetKey); File tmp = new File(repo); File dwca = new File(tmp, "dwca"); File neo = new File(tmp, "neo"); final ObjectMapper mapper = new ObjectMapper(new YAMLFactory()); nCfg = new NormalizerConfiguration(); nCfg.neo = new NeoConfiguration(); nCfg.neo.neoRepository = neo; nCfg.archiveRepository = dwca; iCfg = mapper.readValue(Resources.getResource("cfg-importer.yaml"), ImporterConfiguration.class); iCfg.neo = nCfg.neo; iCfg.deleteNeo = false; iCfg.clb.serverName = "localhost"; iCfg.clb.databaseName = "clb"; iCfg.clb.user = "postgres"; iCfg.clb.password = "pogo"; //iCfg.solr.serverType = SolrServerType.HTTP; //iCfg.solr.serverHome="http://apps2.gbif-dev.org:8082/checklistbank-solr"; // truncate tables? if (truncate) { try (BaseConnection c = (BaseConnection) iCfg.clb.connect()){ try (Statement st = c.createStatement()) { for (String table : Lists.newArrayList("name_usage_metrics", "raw_usage", "name_usage", "citation", "name")) { st.execute("TRUNCATE "+table+" CASCADE"); } } } System.out.println("Truncated clb tables"); } } private void download(String url) throws IOException { HttpUtil hutil = new HttpUtil(HttpUtil.newMultithreadedClient(2000, 5, 2)); File dwca = File.createTempFile("clb", ".dwca"); hutil.download(url, dwca); System.out.println("Downloaded raw archive to " + dwca.getAbsolutePath()); CompressionUtil.decompressFile(nCfg.archiveDir(datasetKey), dwca); System.out.println("Decompressed archive to " + nCfg.archiveDir(datasetKey)); } private void normalize() { MetricRegistry registry = new MetricRegistry(); Normalizer norm = Normalizer.create(nCfg, datasetKey, registry, Maps.<String, UUID>newHashMap(), new IdLookupPassThru()); norm.run(); NormalizerStats stats = norm.getStats(); System.out.println(stats); } private void sync() throws Exception { ImporterIT iit = new ImporterIT(); Importer importer = iit.build(iCfg, datasetKey); importer.run(); iit.close(); } public static void main(String[] args) throws Exception { ManualImport imp = new ManualImport(); // imp.index("/Users/markus/nub-repo", "", Constants.NUB_DATASET_KEY); // imp.index("/Users/markus/Desktop/repo", // "http://plazi.cs.umb.edu/GgServer/dwca/87A1ADC3C0C450976B05972ED1005EFC.zip", // UUID.fromString("0f66de86-d95f-47d1-af8d-b126ac38857a")); imp.index("/Users/markus/Desktop/repo", "http://data.canadensys.net/ipt/archive.do?r=vascan", UUID.fromString("3f8a1297-3259-4700-91fc-acc4170b27ce")); // imp.index("/Users/markus/Desktop/repo", // "http://www.catalogueoflife.org/DCA_Export/zip/archive-complete.zip", // UUID.fromString("7ddf754f-d193-4cc9-b351-99906754a03b")); } }