package org.gbif.checklistbank.cli;

import org.gbif.api.service.checklistbank.NameUsageService;
import org.gbif.checklistbank.cli.importer.Importer;
import org.gbif.checklistbank.cli.importer.ImporterConfiguration;
import org.gbif.checklistbank.cli.normalizer.Normalizer;
import org.gbif.checklistbank.cli.normalizer.NormalizerConfiguration;
import org.gbif.checklistbank.index.guice.RealTimeModule;
import org.gbif.checklistbank.index.guice.Solr;
import org.gbif.checklistbank.service.DatasetImportService;
import org.gbif.checklistbank.service.mybatis.guice.ChecklistBankServiceMyBatisModule;
import org.gbif.checklistbank.service.mybatis.guice.InternalChecklistBankServiceMyBatisModule;
import org.gbif.checklistbank.service.mybatis.guice.Mybatis;
import org.gbif.checklistbank.utils.ResourcesMonitor;
import org.gbif.checklistbank.utils.RunnableAdapter;
import org.gbif.common.search.solr.SolrServerType;
import org.gbif.utils.file.CompressionUtil;

import java.io.File;
import java.io.PrintStream;
import java.nio.file.Files;
import java.sql.Connection;
import java.sql.SQLException;
import java.util.LinkedList;
import java.util.List;
import java.util.Timer;
import java.util.UUID;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.dataformat.yaml.YAMLFactory;
import com.google.common.collect.Lists;
import com.google.common.io.Resources;
import com.google.inject.Guice;
import com.google.inject.Injector;
import com.google.inject.Key;
import com.codahale.metrics.MetricRegistry;
import com.zaxxer.hikari.HikariDataSource;
import org.apache.commons.io.FileUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;

/**
 * Manual, long running test used to investigate why heavy importer CLI use runs into too many open files.
 */
@Ignore("manual long running test to discover why we see too many open files in heavy importer cli use")
public class MultiThreadingCliTest {
  private static final ObjectMapper CFG_MAPPER = new ObjectMapper(new YAMLFactory());
  private final int threads = 5;

  private MetricRegistry registry;
  private NormalizerConfiguration cfgN;
  private ImporterConfiguration cfgI;
  private File zip;
  private ResourcesMonitor monitor;
  private NameUsageService usageService;
  private HikariDataSource hds;
  DatasetImportService sqlService;
  DatasetImportService solrService;

  @Before
  public void init() throws Exception {
    registry = new MetricRegistry();

    cfgN = new NormalizerConfiguration();
    cfgN.neo.neoRepository = Files.createTempDirectory("neotest").toFile();
    cfgN.archiveRepository = Files.createTempDirectory("neotestdwca").toFile();

    cfgI = CFG_MAPPER.readValue(Resources.getResource("cfg-importer.yaml"), ImporterConfiguration.class);
    cfgI.neo = cfgN.neo;
    System.out.println("Using postgres instance " + cfgI.clb.serverName + " " + cfgI.clb.databaseName);

    // test archive that gets copied once per dataset key
    zip = new File(getClass().getResource("/plazi.zip").getFile());

    // monitor resource usage, e.g. open file descriptors; the tests also call monitor.run() directly
    Timer timer = new Timer();
    monitor = new ResourcesMonitor();
    timer.schedule(monitor, 1000);
  }

  @After
  public void cleanup() {
    FileUtils.deleteQuietly(cfgN.neo.neoRepository);
    FileUtils.deleteQuietly(cfgN.archiveRepository);
    if (hds != null) {
      hds.close();
    }
  }
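  /**
   * Runs many Normalizer instances concurrently on copies of the same archive, waits for all of them,
   * printing resource usage after each finished job, and finally dumps the state of all live threads
   * to help spot leaked file handles or hanging threads.
   */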
  @Test
  public void manyNormalizersInParallel() throws Exception {
    final int tasks = 500;

    ExecutorCompletionService<Object> ecs = new ExecutorCompletionService<>(Executors.newFixedThreadPool(threads));
    List<Future<Object>> futures = Lists.newArrayList();

    for (int i = 0; i < tasks; i++) {
      UUID dk = UUID.randomUUID();
      // copy dwca into the archive directory for this dataset key
      File dwca = cfgN.archiveDir(dk);
      CompressionUtil.decompressFile(dwca, this.zip);

      Normalizer normalizer = Normalizer.create(cfgN, registry, dk);
      System.out.println("Submit normalizer " + i);
      futures.add(ecs.submit(Executors.callable(normalizer)));
    }

    int idx = 1;
    for (Future<Object> f : futures) {
      f.get();
      System.out.println("Finished normalizer " + idx++);
      monitor.run();
    }
    System.out.println("Finished all jobs");
    monitor.run();

    // dump the state of all live threads
    for (Thread t : Thread.getAllStackTraces().keySet()) {
      System.out.println(t.getState() + " " + t.getName());
    }
  }

  /**
   * Runs normalizers and importers in parallel against the configured ChecklistBank database and Solr index.
   * The first batch of normalizers, one per worker thread, is submitted immediately; whenever a normalizer
   * finishes, the importer for its dataset is submitted, and whenever an importer finishes the next pending
   * normalizer is submitted, keeping the pool busy until all tasks are done.
   */
  @Test
  public void manyImporterInParallel() throws Exception {
    final int tasks = 1000;

    PrintStream log = System.out;

    // init mybatis layer and solr from the cfgI instance
    cfgI.solr.serverHome = "http://apps2.gbif-dev.org:8082/checklistbank-solr";
    cfgI.solr.serverType = SolrServerType.HTTP;
    Injector inj = Guice.createInjector(ChecklistBankServiceMyBatisModule.create(cfgI.clb), new RealTimeModule(cfgI.solr));
    usageService = inj.getInstance(NameUsageService.class);
    sqlService = inj.getInstance(Key.get(DatasetImportService.class, Mybatis.class));
    solrService = inj.getInstance(Key.get(DatasetImportService.class, Solr.class));
    hds = (HikariDataSource) inj.getInstance(InternalChecklistBankServiceMyBatisModule.DATASOURCE_KEY);

    // truncate existing tables
    log.println("Truncate existing data");
    Connection cn = hds.getConnection();
    java.sql.Statement st = cn.createStatement();
    st.execute("truncate name_usage cascade");
    st.execute("truncate name cascade");
    st.execute("truncate citation cascade");
    st.close();
    cn.close();

    ExecutorCompletionService<UUID> ecs = new ExecutorCompletionService<>(Executors.newFixedThreadPool(threads));
    LinkedList<Future<UUID>> futures = Lists.newLinkedList();

    log.println("Start creating normalization tasks");
    LinkedList<Normalizer> normalizers = Lists.newLinkedList();
    for (int i = 0; i < tasks; i++) {
      UUID dk = UUID.randomUUID();
      // copy dwca into the archive directory for this dataset key
      File dwca = cfgN.archiveDir(dk);
      CompressionUtil.decompressFile(dwca, this.zip);
      normalizers.add(Normalizer.create(cfgN, registry, dk));
    }

    // submit the first batch of normalizers, one per worker thread
    log.println("Submitting tasks ...");
    for (int x = 0; x < threads; x++) {
      Normalizer n = normalizers.removeFirst();
      futures.add(ecs.submit(Executors.callable(n, n.getDatasetKey())));
    }

    while (!futures.isEmpty()) {
      Future<UUID> f = futures.pop();
      UUID dk = f.get();
      if (dk != null) {
        // this was a normalizer, submit its importer
        log.println("Finished normalizer " + dk + " with open files: " + monitor.getOpenFileDescriptorCount());
        futures.add(ecs.submit(new RunnableAdapter(buildImporter(dk), null)));
      } else {
        // this was an importer, submit a new normalizer if we still have some
        log.println("Finished importer with open files: " + monitor.getOpenFileDescriptorCount());
        if (!normalizers.isEmpty()) {
          Normalizer n = normalizers.removeFirst();
          futures.add(ecs.submit(Executors.callable(n, n.getDatasetKey())));
        }
      }
    }
    log.println("Finished all tasks. Done");
    monitor.run();
  }

  public Importer buildImporter(UUID datasetKey) throws SQLException {
    return Importer.create(cfgI, datasetKey, usageService, null, sqlService, solrService);
  }
}