package org.gbif.checklistbank.neo;
import org.gbif.checklistbank.cli.normalizer.InsertMetadata;
import org.gbif.checklistbank.cli.normalizer.NormalizerConfiguration;
import org.gbif.checklistbank.utils.ResourcesMonitor;
import org.gbif.utils.file.CompressionUtil;
import java.io.File;
import java.net.URL;
import java.nio.file.Files;
import java.util.List;
import java.util.Timer;
import java.util.UUID;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorCompletionService;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.codahale.metrics.MetricRegistry;
import org.apache.commons.io.FileUtils;
import org.junit.Ignore;
import org.junit.Test;
/**
 * Manual stress test that runs many {@link NeoInserter} jobs concurrently on a fixed size
 * thread pool and afterwards prints the open file descriptor count and all live threads.
 * <p>
 * The test is ignored by default because it is long running; remove the {@code @Ignore}
 * locally to execute it.
 */
@Ignore("manual long running test to discover why we see too many hanging PageCache threads in neos batch inserter")
public class MultiThreadingNeoInserterTest {

  /** Number of worker threads executing inserter jobs in parallel. */
  private static final int THREADS = 5;

  /**
   * A single insert task: opens a persistent {@link UsageDao} for the given dataset key,
   * inserts the archive found in the configured archive directory and closes everything again.
   */
  public class InserterJob implements Callable<InsertMetadata> {
    private final UUID key;
    private final NormalizerConfiguration cfg;
    private final MetricRegistry registry;

    /**
     * @param key      dataset key identifying both the dao and the archive directory
     * @param cfg      configuration supplying the neo settings and the archive directory
     * @param registry metric registry handed on to the dao
     */
    public InserterJob(UUID key, NormalizerConfiguration cfg, MetricRegistry registry) {
      this.cfg = cfg;
      this.key = key;
      this.registry = registry;
    }

    /**
     * Runs the insert and returns the metadata reported by the inserter.
     *
     * @return metadata produced by {@link NeoInserter#insert}
     * @throws Exception anything thrown while opening the dao, inserting or closing
     */
    @Override
    public InsertMetadata call() throws Exception {
      UsageDao dao = UsageDao.persistentDao(cfg.neo, key, false, registry, true);
      try {
        NeoInserter ins = dao.createBatchInserter(100);
        try {
          File dwca = cfg.archiveDir(key);
          return ins.insert(dwca, Maps.<String, UUID>newHashMap());
        } finally {
          // close the inserter even when the insert fails so a failed job releases it too
          ins.close();
        }
      } finally {
        dao.close();
      }
    }
  }

  /**
   * Submits 500 inserter jobs, each working on its own decompressed copy of the
   * {@code /plazi.zip} test archive, waits for all of them in submission order and then
   * prints resource diagnostics.
   *
   * @throws Exception if the test archive is missing, a job fails or the thread is interrupted
   */
  @Test
  public void manyNormalizersInParallel() throws Exception {
    final int tasks = 500;

    // resolve the archive from the classpath instead of a machine specific absolute path
    URL zipUrl = getClass().getResource("/plazi.zip");
    if (zipUrl == null) {
      throw new IllegalStateException("Test resource /plazi.zip not found on classpath");
    }
    // toURI() decodes escaped characters (e.g. spaces) which URL.getFile() would leave encoded
    File zip = new File(zipUrl.toURI());

    NormalizerConfiguration cfgN = new NormalizerConfiguration();
    cfgN.neo.neoRepository = Files.createTempDirectory("neotest").toFile();
    cfgN.archiveRepository = Files.createTempDirectory("neotestdwca").toFile();
    MetricRegistry registry = new MetricRegistry();

    Timer timer = new Timer();
    ExecutorService executor = Executors.newFixedThreadPool(THREADS);
    try {
      ResourcesMonitor monitor = new ResourcesMonitor();
      timer.schedule(monitor, 500);

      ExecutorCompletionService<InsertMetadata> ecs = new ExecutorCompletionService<InsertMetadata>(executor);
      List<Future<InsertMetadata>> futures = Lists.newArrayList();
      for (int i = 0; i < tasks; i++) {
        UUID dk = UUID.randomUUID();
        // give every job its own copy of the archive
        File dwca = cfgN.archiveDir(dk);
        CompressionUtil.decompressFile(dwca, zip);
        System.out.println("Submit inserter job " + i);
        futures.add(ecs.submit(new InserterJob(dk, cfgN, registry)));
      }

      int idx = 1;
      for (Future<InsertMetadata> f : futures) {
        // blocks until the job is done and rethrows any job failure
        f.get();
        System.out.println("Finished inserter " + idx++);
        monitor.run();
      }

      System.out.println("Finished all jobs");
      System.out.println("Open files: " + monitor.getOpenFileDescriptorCount());
      System.out.println("Running threads: " + Thread.getAllStackTraces().size());
      for (Thread t : Thread.getAllStackTraces().keySet()) {
        System.out.println(t.getState() + " " + t.getName());
      }
    } finally {
      // release the worker and timer threads so the test itself does not leak threads;
      // after a failure still-running jobs are only interrupted, not awaited (best effort)
      executor.shutdownNow();
      timer.cancel();
      System.out.println("Cleaning up artifacts...");
      FileUtils.deleteQuietly(cfgN.neo.neoRepository);
      FileUtils.deleteQuietly(cfgN.archiveRepository);
    }
  }
}