package org.gbif.checklistbank.service.mybatis.tmp;
import org.gbif.api.model.checklistbank.ParsedName;
import org.gbif.api.service.checklistbank.NameParser;
import org.gbif.checklistbank.config.ClbConfiguration;
import org.gbif.checklistbank.model.ScientificName;
import org.gbif.checklistbank.service.mybatis.guice.InternalChecklistBankServiceMyBatisModule;
import org.gbif.checklistbank.service.mybatis.mapper.NameUsageMapper;
import org.gbif.checklistbank.service.mybatis.mapper.ParsedNameMapper;
import org.gbif.nameparser.GBIFNameParser;
import org.gbif.utils.concurrent.ExecutorUtils;

import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.google.inject.Guice;
import com.google.inject.Injector;
import org.apache.ibatis.exceptions.PersistenceException;
import org.apache.ibatis.session.ExecutorType;
import org.apache.ibatis.session.ResultContext;
import org.apache.ibatis.session.ResultHandler;
import org.mybatis.guice.transactional.Transactional;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Reparses all scientific names handed over by {@code NameUsageMapper.processAllNames} and writes the
 * parsing results back through the {@code ParsedNameMapper}.
 * <p>
 * Names are collected into batches of {@link #BATCH_SIZE} by the thread calling {@link #run()} and each batch
 * is handed to a fixed size thread pool that parses the names and persists the result.
 * The statistic counters are updated by the worker threads and are therefore kept in atomic integers.
 */
public class NameUsageReparser implements Runnable {
  private static final Logger LOG = LoggerFactory.getLogger(NameUsageReparser.class);
  /** Number of names that make up a single reparsing job. */
  private static final int BATCH_SIZE = 1000;

  // NOTE(review): the same parser instance is used by all worker threads - confirm GBIFNameParser is thread safe
  private final NameParser parser = new GBIFNameParser();
  private final ExecutorService exec;
  private final NameUsageMapper usageMapper;
  private final ParsedNameMapper nameMapper;
  private final int threads;

  /** Number of submitted batches that have not finished yet. */
  private final AtomicInteger jobCounter = new AtomicInteger();
  /** Total number of batches handed to the executor. */
  private final AtomicInteger submittedJobs = new AtomicInteger();
  /** Number of names that went through the parser. */
  private final AtomicInteger counter = new AtomicInteger();
  /** Number of names that were not parsed although their type is unknown or parsable. */
  private final AtomicInteger failed = new AtomicInteger();
  /** Number of names that were not parsed and have a type that is not parsable. */
  private final AtomicInteger unparsable = new AtomicInteger();

  /**
   * Creates the mybatis mappers from the given configuration and a fixed thread pool
   * with one thread less than the configured maximum pool size, but at least one thread.
   *
   * @param cfg checklistbank configuration used to build the mybatis guice module
   */
  public NameUsageReparser(ClbConfiguration cfg) {
    Injector inj = Guice.createInjector(InternalChecklistBankServiceMyBatisModule.create(cfg));
    nameMapper = inj.getInstance(ParsedNameMapper.class);
    usageMapper = inj.getInstance(NameUsageMapper.class);
    threads = Math.max(1, cfg.maximumPoolSize - 1);
    exec = Executors.newFixedThreadPool(threads);
  }

  /**
   * Streams all names through a {@link ReparseHandler}, submits the last incomplete batch,
   * stops the executor and logs the final statistics.
   */
  @Override
  public void run() {
    LOG.info("Submit reparsing jobs in batches of {} to executor with {} threads.", BATCH_SIZE, threads);
    ReparseHandler handler = new ReparseHandler();
    usageMapper.processAllNames(handler);
    // finally submit the remaining unfinished batch
    handler.submitBatch();
    LOG.info("Submitted all {} jobs.", submittedJobs.get());

    // NOTE(review): ExecutorUtils.stop presumably waits at most the given timeout for queued jobs.
    // If batches are still pending after 10 seconds they may get dropped - confirm the timeout is sufficient.
    ExecutorUtils.stop(exec, 10, TimeUnit.SECONDS);

    int remaining = jobCounter.get();
    if (remaining != 0) {
      LOG.warn("Something not right. All jobs should be done but {} remain in counter", remaining);
    }
    LOG.info("Done! Reparsed {} unique names, {} failed, {} unparsable", counter.get(), failed.get(), unparsable.get());
  }

  /**
   * Result handler that buffers the streamed names and submits a {@link ReparseBatch}
   * to the executor each time {@link #BATCH_SIZE} names have been collected.
   */
  private class ReparseHandler implements ResultHandler<ScientificName> {
    private final List<ScientificName> batch = Lists.newArrayList();

    @Override
    public void handleResult(ResultContext<? extends ScientificName> context) {
      batch.add(context.getResultObject());
      if (batch.size() >= BATCH_SIZE) {
        submitBatch();
      }
    }

    /**
     * Submits the currently buffered names as one job and empties the buffer.
     * Does nothing if no names are buffered.
     */
    public void submitBatch() {
      if (batch.isEmpty()) {
        return;
      }
      // the job takes its own copy of the names, so the buffer can be reused right away
      ReparseBatch job = new ReparseBatch(batch);
      batch.clear();
      // count the job before it is handed over, otherwise a fast worker could decrement the counter first
      jobCounter.incrementAndGet();
      try {
        exec.submit(job);
      } catch (RuntimeException e) {
        // the job was never accepted, do not keep it in the outstanding counter
        jobCounter.decrementAndGet();
        throw e;
      }
      submittedJobs.incrementAndGet();
    }
  }

  /**
   * Simple pair of the original name record and its parsing result.
   */
  class ScientificParsedName {
    public final ScientificName sciname;
    public final ParsedName pn;

    public ScientificParsedName(ScientificName sciname, ParsedName pn) {
      this.sciname = sciname;
      this.pn = pn;
    }
  }

  /**
   * Job that parses one batch of names and writes the results with the name mapper.
   */
  private class ReparseBatch implements Runnable {
    private final List<ScientificName> names;

    /**
     * @param names the names to reparse. An immutable copy is kept, later changes to the list are not seen.
     */
    private ReparseBatch(List<ScientificName> names) {
      this.names = ImmutableList.copyOf(names);
    }

    /**
     * Parses all names of the batch, updates the statistic counters and persists the results.
     * Exceptions are logged and not propagated. The outstanding job counter is decremented in any case.
     */
    @Override
    public void run() {
      try {
        // parse names
        List<ScientificParsedName> pNames = Lists.newArrayList();
        for (ScientificName n : names) {
          counter.incrementAndGet();
          ParsedName p = parser.parseQuietly(n.getScientificName(), n.getRank());
          if (!p.isParsed()) {
            if (p.getType() == null || p.getType().isParsable()) {
              failed.incrementAndGet();
            } else {
              unparsable.incrementAndGet();
            }
          }
          pNames.add(new ScientificParsedName(n, p));
        }
        // write names to table. rank & scientific_name must be unique already!
        writeNames(pNames);

      } catch (Exception e) {
        LOG.error("Batch reparsing error", e);

      } finally {
        // a failed batch is finished too, so the counter must go down on every path
        int remaining = jobCounter.decrementAndGet();
        if (remaining % 100 == 0) {
          LOG.info("Reparsed {} unique names. {} failed, {} unparsable. {} batches left", counter.get(), failed.get(), unparsable.get(), remaining);
        } else if (remaining % 10 == 0) {
          LOG.debug("Reparsed {} unique names. {} failed, {} unparsable. {} batches left", counter.get(), failed.get(), unparsable.get(), remaining);
        }
      }
    }

    /**
     * Persists every parsed name individually. If a name cannot be written the problem is logged
     * and the name is reported to {@code ParsedNameMapper.failed} instead, then the loop continues.
     * On a {@link PersistenceException} the scientific name and rank of the parsed name are reported,
     * on any other exception those of the original name record.
     *
     * @param pNames the original names together with their parsing result
     */
    // NOTE(review): this is a private method on an instance that is created with "new" and not by Guice,
    // so the transactional interceptor most likely never applies here - verify and move if a transaction is needed.
    @Transactional(
        exceptionMessage = "names inserts failed",
        executorType = ExecutorType.REUSE
    )
    private void writeNames(List<ScientificParsedName> pNames) {
      for (ScientificParsedName spn : pNames) {
        try {
          nameMapper.create2(spn.sciname.getKey(), spn.pn);
        } catch (PersistenceException e) {
          // prefer the underlying cause for the message if there is one
          Throwable cause = e.getCause() != null ? e.getCause() : e;
          LOG.warn("Failed to persist name {}: {}", spn.pn, cause.getMessage());
          nameMapper.failed(spn.sciname.getKey(), spn.pn.getScientificName(), spn.pn.getRank());
        } catch (Exception e) {
          LOG.error("Unexpected error persisting name {}", spn.pn, e);
          nameMapper.failed(spn.sciname.getKey(), spn.sciname.getScientificName(), spn.sciname.getRank());
        }
      }
    }
  }
}