package org.molgenis.ontology.sorta.job; import org.elasticsearch.common.collect.Iterables; import org.molgenis.data.DataService; import org.molgenis.data.Entity; import org.molgenis.data.populate.IdGenerator; import org.molgenis.data.jobs.Progress; import org.molgenis.data.support.DynamicEntity; import org.molgenis.data.support.QueryImpl; import org.molgenis.ontology.controller.SortaServiceController; import org.molgenis.ontology.core.meta.OntologyTermMetaData; import org.molgenis.ontology.sorta.meta.MatchingTaskContentMetaData; import org.molgenis.ontology.sorta.service.SortaService; import org.molgenis.security.core.runas.RunAsSystemProxy; import org.molgenis.ui.menu.MenuReaderService; import java.util.List; import java.util.concurrent.atomic.AtomicInteger; import static com.google.common.collect.Lists.newArrayList; import static java.util.Objects.requireNonNull; import static org.molgenis.ontology.sorta.meta.OntologyTermHitMetaData.SCORE; import static org.molgenis.util.ApplicationContextProvider.getApplicationContext; public class SortaJobProcessor { private static final int ADD_BATCH_SIZE = 1000; private static final int PROGRESS_UPDATE_BATCH_SIZE = 50; private final String ontologyIri; private final String inputRepositoryName; private final String resultRepositoryName; private final Progress progress; private final DataService dataService; private final SortaService sortaService; private final IdGenerator idGenerator; private final AtomicInteger counter; private final MenuReaderService menuReaderService; public SortaJobProcessor(String ontologyIri, String inputRepositoryName, String resultRepositoryName, Progress progress, DataService dataService, SortaService sortaService, IdGenerator idGenerator, MenuReaderService menuReaderService) { this.ontologyIri = requireNonNull(ontologyIri); this.inputRepositoryName = requireNonNull(inputRepositoryName); this.resultRepositoryName = requireNonNull(resultRepositoryName); this.progress = requireNonNull(progress); this.dataService = requireNonNull(dataService); this.sortaService = requireNonNull(sortaService); this.idGenerator = requireNonNull(idGenerator); this.counter = new AtomicInteger(0); this.menuReaderService = requireNonNull(menuReaderService); } public void process() { RunAsSystemProxy.runAsSystem(() -> { long maxCount = dataService.count(inputRepositoryName, new QueryImpl<>()); progress.status( "Matching " + maxCount + " input terms from " + inputRepositoryName + ".\nStoring results in " + resultRepositoryName); progress.setProgressMax((int) maxCount); // FIXME get rid of getApplicationContext reference MatchingTaskContentMetaData matchingTaskContentMetaData = getApplicationContext() .getBean(MatchingTaskContentMetaData.class); // Match input terms with code List<Entity> entitiesToAdd = newArrayList(); dataService.findAll(inputRepositoryName).forEach(inputRow -> { Entity resultEntity = new DynamicEntity(matchingTaskContentMetaData) { @Override protected void validateValueType(String attrName, Object value) { // FIXME enable validation by not overriding this method } }; resultEntity.set(MatchingTaskContentMetaData.INPUT_TERM, inputRow); resultEntity.set(MatchingTaskContentMetaData.IDENTIFIER, idGenerator.generateId()); resultEntity.set(MatchingTaskContentMetaData.VALIDATED, false); entitiesToAdd.add(resultEntity); Iterable<Entity> ontologyTermEntities = sortaService.findOntologyTermEntities(ontologyIri, inputRow); if (Iterables.size(ontologyTermEntities) > 0) { Entity firstMatchedOntologyTerm = Iterables .getFirst(ontologyTermEntities, new DynamicEntity(matchingTaskContentMetaData)); resultEntity.set(MatchingTaskContentMetaData.MATCHED_TERM, firstMatchedOntologyTerm.get(OntologyTermMetaData.ONTOLOGY_TERM_IRI)); resultEntity.set(MatchingTaskContentMetaData.SCORE, firstMatchedOntologyTerm.get(SCORE)); } else { resultEntity.set(MatchingTaskContentMetaData.SCORE, 0); } // Add entity in batch if (entitiesToAdd.size() >= ADD_BATCH_SIZE) { dataService.add(resultRepositoryName, entitiesToAdd.stream()); entitiesToAdd.clear(); } // Increase the number of the progress counter.incrementAndGet(); // Update the progress only when the progress proceeds the threshold if (counter.get() % PROGRESS_UPDATE_BATCH_SIZE == 0) { progress.progress(counter.get(), "Processed " + counter + " input terms."); } }); // Add the rest if (entitiesToAdd.size() != 0) { dataService.add(resultRepositoryName, entitiesToAdd.stream()); } progress.progress(counter.get(), "Processed " + counter + " input terms."); progress.setResultUrl(menuReaderService.getMenu().findMenuItemPath(SortaServiceController.ID) + "/result/" + resultRepositoryName); }); } }