/*
 * Concept profile generation tool suite
 * Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center,
 *  Rotterdam, The Netherlands
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published
 * by the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>
 */

package org.erasmusmc.applications.indexer;

import java.util.ArrayList;
import java.util.List;

import org.erasmusmc.groundhog.Groundhog;
import org.erasmusmc.medline.MedlineIterator;
import org.erasmusmc.medline.MedlineListener;
import org.erasmusmc.medline.MedlineRecord;
import org.erasmusmc.ontology.ConceptVector;
import org.erasmusmc.ontology.Ontology;
import org.erasmusmc.peregrine.ConceptPeregrine;
import org.erasmusmc.peregrine.PeregrineOutputConverter;
import org.erasmusmc.peregrine.disambiguator.GeneDisambiguator;
import org.erasmusmc.peregrine.disambiguator.UMLSDisambiguator;
import org.erasmusmc.utilities.StringUtilities;

/**
 * Indexes batches of Medline records with a {@link ConceptPeregrine} and hands each batch,
 * together with its concept vectors, to an {@link IndexerUpdateThreadForGroundhog}.
 *
 * <p>Indexing of a batch runs on the thread that delivers the records, while the previous
 * batch is still being processed by the update thread; at most one update thread is running
 * at any time. Configure the public fields first, then call {@link #start()}.
 */
public class IndexerMainForGroundhog implements MedlineListener {

  /** File handed to the indexer's normaliser via {@code loadCacheBinary} in {@link #start()}. */
  public String normaliserCacheFile = "/tmp/standardNormCache2006.bin";

  /** Source of the record batches; {@link #start()} calls {@code iterate(this)} on it. */
  public MedlineIterator medlineIterator = new MedlineIterator();

  /**
   * Passed to every update thread; its ontology is also used when converting indexer output
   * into concept vectors.
   */
  public Groundhog groundhog;

  /** When {@code true}, the gene and UMLS disambiguators run after each record is indexed. */
  public boolean disambiguate = true;

  private final ConceptPeregrine indexer = new ConceptPeregrine();

  /** Ontology installed on the indexer by {@link #start()}. */
  public Ontology ontology;

  private GeneDisambiguator geneDisambiguator;
  private UMLSDisambiguator UMLSdisambiguator;
  private IndexerUpdateThreadForGroundhog updateThread;

  /** Copied to the indexer's field of the same name before the indexer is released. */
  public boolean destroyOntologyDuringRelease = false;

  /**
   * Prepares the indexer, iterates over all Medline records and returns once the last update
   * thread has finished. Progress messages are written to standard output.
   */
  public void start() {
    System.out.println("Loading normaliser cache. " + StringUtilities.now());
    indexer.normaliser.loadCacheBinary(normaliserCacheFile);
    indexer.setOntology(ontology);

    System.out.println("Releasing thesaurus. " + StringUtilities.now());
    indexer.destroyOntologyDuringRelease = destroyOntologyDuringRelease;
    indexer.release();

    if (disambiguate) {
      // NOTE(review): the numeric arguments look like concept-ID bounds -- confirm against
      // the disambiguator constructors before changing them.
      geneDisambiguator = new GeneDisambiguator(indexer, 3000000, Integer.MAX_VALUE);
      UMLSdisambiguator = new UMLSDisambiguator(0, 3000000);
    }

    // This first update thread is started before any records or concept vectors have been
    // assigned to it; processMedlineRecords() joins on it before creating the next one.
    initUpdateThread();
    updateThread.start();

    System.out.println("Starting indexation cycles. " + StringUtilities.now());
    medlineIterator.iterate(this);

    // Do not report completion before the update for the final batch has finished.
    awaitUpdateThread();
    System.out.println("Done. " + StringUtilities.now());
  }

  /**
   * Indexes one batch of records and passes it to a freshly started update thread.
   *
   * <p>Blocks until the update thread of the previous batch has terminated before the new one
   * is started.
   *
   * @param records the batch to index; one concept vector is produced per record, in order
   */
  @Override
  public void processMedlineRecords(List<MedlineRecord> records) {
    // Index buffer:
    List<ConceptVector> conceptVectors = new ArrayList<ConceptVector>(records.size());
    for (MedlineRecord currentRecord : records) {
      indexer.index(currentRecord.titleAbsMeshSubs());
      if (disambiguate) {
        geneDisambiguator.disambiguate(indexer);
        UMLSdisambiguator.disambiguate(indexer);
      }
      conceptVectors.add(
          PeregrineOutputConverter.convertResult2ConceptVector(indexer, groundhog.getOntology()));
    }

    // Store buffer: wait until the previous update job is done, then start the next one.
    awaitUpdateThread();
    initUpdateThread();
    updateThread.records = records;
    updateThread.conceptVectors = conceptVectors;
    updateThread.start();
  }

  /** Replaces {@code updateThread} with a new, not yet started thread bound to the groundhog. */
  private void initUpdateThread() {
    updateThread = new IndexerUpdateThreadForGroundhog();
    updateThread.groundhog = groundhog;
  }

  /**
   * Waits until the current update thread has terminated, even if this thread is interrupted
   * while waiting.
   *
   * <p>An interrupt must not cut the wait short: the caller would otherwise start a new update
   * thread while the previous one is still running with the same groundhog. The interrupt
   * status is restored afterwards so that callers further up can still react to it.
   */
  private void awaitUpdateThread() {
    boolean interrupted = false;
    while (true) {
      try {
        updateThread.join();
        break;
      } catch (InterruptedException e) {
        // join() cleared the interrupt flag; remember it and keep waiting.
        interrupted = true;
      }
    }
    if (interrupted) {
      Thread.currentThread().interrupt();
    }
  }
}