/* * Concept profile generation and analysis for Gene-Disease paper * Copyright (C) 2015 Biosemantics Group, Leiden University Medical Center * Leiden, The Netherlands * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/> */ package KnowledgeTransfer; import org.erasmusmc.collections.IntList; import org.erasmusmc.collections.SortedIntList2FloatMap; import org.erasmusmc.collections.SortedIntListSet; import org.erasmusmc.groundhog.Groundhog; import org.erasmusmc.groundhog.GroundhogManager; import org.erasmusmc.ontology.ConceptVector; import org.erasmusmc.ontology.ConceptVectorRecord; public class ReadMedlineGroundhog { /** * @param args */ public static void main(String[] args) { String path2folder = "/home/hvanhaagen/textmining/Groundhogs/"; //String folder = "Medline1980till17Jul2012_UMLS2010ABHomologeneJochemToxV1_6"; String folder = "Medline1980till17Jul2012_UMLS2010ABHomologeneJochemToxV1_6-test38"; // Voorbeeld concept Integer cid1 = 20179; //Huntington Disease // Declareer een medline groundhog volgens de legacy code. Groundhog documentProfilesGroundhog; GroundhogManager groundhogmanager2 = new GroundhogManager(path2folder); documentProfilesGroundhog = groundhogmanager2.getGroundhog(folder); // concept id (1) -> PMIDs (many) // dit is de eerste mapping die in de medline groundhog moet zitten. SortedIntListSet pmids = documentProfilesGroundhog.getRecordIDsForConcept(cid1); // Loop over de PMIDs. Controleer voor sommige PMIDs of je Huntington in het abstract terugvindt. // for(Integer pmid:pmids){ // System.out.println(pmid); // } // Bereken het aantal artikelen waar Huntington in voorkomt. System.out.println(pmids.size()); ///////////////////////////////////////////////////////////////////////////////////////// // Neem een willekeurig PMID int random_pmid = 1280937; //Haal de concept IDs op die in dat abstract voorkomen. PMID (1) -> concept ids (many) ConceptVectorRecord cvr = documentProfilesGroundhog.get(random_pmid); // Haal de bananenschil eraf (legacy code) ConceptVector cv = cvr.getConceptVector(); SortedIntList2FloatMap sil2fm = cv.values; IntList keys = sil2fm.keys(); // Loop over de keys. De keys zijn de concept ids. Voor elk concept checken we de frequentie // hoevaak deze voorkomt in het abstract for(Integer key:keys){ float frequency = sil2fm.get(key); System.out.println(key+"\t"+frequency); } } }