/* * Concept profile generation tool suite * Copyright (C) 2015 Biosemantics Group, Erasmus University Medical Center, * Rotterdam, The Netherlands * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/> */ package org.erasmusmc.medline; import java.sql.ResultSet; import java.util.ArrayList; import java.util.Date; import java.util.GregorianCalendar; import java.util.List; import java.util.Map; import java.util.TreeMap; import org.erasmusmc.utilities.StringUtilities; public class FetchRecordsthread extends MySQLthread{ public List<Integer> pmids = new ArrayList<Integer>(); public List<MedlineRecord> records; public List<Integer> unretrievedPmids = new ArrayList<Integer>(); public boolean fetchTitleAndAbstract = true; public boolean fetchMesh = true; public boolean fetchGeneSymbol = false; public boolean fetchJournal = false; public boolean fetchJournalShortForm = false; public boolean fetchSubstances = false; public boolean fetchPublicationType = false; public boolean fetchPublicationDate = false; public boolean fetchAuthors = false; public boolean fetchLanguage = false; public boolean fetchAffiliation = false; public boolean fetchISSN = false; public boolean fetchVolumeIssuePages = false; public boolean generateUnretrievedPmidsList = true; public FetchRecordsthread(String server){ super(server); } public FetchRecordsthread(String server, String database, String user, String password){ super(server, database, user, password); } public FetchRecordsthread(){ super(); } protected void process(){ String query = "(" + StringUtilities.join(pmids,",") + ")"; pmid2record.clear(); unretrievedPmids.clear(); records = new ArrayList<MedlineRecord>(); ResultSet rs; StringBuffer baseQuery = new StringBuffer(); baseQuery.append("select pmid"); if (fetchTitleAndAbstract) baseQuery.append(",article_title"); if (fetchJournal) baseQuery.append(",journal_title"); if (fetchJournalShortForm) baseQuery.append(",medline_ta"); if (fetchPublicationDate) baseQuery.append(",pub_date_year, pub_date_month, pub_date_day, medline_date"); if (fetchAffiliation) baseQuery.append(",article_affiliation"); if (fetchISSN) baseQuery.append(",issn,issn_linking"); if (fetchVolumeIssuePages) baseQuery.append(",volume,issue,start_page,end_page,medline_pgn"); baseQuery.append(" from medline_citation where pmid in "); try { if (fetchTitleAndAbstract || fetchJournal || fetchJournalShortForm || fetchPublicationDate || fetchAffiliation || fetchISSN || fetchVolumeIssuePages) { rs = stmt.executeQuery(baseQuery.toString() + query); rs.beforeFirst(); while (rs.next()){ int fetchPMID = Integer.parseInt(rs.getString("pmid")); MedlineRecord fetchRecord = newRecord(fetchPMID); if (fetchTitleAndAbstract) { fetchRecord.title = rs.getString("article_title"); } if (fetchJournal) fetchRecord.journal = rs.getString("journal_title"); if (fetchJournalShortForm) fetchRecord.journalShortForm = rs.getString("medline_ta"); if (fetchPublicationDate){ fetchRecord.publicationDate = parseDate(rs.getString("pub_date_year"), rs.getString("pub_date_month"), rs.getString("pub_date_day"), rs.getString("medline_date")); } if (fetchAffiliation) fetchRecord.affiliation = rs.getString("article_affiliation"); if (fetchISSN) { fetchRecord.issn = rs.getString("issn"); fetchRecord.issnLinking = rs.getString("issn_linking"); } if (fetchVolumeIssuePages){ fetchRecord.volume = rs.getString("volume"); fetchRecord.issue = rs.getString("issue"); fetchRecord.pages = parsePages(rs.getString("start_page"),rs.getString("end_page"),rs.getString("medline_pgn")); } } } if (fetchMesh) { rs = stmt.executeQuery("select pmid,descriptor_name,descriptor_name_major_yn from medline_mesh_heading where pmid in "+query); rs.beforeFirst(); while (rs.next()){ int fetchPMID = Integer.parseInt(rs.getString("pmid")); MedlineRecord fetchRecord = pmid2record.get(fetchPMID); if (fetchRecord==null) fetchRecord = newRecord(fetchPMID); MeSHHeader meSHHeader = new MeSHHeader(); meSHHeader.descriptor = rs.getString("descriptor_name"); meSHHeader.descriptorMajor = rs.getBoolean("descriptor_name_major_yn"); fetchRecord.meshHeaders.add(meSHHeader); } rs = stmt.executeQuery("select pmid,descriptor_name,qualifier_name,qualifier_name_major_yn from medline_mesh_heading_qualifier where pmid in "+query); rs.beforeFirst(); while (rs.next()){ int fetchPMID = Integer.parseInt(rs.getString("pmid")); MedlineRecord fetchRecord = pmid2record.get(fetchPMID); String descriptor = rs.getString("descriptor_name"); for (MeSHHeader meSHHeader : fetchRecord.meshHeaders) if (meSHHeader.descriptor.equals(descriptor)){ if (meSHHeader.qualifier != null){ MeSHHeader copy = new MeSHHeader(); copy.descriptor = meSHHeader.descriptor; copy.descriptorMajor = meSHHeader.descriptorMajor; fetchRecord.meshHeaders.add(copy); meSHHeader = copy; } meSHHeader.qualifier = rs.getString("qualifier_name"); meSHHeader.qualifierMajor = rs.getBoolean("qualifier_name_major_yn"); break; } } } if (fetchPublicationType) { rs = stmt.executeQuery("select pmid,publication_type from medline_article_publication_type where pmid in "+query); rs.beforeFirst(); while (rs.next()){ int fetchPMID = Integer.parseInt(rs.getString("pmid")); MedlineRecord fetchRecord = pmid2record.get(fetchPMID); if (fetchRecord==null) fetchRecord = newRecord(fetchPMID); fetchRecord.publicationType.add(rs.getString("publication_type")); } } if (fetchSubstances) { rs = stmt.executeQuery("select pmid,name_of_substance from medline_chemical_list where pmid in "+query.toString()); rs.beforeFirst(); while (rs.next()){ int fetchPMID = Integer.parseInt(rs.getString("pmid")); MedlineRecord fetchRecord = pmid2record.get(fetchPMID); if (fetchRecord==null) fetchRecord = newRecord(fetchPMID); fetchRecord.substances.add(rs.getString("name_of_substance")); } } if (fetchGeneSymbol) { rs = stmt.executeQuery("select pmid,gene_symbol from medline_gene_symbol_list where pmid in "+query.toString()); rs.beforeFirst(); while (rs.next()){ int fetchPMID = Integer.parseInt(rs.getString("pmid")); MedlineRecord fetchRecord = pmid2record.get(fetchPMID); if (fetchRecord==null) fetchRecord = newRecord(fetchPMID); fetchRecord.geneSymbols.add(rs.getString("gene_symbol")); } } if (fetchAuthors) { rs = stmt.executeQuery("select pmid,author_order,last_name,initials,collective_name from medline_author where pmid in "+query.toString()); rs.beforeFirst(); while (rs.next()){ int fetchPMID = Integer.parseInt(rs.getString("pmid")); MedlineRecord fetchRecord = pmid2record.get(fetchPMID); String lastName = rs.getString("last_name"); String initials = rs.getString("initials"); String collectiveName = rs.getString("collective_name"); StringBuffer name = new StringBuffer(); if (lastName != null){ name.append(lastName); if (initials != null){ name.append(" "); name.append(initials); } } else { if (collectiveName != null){ name.append(collectiveName); } } if (name.length() != 0){ if (fetchRecord==null) fetchRecord = newRecord(fetchPMID); int author_order = rs.getInt("author_order"); for (int i = fetchRecord.authors.size(); i <= author_order; i++) fetchRecord.authors.add(null); fetchRecord.authors.set(author_order,name.toString()); } } } if (fetchLanguage) { rs = stmt.executeQuery("select pmid,language from medline_article_language where pmid in "+query.toString()); rs.beforeFirst(); while (rs.next()){ int fetchPMID = Integer.parseInt(rs.getString("pmid")); MedlineRecord fetchRecord = pmid2record.get(fetchPMID); if (fetchRecord==null) fetchRecord = newRecord(fetchPMID); fetchRecord.language = rs.getString("language"); } } if (fetchTitleAndAbstract){ rs = stmt.executeQuery("select pmid,abstract_order,abstract_label,abstract_nlmcategory,abstract_text from medline_abstract where pmid in "+query.toString()); rs.beforeFirst(); while (rs.next()){ int fetchPMID = Integer.parseInt(rs.getString("pmid")); MedlineRecord fetchRecord = pmid2record.get(fetchPMID); if (fetchRecord==null) fetchRecord = newRecord(fetchPMID); //Add this to add section headers to abstracts: /*else if (fetchRecord.abstractText.length() != 0) fetchRecord.abstractText += "\n\n"; String label = rs.getString("abstract_label"); if (label == null){ String nlmcategory = rs.getString("abstract_label"); if (nlmcategory != null) fetchRecord.abstractText += nlmcategory + "\n\n"; } else fetchRecord.abstractText += label + "\n\n"; */ fetchRecord.abstractText += rs.getString("abstract_text"); } } }catch( Exception e ) { e.printStackTrace(); } if (generateUnretrievedPmidsList){ for(Integer pmid:pmids){ if(!pmid2record.containsKey(pmid)){ unretrievedPmids.add(pmid); } } } } private String parsePages(String startPage, String endPage, String pages) { if (pages != null) return pages; if (startPage != null) if (endPage != null) return startPage+"-"+endPage; else return startPage; return null; } private List<String> months = getMonths(); private static List<String> getMonths(){ List<String> result = new ArrayList<String>(12); result.add("Jan"); result.add("Feb"); result.add("Mar"); result.add("Apr"); result.add("May"); result.add("Jun"); result.add("Jul"); result.add("Aug"); result.add("Sep"); result.add("Oct"); result.add("Nov"); result.add("Dec"); return result; } private Date parseDate(String yearString, String monthString, String dayString, String medlineString) { int year = 0; if (yearString == null){ if (medlineString == null) return null; for (Integer i = 1975; i < 2100; i++) if (medlineString.contains(i.toString())){ year = i; break; } if (year == 0) return null; } else { year = Integer.parseInt(yearString); } int month = 0; if (monthString == null){ if (medlineString != null){ for (int i = 0; i < months.size(); i++){ if (medlineString.contains(months.get(i))){ month = i; break; } } } } else { month = months.indexOf(monthString); } int day = dayString == null ? 1 : Integer.parseInt(dayString); return new GregorianCalendar(year, month, day).getTime(); } private MedlineRecord newRecord(int fetchPMID) { MedlineRecord fetchRecord = new MedlineRecord(fetchPMID); records.add(fetchRecord); pmid2record.put(fetchPMID,fetchRecord); return fetchRecord; } private Map<Integer, MedlineRecord> pmid2record = new TreeMap<Integer, MedlineRecord>(); }