/*
 * This file is part of ADDIS (Aggregate Data Drug Information System).
 * ADDIS is distributed from http://drugis.org/.
 * Copyright © 2009 Gert van Valkenhoef, Tommi Tervonen.
 * Copyright © 2010 Gert van Valkenhoef, Tommi Tervonen, Tijs Zwinkels,
 * Maarten Jacobs, Hanno Koeslag, Florin Schimbinschi, Ahmad Kamal, Daniel
 * Reid.
 * Copyright © 2011 Gert van Valkenhoef, Ahmad Kamal, Daniel Reid, Florin
 * Schimbinschi.
 * Copyright © 2012 Gert van Valkenhoef, Daniel Reid, Joël Kuiper, Wouter
 * Reckman.
 * Copyright © 2013 Gert van Valkenhoef, Joël Kuiper.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

package org.drugis.addis.imports;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.drugis.addis.entities.PubMedId;
import org.drugis.addis.entities.PubMedIdList;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * Retrieves the PubMed IDs associated with a study identifier by querying the
 * NCBI E-utilities web service located at {@link #PUBMED_API}.
 *
 * <p>The lookup takes two HTTP requests: an {@code esearch} call that stores
 * its result on the server side ({@code usehistory=y}), followed by an
 * {@code esummary} call that reads that stored result back.</p>
 */
public class PubMedIDRetriever {
	/** Read timeout, in milliseconds, applied to every connection opened by {@link #openUrl(String)}. */
	public static final int READ_TIMEOUT = 3000;
	/** Connect timeout, in milliseconds, applied to every connection opened by {@link #openUrl(String)}. */
	public static final int CONNECTION_TIMEOUT = 3000;
	/** Base URL that the E-utilities script names and query strings are appended to. */
	public static final String PUBMED_API = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/";

	/** Character encoding used when escaping values placed in a query string. */
	private static final String URL_ENCODING = "UTF-8";

	/**
	 * Signals that a response could not be parsed as XML or did not have the
	 * expected structure. It extends {@link IOException} so that callers which
	 * already handle I/O failures also handle malformed responses.
	 */
	public static class ParseException extends IOException {
		private static final long serialVersionUID = -3902366298759803187L;
		private final Throwable d_cause;

		/**
		 * @param message description of what went wrong.
		 * @param cause the underlying failure, or {@code null} if there is none.
		 */
		public ParseException(String message, Throwable cause) {
			super(message);
			d_cause = cause;
		}

		@Override
		public Throwable getCause() {
			return d_cause;
		}
	}

	/**
	 * Looks up the PubMed IDs of the records whose "Secondary Source ID" field
	 * matches the given study identifier.
	 *
	 * @param StudyID the study identifier to search for. It is URL-encoded
	 *        before being placed in the query string.
	 * @return the IDs found; an empty list when {@code StudyID} is
	 *         {@code null} or blank (no request is made in that case).
	 * @throws ParseException if a response is not well-formed XML or lacks an
	 *         expected element.
	 * @throws IOException if a connection cannot be opened or read.
	 */
	public PubMedIdList importPubMedID(String StudyID) throws IOException {
		if (StudyID == null || StudyID.trim().length() == 0) {
			// Nothing to search for: skip the round trips instead of sending an empty term.
			return new PubMedIdList();
		}

		// The first document is only a key (QueryKey/WebEnv) into the stored results.
		String searchUrl = PUBMED_API
				+ "esearch.fcgi?db=pubmed&retmax=0&usehistory=y&term="
				+ URLEncoder.encode(StudyID, URL_ENCODING)
				+ "[Secondary%20Source%20ID]";
		String resultsUrl;
		InputStream searchStream = openUrl(searchUrl);
		try {
			resultsUrl = getResultsUrl(searchStream);
		} finally {
			closeQuietly(searchStream);
		}

		// The second document contains the actual results.
		InputStream summaryStream = openUrl(PUBMED_API + resultsUrl);
		try {
			return getIdList(parse(summaryStream));
		} finally {
			closeQuietly(summaryStream);
		}
	}

	/**
	 * Parses an XML document from the given stream using a non-validating,
	 * non-namespace-aware DOM parser.
	 *
	 * <p>The stream is not closed by this method; that remains the caller's
	 * responsibility.</p>
	 *
	 * <p>NOTE(review): DTD and external-entity processing are left at the
	 * parser defaults here. E-utilities responses appear to carry a DOCTYPE
	 * declaration, so tightening this needs testing against live responses
	 * before it is changed.</p>
	 *
	 * @param is the stream to read the document from.
	 * @return the parsed document.
	 * @throws ParseException if the content is not well-formed XML.
	 * @throws IOException if reading from the stream fails.
	 */
	public static Document parse(InputStream is) throws IOException {
		DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
		factory.setValidating(false);
		factory.setNamespaceAware(false);
		factory.setIgnoringElementContentWhitespace(true);

		DocumentBuilder builder;
		try {
			builder = factory.newDocumentBuilder();
		} catch (ParserConfigurationException e) {
			// The configuration above is fixed, so a failure here is an environment problem.
			throw new RuntimeException(e);
		}

		try {
			return builder.parse(is);
		} catch (SAXException e) {
			throw new ParseException("Error parsing PubMed response", e);
		}
	}

	/**
	 * Reads the search response and builds the relative {@code esummary} URL
	 * (to be appended to {@link #PUBMED_API}) that retrieves the stored result.
	 *
	 * @param inOne stream containing the {@code esearch} response.
	 * @return the relative URL of the matching {@code esummary} request.
	 * @throws IOException if the response cannot be read or parsed, or does
	 *         not contain the {@code QueryKey} and {@code WebEnv} elements.
	 */
	private String getResultsUrl(InputStream inOne) throws IOException {
		Document searchDoc = parse(inOne);
		String queryKey = getTagValue(searchDoc, "QueryKey");
		String webEnv = getTagValue(searchDoc, "WebEnv");
		return "esummary.fcgi?db=pubmed&retmode=xml&query_key=" + queryKey
				+ "&WebEnv=" + webEnv + "&retstart=0";
	}

	/**
	 * Returns the value of the first child node of the first element with the
	 * given tag name.
	 *
	 * @param docOne the document to search.
	 * @param tagName the tag name to look for.
	 * @return the value found; never {@code null}.
	 * @throws ParseException if there is no such element or it has no value.
	 */
	private String getTagValue(Document docOne, String tagName) throws ParseException {
		NodeList matches = docOne.getElementsByTagName(tagName);
		if (matches.getLength() == 0) {
			throw new ParseException("PubMed response contains no <" + tagName + "> element", null);
		}
		String value = firstChildValue(matches.item(0));
		if (value == null) {
			throw new ParseException("PubMed response has an empty <" + tagName + "> element", null);
		}
		return value;
	}

	/**
	 * Collects the value of every {@code Id} element in the document, in
	 * document order.
	 *
	 * @param docTwo the parsed {@code esummary} response.
	 * @return the IDs found; empty when the document has no {@code Id} elements.
	 * @throws ParseException if an {@code Id} element has no value.
	 */
	private PubMedIdList getIdList(Document docTwo) throws ParseException {
		NodeList idNodes = docTwo.getElementsByTagName("Id");
		PubMedIdList ids = new PubMedIdList();
		for (int i = 0; i < idNodes.getLength(); i++) {
			String value = firstChildValue(idNodes.item(i));
			if (value == null) {
				throw new ParseException("PubMed response has an empty <Id> element", null);
			}
			ids.add(new PubMedId(value));
		}
		return ids;
	}

	/**
	 * Returns the node value of the element's first child, or {@code null}
	 * when the element has no children.
	 */
	private static String firstChildValue(Node element) {
		Node child = element.getFirstChild();
		return child == null ? null : child.getNodeValue();
	}

	/**
	 * Closes the stream, discarding any failure to do so.
	 */
	private static void closeQuietly(InputStream stream) {
		try {
			stream.close();
		} catch (IOException ignored) {
			// Deliberately ignored: by this point the data has either been read
			// or a more informative exception is already propagating, and a
			// failure to close must not replace it.
		}
	}

	/**
	 * Opens a connection to the given URL with {@link #CONNECTION_TIMEOUT} and
	 * {@link #READ_TIMEOUT} applied, and returns its response stream.
	 *
	 * @param url the absolute URL to open.
	 * @return the response stream; the caller is responsible for closing it.
	 * @throws IOException if the URL is malformed or the connection fails.
	 */
	public static InputStream openUrl(String url) throws IOException {
		URLConnection connection = new URL(url).openConnection();
		connection.setConnectTimeout(CONNECTION_TIMEOUT);
		connection.setReadTimeout(READ_TIMEOUT);
		return connection.getInputStream();
	}
}