package net.sf.jabref.imports; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.net.MalformedURLException; import java.net.URL; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.swing.JOptionPane; import javax.swing.JPanel; import net.sf.jabref.BibtexEntry; import net.sf.jabref.GUIGlobals; import net.sf.jabref.Globals; import net.sf.jabref.OutputPrinter; /** * Fetch or search from Pubmed http://www.ncbi.nlm.nih.gov/sites/entrez/ * */ public class MedlineFetcher implements EntryFetcher { protected class SearchResult { public int count = 0; public int retmax = 0; public int retstart = 0; public String ids = ""; public void addID(String id) { if (ids.equals("")) ids = id; else ids += "," + id; } } /** * How many entries to query in one request */ public static final int PACING = 20; boolean shouldContinue; OutputPrinter frame; ImportInspector dialog; public String toSearchTerm(String in) { Pattern part1 = Pattern.compile(", "); Pattern part2 = Pattern.compile(","); Pattern part3 = Pattern.compile(" "); Matcher matcher; matcher = part1.matcher(in); in = matcher.replaceAll("\\+AND\\+"); matcher = part2.matcher(in); in = matcher.replaceAll("\\+AND\\+"); matcher = part3.matcher(in); in = matcher.replaceAll("+"); return in; } /** * Gets the initial list of ids */ public SearchResult getIds(String term, int start, int pacing) { String baseUrl = "http://eutils.ncbi.nlm.nih.gov/entrez/eutils"; String medlineUrl = baseUrl + "/esearch.fcgi?db=pubmed&retmax=" + Integer.toString(pacing) + "&retstart=" + Integer.toString(start) + "&term="; Pattern idPattern = Pattern.compile("<Id>(\\d+)</Id>"); Pattern countPattern = Pattern.compile("<Count>(\\d+)<\\/Count>"); Pattern retMaxPattern = Pattern.compile("<RetMax>(\\d+)<\\/RetMax>"); Pattern retStartPattern = Pattern.compile("<RetStart>(\\d+)<\\/RetStart>"); boolean doCount = true; SearchResult result = new SearchResult(); try { URL ncbi = new URL(medlineUrl + term); // get the ids BufferedReader in = new BufferedReader(new InputStreamReader(ncbi.openStream())); String inLine; while ((inLine = in.readLine()) != null) { // get the count Matcher idMatcher = idPattern.matcher(inLine); if (idMatcher.find()) { result.addID(idMatcher.group(1)); } Matcher retMaxMatcher = retMaxPattern.matcher(inLine); if (retMaxMatcher.find()) { result.retmax = Integer.parseInt(retMaxMatcher.group(1)); } Matcher retStartMatcher = retStartPattern.matcher(inLine); if (retStartMatcher.find()) { result.retstart = Integer.parseInt(retStartMatcher.group(1)); } Matcher countMatcher = countPattern.matcher(inLine); if (doCount && countMatcher.find()) { result.count = Integer.parseInt(countMatcher.group(1)); doCount = false; } } } catch (MalformedURLException e) { // new URL() failed System.out.println("bad url"); e.printStackTrace(); } catch (IOException e) { // openConnection() failed System.out.println("connection failed"); e.printStackTrace(); } return result; } public void stopFetching() { shouldContinue = false; } public String getHelpPage() { return GUIGlobals.medlineHelp; } public URL getIcon() { return GUIGlobals.getIconUrl("www"); } public String getKeyName() { return "Fetch Medline"; } public JPanel getOptionsPanel() { // No Option Panel return null; } public String getTitle() { return Globals.menuTitle("Search Medline"); } public boolean processQuery(String query, ImportInspector dialog, OutputPrinter frame) { shouldContinue = true; query = query.trim().replace(';', ','); if (query.matches("\\d+[,\\d+]*")) { frame.setStatus(Globals.lang("Fetching Medline by id...")); List<BibtexEntry> bibs = MedlineImporter.fetchMedline(query); if (bibs.size() == 0) { frame.showMessage(Globals.lang("No references found")); } for (BibtexEntry entry : bibs){ dialog.addEntry(entry); } return true; } if (query.length() > 0) { frame.setStatus(Globals.lang("Fetching Medline by term...")); String searchTerm = toSearchTerm(query); // get the ids from entrez SearchResult result = getIds(searchTerm, 0, 1); if (result.count == 0) { frame.showMessage(Globals.lang("No references found")); return false; } int numberToFetch = result.count; if (numberToFetch > PACING) { while (true) { String strCount = JOptionPane.showInputDialog(Globals.lang("References found") + ": " + numberToFetch + " " + Globals.lang("Number of references to fetch?"), Integer .toString(numberToFetch)); if (strCount == null) { frame.setStatus(Globals.lang("Medline import canceled")); return false; } try { numberToFetch = Integer.parseInt(strCount.trim()); break; } catch (RuntimeException ex) { frame.showMessage(Globals.lang("Please enter a valid number")); } } } for (int i = 0; i < numberToFetch; i += PACING) { if (!shouldContinue) break; int noToFetch = Math.min(PACING, numberToFetch - i); // get the ids from entrez result = getIds(searchTerm, i, noToFetch); List<BibtexEntry> bibs = MedlineImporter.fetchMedline(result.ids); for (BibtexEntry entry : bibs){ dialog.addEntry(entry); } dialog.setProgress(i + noToFetch, numberToFetch); } return true; } frame.showMessage(Globals .lang("Please enter a comma separated list of Medline IDs (numbers) or search terms."), Globals.lang("Input error"), JOptionPane.ERROR_MESSAGE); return false; } }