/* Copyright (C) 2003-2011 JabRef contributors. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ package net.sf.jabref.imports; import net.sf.jabref.BibtexEntry; import net.sf.jabref.GUIGlobals; import net.sf.jabref.Globals; import net.sf.jabref.OutputPrinter; import net.sf.jabref.net.URLDownload; import javax.swing.*; import java.io.*; import java.net.URL; import java.net.URLConnection; import java.net.URLEncoder; import java.util.ArrayList; import java.util.List; import java.util.StringTokenizer; import java.util.regex.Matcher; import java.util.regex.Pattern; public class JSTORFetcher2 implements EntryFetcher { protected static final String CANCELLED = "__CANCELLED__"; protected static int MAX_PAGES_TO_LOAD = 8; protected static int MAX_REFS = 7 * 25; protected static int REFS_PER_PAGE = 25; // This is the current default of JSTOR; protected static final String JSTOR_URL = "http://www.jstor.org"; protected static final String SEARCH_URL = JSTOR_URL + "/action/doBasicSearch?Query="; protected static final String SEARCH_URL_END = "&x=0&y=0&wc=on"; protected static final String SINGLE_CIT_ENC = "http://www.jstor.org/action/exportSingleCitation?singleCitation=true&suffix="; //"http%3A%2F%2Fwww.jstor.org%2Faction%2FexportSingleCitation%3FsingleCitation" //+"%3Dtrue%26suffix%3D"; protected static final Pattern idPattern = Pattern.compile( "<a class=\"title\" href=\"/stable/(\\d+)\\?"); protected static final Pattern numberofhits = Pattern.compile( "<span id=\"NumberOfHits\" name=\"(\\d+)\""); protected static final Pattern nextPagePattern = Pattern.compile( "<a href=\"(.*)\">Next »"); protected static final String noAccessIndicator = "We do not recognize you as having access to JSTOR"; protected boolean stopFetching = false; protected boolean noAccessFound = false; public String getHelpPage() { return "JSTOR.html"; } public URL getIcon() { return GUIGlobals.getIconUrl("www"); } public String getKeyName() { return "JSTOR"; } public JPanel getOptionsPanel() { // No Options panel return null; } public String getTitle() { return "JSTOR"; } public void stopFetching() { stopFetching = true; noAccessFound = false; } public boolean processQuery(String query, ImportInspector dialog, OutputPrinter status) { stopFetching = false; try { List<String> citations = getCitations(query, dialog, status); //System.out.println("JSTORFetcher2 processQuery within list"); if (citations == null) { return false; } //System.out.println("JSTORFetcher2 processQuery after false citations=" + citations); if (citations.size() == 0) { if (!noAccessFound) { status.showMessage(Globals.lang("No entries found for the search string '%0'", query), Globals.lang("Search JSTOR"), JOptionPane.INFORMATION_MESSAGE); } else { status.showMessage(Globals.lang("No entries found. It looks like you do not have access to search JStor.", query), Globals.lang("Search JSTOR"), JOptionPane.INFORMATION_MESSAGE); } return false; } int i = 0; for (String cit : citations) { if (stopFetching) { break; } BibtexEntry entry = getSingleCitation(cit); if (entry != null) { dialog.addEntry(entry); } dialog.setProgress(++i, citations.size()); } return true; } catch (IOException e) { e.printStackTrace(); status.showMessage(Globals.lang("Error while fetching from JSTOR") + ": " + e.getMessage()); } return false; } /** * * @param query * The search term to query JStor for. * @return a list of IDs * @throws java.io.IOException */ protected List<String> getCitations(String query, ImportInspector dialog, OutputPrinter status) throws IOException { String urlQuery; ArrayList<String> ids = new ArrayList<String>(); try { urlQuery = SEARCH_URL + URLEncoder.encode(query, "UTF-8") + SEARCH_URL_END; int count = 1; String numberOfRefs[] = new String[2]; int refsRequested = 0; int numberOfPagesRequested = MAX_PAGES_TO_LOAD; String nextPage = null; while ((count <= Math.min(MAX_PAGES_TO_LOAD, numberOfPagesRequested)) && ((nextPage = getCitationsFromUrl(urlQuery, ids, count, numberOfRefs, dialog, status)) != null)) { // If user has cancelled the import, return null to signal this: if ((count == 1) && (nextPage.equals(CANCELLED))) return null; //System.out.println("JSTORFetcher2 getCitations numberofrefs=" + numberOfRefs[0]); //System.out.println("JSTORFetcher2 getCitations numberofrefs=" + " refsRequested=" + numberOfRefs[1]); refsRequested = Integer.valueOf(numberOfRefs[1]); //System.out.println("JSTORFetcher2 getCitations refsRequested=" + Integer.valueOf(refsRequested)); numberOfPagesRequested = ((refsRequested -1) - (refsRequested -1) % REFS_PER_PAGE) / REFS_PER_PAGE + 1; //System.out.println("JSTORFetcher2 getCitations numberOfPagesRequested=" + Integer.valueOf(numberOfPagesRequested)); urlQuery = nextPage; //System.out.println("JSTORFetcher2 getcitations count=" + Integer.valueOf(count) + " ids=" + ids); count++; } return ids; } catch (UnsupportedEncodingException e) { throw new RuntimeException(e); } } protected String getCitationsFromUrl(String urlQuery, List<String> ids, int count, String[] numberOfRefs, ImportInspector dialog, OutputPrinter status) throws IOException { URL url = new URL(urlQuery); URLDownload ud = new URLDownload(url); ud.download(); String cont = ud.getStringContent(); String entirePage = cont; String pageEntire = ud.getStringContent(); int countOfRefs = 0; int refsRequested = 0; if (count == 1) { // Readin the numberofhits (only once) Matcher mn = numberofhits.matcher(pageEntire); if (mn.find()) { //System.out.println("JSTORFetcher2 getCitationsFromUrl numberofhits=" + mn.group(1)); numberOfRefs[0] = mn.group(1); countOfRefs = Integer.valueOf(numberOfRefs[0]); //System.out.println("JSTORFetcher2 getCitationsFromUrl numberofrefs[0]=" + Integer.valueOf(numberOfRefs[0])); } else { //System.out.println("JSTORFetcher2 getCitationsFromUrl cant find numberofhits="); numberOfRefs[0] = "0"; } while (true) { String strCount = JOptionPane.showInputDialog(Globals.lang("References found") + ": " + countOfRefs + " " + Globals.lang("Number of references to fetch?"), Integer.toString(countOfRefs)); if (strCount == null) { status.setStatus(Globals.lang("JSTOR import cancelled")); return CANCELLED; } try { numberOfRefs[1] = strCount.trim(); refsRequested = Integer.parseInt(numberOfRefs[1]); break; } catch (RuntimeException ex) { status.showMessage(Globals.lang("Please enter a valid number")); } } } countOfRefs = Integer.valueOf(numberOfRefs[0]); refsRequested = Integer.valueOf(numberOfRefs[1]); Matcher m = idPattern.matcher(cont); if (m.find() && (ids.size() + 1 <= Integer.valueOf(refsRequested)) ) { do { ids.add(m.group(1)); cont = cont.substring(m.end()); m = idPattern.matcher(cont); } while (m.find() && (ids.size() + 1 <= Integer.valueOf(refsRequested))); } else if (entirePage.indexOf(noAccessIndicator) >= 0) { noAccessFound = true; return null; } else { return null; } m = nextPagePattern.matcher(entirePage); if (m.find()) { String newQuery = JSTOR_URL + m.group(1); return newQuery; } else { return null; } } protected BibtexEntry getSingleCitation(String cit) { return BibsonomyScraper.getEntry(SINGLE_CIT_ENC + cit); } }