package net.sf.jabref.imports; import net.sf.jabref.BibtexEntry; import net.sf.jabref.GUIGlobals; import net.sf.jabref.Globals; import net.sf.jabref.OutputPrinter; import net.sf.jabref.net.URLDownload; import javax.swing.*; import java.io.*; import java.net.URL; import java.net.URLConnection; import java.net.URLEncoder; import java.util.ArrayList; import java.util.List; import java.util.StringTokenizer; import java.util.regex.Matcher; import java.util.regex.Pattern; public class JSTORFetcher2 implements EntryFetcher { protected static int MAX_PAGES_TO_LOAD = 8; protected static final String JSTOR_URL = "http://www.jstor.org"; protected static final String SEARCH_URL = JSTOR_URL+"/action/doBasicSearch?Query="; protected static final String SEARCH_URL_END = "&x=0&y=0&wc=on"; protected static final String SINGLE_CIT_ENC = "http://www.jstor.org/action/exportSingleCitation?singleCitation=true&suffix="; //"http%3A%2F%2Fwww.jstor.org%2Faction%2FexportSingleCitation%3FsingleCitation" //+"%3Dtrue%26suffix%3D"; protected static final Pattern idPattern = Pattern.compile( "<a class=\"title\" href=\"/stable/(\\d+)\\?"); protected static final Pattern nextPagePattern = Pattern.compile( "<a href=\"(.*)\">Next >"); protected static final String noAccessIndicator = "We do not recognize you as having access to JSTOR"; protected boolean stopFetching = false; protected boolean noAccessFound = false; public String getHelpPage() { return "JSTOR.html"; } public URL getIcon() { return GUIGlobals.getIconUrl("www"); } public String getKeyName() { return "Search JSTOR"; } public JPanel getOptionsPanel() { // No Options panel return null; } public String getTitle() { return Globals.menuTitle("Search JSTOR"); } public void stopFetching() { stopFetching = true; noAccessFound = false; } public boolean processQuery(String query, ImportInspector dialog, OutputPrinter status) { stopFetching = false; try { List<String> citations = getCitations(query); if (citations == null) return false; if (citations.size() == 0){ if (!noAccessFound) status.showMessage(Globals.lang("No entries found for the search string '%0'", query), Globals.lang("Search JSTOR"), JOptionPane.INFORMATION_MESSAGE); else { status.showMessage(Globals.lang("No entries found. It looks like you do not have access to search JStor.", query), Globals.lang("Search JSTOR"), JOptionPane.INFORMATION_MESSAGE); } return false; } int i=0; for (String cit : citations) { if (stopFetching) break; BibtexEntry entry = getSingleCitation(cit); if (entry != null) dialog.addEntry(entry); dialog.setProgress(++i, citations.size()); } return true; } catch (IOException e) { e.printStackTrace(); status.showMessage(Globals.lang("Error while fetching from JSTOR") + ": " + e.getMessage()); } return false; } /** * * @param query * The search term to query JStor for. * @return a list of IDs * @throws java.io.IOException */ protected List<String> getCitations(String query) throws IOException { String urlQuery; ArrayList<String> ids = new ArrayList<String>(); try { urlQuery = SEARCH_URL + URLEncoder.encode(query, "UTF-8") + SEARCH_URL_END; int count = 1; String nextPage = null; while (((nextPage = getCitationsFromUrl(urlQuery, ids)) != null) && (count < MAX_PAGES_TO_LOAD)) { urlQuery = nextPage; count++; } return ids; } catch (UnsupportedEncodingException e) { throw new RuntimeException(e); } } protected String getCitationsFromUrl(String urlQuery, List<String> ids) throws IOException { URL url = new URL(urlQuery); URLDownload ud = new URLDownload(url); ud.download(); String cont = ud.getStringContent(); String entirePage = cont; Matcher m = idPattern.matcher(cont); if (m.find()) { while (m.find()) { ids.add(m.group(1)); cont = cont.substring(m.end()); m = idPattern.matcher(cont); } } else if (entirePage.indexOf(noAccessIndicator) >= 0) { noAccessFound = true; return null; } else { return null; } m = nextPagePattern.matcher(entirePage); if (m.find()) { String newQuery = JSTOR_URL+m.group(1); return newQuery; } else return null; } protected BibtexEntry getSingleCitation(String cit) { return BibsonomyScraper.getEntry(SINGLE_CIT_ENC+cit); } }