package net.sf.jabref.imports; import net.sf.jabref.BibtexEntry; import net.sf.jabref.GUIGlobals; import net.sf.jabref.Globals; import net.sf.jabref.OutputPrinter; import net.sf.jabref.net.URLDownload; import javax.swing.*; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.URL; import java.net.URLEncoder; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; public class ScienceDirectFetcher implements EntryFetcher { protected static int MAX_PAGES_TO_LOAD = 8; protected static final String WEBSITE_URL = "http://www.sciencedirect.com"; protected static final String SEARCH_URL = WEBSITE_URL +"/science/quicksearch?query="; protected static final String linkPrefix = "http://www.sciencedirect.com/science?_ob=ArticleURL&" ; protected static final Pattern linkPattern = Pattern.compile( "<a href=\""+ linkPrefix.replaceAll("\\?", "\\\\?")+ "([^\"]+)\"\""); protected static final Pattern nextPagePattern = Pattern.compile( "<a href=\"(.*)\">Next >"); protected boolean stopFetching = false; protected boolean noAccessFound = false; public String getHelpPage() { return "ScienceDirect.html"; } public URL getIcon() { return GUIGlobals.getIconUrl("www"); } public String getKeyName() { return "Search ScienceDirect"; } public JPanel getOptionsPanel() { // No Options panel return null; } public String getTitle() { return Globals.menuTitle("Search ScienceDirect"); } public void stopFetching() { stopFetching = true; noAccessFound = false; } public boolean processQuery(String query, ImportInspector dialog, OutputPrinter status) { stopFetching = false; try { List<String> citations = getCitations(query); if (citations == null) return false; if (citations.size() == 0){ status.showMessage(Globals.lang("No entries found for the search string '%0'", query), Globals.lang("Search ScienceDirect"), JOptionPane.INFORMATION_MESSAGE); return false; } int i=0; for (String cit : citations) { if (stopFetching) break; BibtexEntry entry = BibsonomyScraper.getEntry(cit); if (entry != null) dialog.addEntry(entry); dialog.setProgress(++i, citations.size()); } return true; } catch (IOException e) { e.printStackTrace(); status.showMessage(Globals.lang("Error while fetching from ScienceDirect") + ": " + e.getMessage()); } return false; } /** * * @param query * The search term to query JStor for. * @return a list of IDs * @throws java.io.IOException */ protected List<String> getCitations(String query) throws IOException { String urlQuery; ArrayList<String> ids = new ArrayList<String>(); try { urlQuery = SEARCH_URL + URLEncoder.encode(query, "UTF-8"); int count = 1; String nextPage = null; while (((nextPage = getCitationsFromUrl(urlQuery, ids)) != null) && (count < MAX_PAGES_TO_LOAD)) { urlQuery = nextPage; count++; } return ids; } catch (UnsupportedEncodingException e) { throw new RuntimeException(e); } } protected String getCitationsFromUrl(String urlQuery, List<String> ids) throws IOException { URL url = new URL(urlQuery); URLDownload ud = new URLDownload(url); ud.download(); String cont = ud.getStringContent(); //String entirePage = cont; Matcher m = linkPattern.matcher(cont); if (m.find()) { while (m.find()) { ids.add(linkPrefix+m.group(1)); cont = cont.substring(m.end()); m = linkPattern.matcher(cont); } } else { return null; } /*m = nextPagePattern.matcher(entirePage); if (m.find()) { String newQuery = WEBSITE_URL +m.group(1); return newQuery; } else*/ return null; } }