package org.jabref.logic.net; import java.io.BufferedInputStream; import java.io.BufferedReader; import java.io.DataOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.io.StringWriter; import java.io.Writer; import java.net.CookieHandler; import java.net.CookieManager; import java.net.CookiePolicy; import java.net.HttpCookie; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URISyntaxException; import java.net.URL; import java.net.URLConnection; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.StandardCopyOption; import java.security.SecureRandom; import java.security.cert.X509Certificate; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import javax.net.ssl.HttpsURLConnection; import javax.net.ssl.SSLContext; import javax.net.ssl.TrustManager; import javax.net.ssl.X509TrustManager; import org.jabref.logic.util.io.FileUtil; import org.jabref.model.util.FileHelper; import com.mashape.unirest.http.Unirest; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; /** * URL download to a string. * <p> * Example: * URLDownload dl = new URLDownload(URL); * String content = dl.asString(ENCODING); * dl.toFile(Path); // available in FILE * String contentType = dl.getMimeType(); * * Each call to a public method creates a new HTTP connection. Nothing is cached. */ public class URLDownload { public static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"; private static final Log LOGGER = LogFactory.getLog(URLDownload.class); private final URL source; private final Map<String, String> parameters = new HashMap<>(); private String postData = ""; /** * @param source the URL to download from * @throws MalformedURLException if no protocol is specified in the source, or an unknown protocol is found */ public URLDownload(String source) throws MalformedURLException { this(new URL(source)); } /** * @param source The URL to download. */ public URLDownload(URL source) { this.source = source; this.addHeader("User-Agent", URLDownload.USER_AGENT); } /** * Older java VMs does not automatically trust the zbMATH certificate. In this case the following exception is * thrown: sun.security.validator.ValidatorException: PKIX path building failed: * sun.security.provider.certpath.SunCertPathBuilderException: unable to find valid certification path to requested * target JM > 8u101 may trust the certificate by default according to http://stackoverflow.com/a/34111150/873661 * * We will fix this issue by accepting all (!) certificates. This is ugly; but as JabRef does not rely on * security-relevant information this is kind of OK (no, actually it is not...). * * Taken from http://stackoverflow.com/a/6055903/873661 */ public static void bypassSSLVerification() { LOGGER.warn("Fix SSL exceptions by accepting ALL certificates"); // Create a trust manager that does not validate certificate chains TrustManager[] trustAllCerts = {new X509TrustManager() { @Override public void checkClientTrusted(X509Certificate[] chain, String authType) { } @Override public void checkServerTrusted(X509Certificate[] chain, String authType) { } @Override public X509Certificate[] getAcceptedIssuers() { return new X509Certificate[0]; } }}; // Install the all-trusting trust manager try { SSLContext context = SSLContext.getInstance("TLS"); context.init(null, trustAllCerts, new SecureRandom()); HttpsURLConnection.setDefaultSSLSocketFactory(context.getSocketFactory()); } catch (Exception e) { LOGGER.error("A problem occurred when bypassing SSL verification", e); } } public URL getSource() { return source; } public String getMimeType() throws IOException { Unirest.setDefaultHeader("User-Agent", "Mozilla/5.0 (Windows; U; WindowsNT 5.1; en-US; rv1.8.1.6) Gecko/20070725 Firefox/2.0.0.6"); String contentType; // Try to use HEAD request to avoid downloading the whole file try { contentType = Unirest.head(source.toString()).asString().getHeaders().get("Content-Type").get(0); if (contentType != null && !contentType.isEmpty()) { return contentType; } } catch (Exception e) { LOGGER.debug("Error getting MIME type of URL via HEAD request", e); } // Use GET request as alternative if no HEAD request is available try { contentType = Unirest.get(source.toString()).asString().getHeaders().get("Content-Type").get(0); if (contentType != null && !contentType.isEmpty()) { return contentType; } } catch (Exception e) { LOGGER.debug("Error getting MIME type of URL via GET request", e); } // Try to resolve local URIs try { URLConnection connection = new URL(source.toString()).openConnection(); contentType = connection.getContentType(); if (contentType != null && !contentType.isEmpty()) { return contentType; } } catch (IOException e) { LOGGER.debug("Error trying to get MIME type of local URI", e); } return ""; } public boolean isMimeType(String type) throws IOException { String mime = getMimeType(); if (mime.isEmpty()) { return false; } return mime.startsWith(type); } public boolean isPdf() throws IOException { return isMimeType("application/pdf"); } public void addHeader(String key, String value) { this.parameters.put(key, value); } public void setPostData(String postData) { if (postData != null) { this.postData = postData; } } /** * Downloads the web resource to a String. * * @param encoding the desired String encoding * @return the downloaded string */ public String asString(Charset encoding) throws IOException { try (InputStream input = new BufferedInputStream(this.openConnection().getInputStream()); Writer output = new StringWriter()) { copy(input, output, encoding); return output.toString(); } } /** * Downloads the web resource to a String. * Uses UTF-8 as encoding. * * @return the downloaded string */ public String asString() throws IOException { return asString(StandardCharsets.UTF_8); } public List<HttpCookie> getCookieFromUrl() throws IOException { CookieManager cookieManager = new CookieManager(); CookieHandler.setDefault(cookieManager); cookieManager.setCookiePolicy(CookiePolicy.ACCEPT_ALL); URLConnection con = this.openConnection(); con.getHeaderFields(); // must be read to store the cookie try { return cookieManager.getCookieStore().get(this.source.toURI()); } catch (URISyntaxException e) { LOGGER.error("Unable to convert download URL to URI", e); return Collections.emptyList(); } } /** * Downloads the web resource to a file. * * @param destination the destination file path. */ public void toFile(Path destination) throws IOException { try (InputStream input = new BufferedInputStream(this.openConnection().getInputStream())) { Files.copy(input, destination, StandardCopyOption.REPLACE_EXISTING); } catch (IOException e) { LOGGER.warn("Could not copy input", e); throw e; } } /** * Takes the web resource as the source for a monitored input stream. */ public ProgressInputStream asInputStream() throws IOException { URLConnection urlConnection = this.openConnection(); long fileSize = urlConnection.getContentLength(); return new ProgressInputStream(new BufferedInputStream(urlConnection.getInputStream()), fileSize); } /** * Downloads the web resource to a temporary file. * * @return the path of the temporary file. */ public Path toTemporaryFile() throws IOException { // Determine file name and extension from source url String sourcePath = source.getPath(); // Take everything after the last '/' as name + extension String fileNameWithExtension = sourcePath.substring(sourcePath.lastIndexOf('/') + 1); String fileName = FileUtil.getFileName(fileNameWithExtension); String extension = "." + FileHelper.getFileExtension(fileNameWithExtension).orElse("tmp"); // Create temporary file and download to it Path file = Files.createTempFile(fileName, extension); toFile(file); return file; } @Override public String toString() { return "URLDownload{" + "source=" + this.source + '}'; } private void copy(InputStream in, Writer out, Charset encoding) throws IOException { InputStream monitoredInputStream = in; Reader r = new InputStreamReader(monitoredInputStream, encoding); try (BufferedReader read = new BufferedReader(r)) { String line; while ((line = read.readLine()) != null) { out.write(line); out.write("\n"); } } } private URLConnection openConnection() throws IOException { URLConnection connection = this.source.openConnection(); for (Entry<String, String> entry : this.parameters.entrySet()) { connection.setRequestProperty(entry.getKey(), entry.getValue()); } if (!this.postData.isEmpty()) { connection.setDoOutput(true); try (DataOutputStream wr = new DataOutputStream(connection.getOutputStream())) { wr.writeBytes(this.postData); } } if (connection instanceof HttpURLConnection) { // normally, 3xx is redirect int status = ((HttpURLConnection) connection).getResponseCode(); if (status != HttpURLConnection.HTTP_OK) { if (status == HttpURLConnection.HTTP_MOVED_TEMP || status == HttpURLConnection.HTTP_MOVED_PERM || status == HttpURLConnection.HTTP_SEE_OTHER) { // get redirect url from "location" header field String newUrl = connection.getHeaderField("Location"); // open the new connnection again connection = new URLDownload(newUrl).openConnection(); } } } // this does network i/o: GET + read returned headers connection.connect(); return connection; } }