// Copyright 2016 The Bazel Authors. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package com.google.devtools.build.lib.bazel.repository.downloader; import com.google.common.base.Ascii; import com.google.common.base.Strings; import com.google.common.collect.ImmutableMap; import com.google.common.collect.ImmutableSet; import com.google.common.io.ByteStreams; import com.google.common.math.IntMath; import com.google.devtools.build.lib.concurrent.ThreadSafety.ThreadSafe; import com.google.devtools.build.lib.events.Event; import com.google.devtools.build.lib.events.EventHandler; import com.google.devtools.build.lib.util.Sleeper; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InterruptedIOException; import java.net.HttpURLConnection; import java.net.SocketTimeoutException; import java.net.URL; import java.net.URLConnection; import java.net.UnknownHostException; import java.util.ArrayList; import java.util.List; import java.util.Locale; import java.util.Map; import javax.annotation.Nullable; import javax.annotation.WillClose; /** * Class for establishing connections to HTTP servers for downloading files. * * <p>This class must be used in conjunction with {@link HttpConnectorMultiplexer}. * * <p>Instances are thread safe and can be reused. */ @ThreadSafe class HttpConnector { private static final int MAX_RETRIES = 8; private static final int MAX_REDIRECTS = 40; private static final int MIN_RETRY_DELAY_MS = 100; private static final int MIN_CONNECT_TIMEOUT_MS = 1000; private static final int MAX_CONNECT_TIMEOUT_MS = 10000; private static final int READ_TIMEOUT_MS = 20000; private static final ImmutableSet<String> COMPRESSED_EXTENSIONS = ImmutableSet.of("bz2", "gz", "jar", "tgz", "war", "xz", "zip"); private final Locale locale; private final EventHandler eventHandler; private final ProxyHelper proxyHelper; private final Sleeper sleeper; HttpConnector( Locale locale, EventHandler eventHandler, ProxyHelper proxyHelper, Sleeper sleeper) { this.locale = locale; this.eventHandler = eventHandler; this.proxyHelper = proxyHelper; this.sleeper = sleeper; } URLConnection connect( URL originalUrl, ImmutableMap<String, String> requestHeaders) throws IOException { if (Thread.interrupted()) { throw new InterruptedIOException(); } URL url = originalUrl; if (HttpUtils.isProtocol(url, "file")) { return url.openConnection(); } List<Throwable> suppressions = new ArrayList<>(); int retries = 0; int redirects = 0; int connectTimeout = MIN_CONNECT_TIMEOUT_MS; while (true) { HttpURLConnection connection = null; try { connection = (HttpURLConnection) url.openConnection(proxyHelper.createProxyIfNeeded(url)); boolean isAlreadyCompressed = COMPRESSED_EXTENSIONS.contains(HttpUtils.getExtension(url.getPath())) || COMPRESSED_EXTENSIONS.contains(HttpUtils.getExtension(originalUrl.getPath())); for (Map.Entry<String, String> entry : requestHeaders.entrySet()) { if (isAlreadyCompressed && Ascii.equalsIgnoreCase(entry.getKey(), "Accept-Encoding")) { // We're not going to ask for compression if we're downloading a file that already // appears to be compressed. continue; } connection.setRequestProperty(entry.getKey(), entry.getValue()); } connection.setConnectTimeout(connectTimeout); // The read timeout is always large because it stays in effect after this method. connection.setReadTimeout(READ_TIMEOUT_MS); // Java tries to abstract HTTP error responses for us. We don't want that. So we're going // to try and undo any IOException that doesn't appear to be a legitimate I/O exception. int code; try { connection.connect(); code = connection.getResponseCode(); } catch (FileNotFoundException ignored) { code = connection.getResponseCode(); } catch (UnknownHostException e) { String message = "Unknown host: " + e.getMessage(); eventHandler.handle(Event.progress(message)); throw new UnrecoverableHttpException(message); } catch (IllegalArgumentException e) { // This will happen if the user does something like specify a port greater than 2^16-1. throw new UnrecoverableHttpException(e.getMessage()); } catch (IOException e) { // I'm not sure in what cases this happens, but IOException can be thrown with a null // message. if (e.getMessage() == null) { throw new UnrecoverableHttpException( "Failed to even get an error message from " + url); } if (!e.getMessage().startsWith("Server returned")) { throw e; } code = connection.getResponseCode(); } // 206 means partial content and only happens if caller specified Range. See RFC7233 § 4.1. if (code == 200 || code == 206) { return connection; } else if (code == 301 || code == 302 || code == 307) { readAllBytesAndClose(connection.getInputStream()); if (++redirects == MAX_REDIRECTS) { eventHandler.handle(Event.progress("Redirect loop detected in " + originalUrl)); throw new UnrecoverableHttpException("Redirect loop detected"); } url = HttpUtils.getLocation(connection); if (code == 301) { originalUrl = url; } } else if (code == 403) { // jart@ has noticed BitBucket + Amazon AWS downloads frequently flake with this code. throw new IOException(describeHttpResponse(connection)); } else if (code == 408) { // The 408 (Request Timeout) status code indicates that the server did not receive a // complete request message within the time that it was prepared to wait. Server SHOULD // send the "close" connection option (Section 6.1 of [RFC7230]) in the response, since // 408 implies that the server has decided to close the connection rather than continue // waiting. If the client has an outstanding request in transit, the client MAY repeat // that request on a new connection. Quoth RFC7231 § 6.5.7 throw new IOException(describeHttpResponse(connection)); } else if (code < 500 // 4xx means client seems to have erred quoth RFC7231 § 6.5 || code == 501 // Server doesn't support function quoth RFC7231 § 6.6.2 || code == 502 // Host not configured on server cf. RFC7231 § 6.6.3 || code == 505) { // Server refuses to support version quoth RFC7231 § 6.6.6 // This is a permanent error so we're not going to retry. readAllBytesAndClose(connection.getErrorStream()); throw new UnrecoverableHttpException(describeHttpResponse(connection)); } else { // However we will retry on some 5xx errors, particularly 500 and 503. throw new IOException(describeHttpResponse(connection)); } } catch (UnrecoverableHttpException e) { throw e; } catch (IOException e) { if (connection != null) { // If we got here, it means we might not have consumed the entire payload of the // response, if any. So we're going to force this socket to disconnect and not be // reused. This is particularly important if multiple threads end up establishing // connections to multiple mirrors simultaneously for a large file. We don't want to // download that large file twice. connection.disconnect(); } // We don't respect the Retry-After header (RFC7231 § 7.1.3) because it's rarely used and // tends to be too conservative when it is. We're already being good citizens by using // exponential backoff. Furthermore RFC law didn't use the magic word "MUST". int timeout = IntMath.pow(2, retries) * MIN_RETRY_DELAY_MS; if (e instanceof SocketTimeoutException) { eventHandler.handle(Event.progress("Timeout connecting to " + url)); connectTimeout = Math.min(connectTimeout * 2, MAX_CONNECT_TIMEOUT_MS); // If we got connect timeout, we're already doing exponential backoff, so no point // in sleeping too. timeout = 1; } else if (e instanceof InterruptedIOException) { // Please note that SocketTimeoutException is a subtype of InterruptedIOException. throw e; } if (++retries == MAX_RETRIES) { if (!(e instanceof SocketTimeoutException)) { eventHandler .handle(Event.progress(format("Error connecting to %s: %s", url, e.getMessage()))); } for (Throwable suppressed : suppressions) { e.addSuppressed(suppressed); } throw e; } // Java 7 allows us to create a tree of all errors that led to the ultimate failure. suppressions.add(e); eventHandler.handle( Event.progress(format("Failed to connect to %s trying again in %,dms", url, timeout))); url = originalUrl; try { sleeper.sleepMillis(timeout); } catch (InterruptedException translated) { throw new InterruptedIOException(); } } catch (RuntimeException e) { if (connection != null) { connection.disconnect(); } eventHandler.handle(Event.progress(format("Unknown error connecting to %s: %s", url, e))); throw e; } } } private String describeHttpResponse(HttpURLConnection connection) throws IOException { return format( "%s returned %d %s", connection.getRequestMethod(), connection.getResponseCode(), Strings.nullToEmpty(connection.getResponseMessage())); } private String format(String format, Object... args) { return String.format(locale, format, args); } // Exhausts all bytes in an HTTP to make it easier for Java infrastructure to reuse sockets. private static void readAllBytesAndClose( @WillClose @Nullable InputStream stream) throws IOException { if (stream != null) { ByteStreams.exhaust(stream); stream.close(); } } }