package org.dcache.pool.movers; import com.google.common.base.Optional; import org.apache.http.Header; import org.apache.http.HttpEntity; import org.apache.http.HttpRequest; import org.apache.http.HttpResponse; import org.apache.http.StatusLine; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpDelete; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpHead; import org.apache.http.client.methods.HttpPut; import org.apache.http.entity.InputStreamEntity; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.net.URI; import java.nio.channels.Channels; import java.security.NoSuchAlgorithmException; import java.util.Map; import java.util.Set; import java.util.concurrent.TimeUnit; import diskCacheV111.util.CacheException; import diskCacheV111.util.ChecksumFactory; import diskCacheV111.util.ThirdPartyTransferFailedCacheException; import diskCacheV111.vehicles.ProtocolInfo; import diskCacheV111.vehicles.RemoteHttpDataTransferProtocolInfo; import dmg.cells.nucleus.CellEndpoint; import org.dcache.auth.OpenIdCredential; import org.dcache.auth.OpenIdCredentialRefreshable; import org.dcache.pool.movers.MoverChannel.AllocatorMode; import org.dcache.pool.repository.Allocator; import org.dcache.pool.repository.RepositoryChannel; import org.dcache.util.Checksum; import org.dcache.util.ChecksumType; import org.dcache.util.Checksums; import org.dcache.util.Version; import org.dcache.vehicles.FileAttributes; import static com.google.common.collect.Maps.uniqueIndex; import static org.dcache.namespace.FileAttribute.CHECKSUM; import static org.dcache.util.ByteUnit.MiB; /** * This class implements transfers of data between a pool and some remote * HTTP server. Both writing data into dCache and reading from dCache are * supported (HTTP GET and PUT respectively). On-transfer checksum calculation * is supported. Support is also included for RFC-3230, which allows the remote * server to specify one or more checksums as part of a response to a GET or * HEAD request. * * If the remote server supports RFC-3230 then this is used to discover the * remote file's checksum(s). Using this, dCache will always try to verify the * transfer was successful. dCache will be unable to do this only if the file is * sent to a remote server that provides a set of checksums that doesn't overlap * with the set of checksums dCache knows for this file. * * The mover supports a require-checksum-verification flag. * * When enabled, the integrity of the transferred data must be verified by * checking a remote supplied checksum matches one known locally (either * calculated as part of the transfer or already know for this file). If the * flag is enabled and verification is impossible (e.g., the remote server * supplied no checksums) then the transfer will fail. * * If require-checksum-verification flag is disabled then a transfer will not * fail if the remote server supplies no checksum; however, if checksums are * supplied then they are checked against locally known checksums (either * calculated as part of the transfer or already known for this file) and a * mismatch will fail the transfer. * * * WRITE REQUESTS * * The pool accepts only a single client-supplied (i.e., from the remote server) * checksum value. Therefore if the remote server supplies more than one * checksum then one is selected: either matching the pool's on-transfer * checksum choice or a hard-coded preference list. * * If require-checksum-verification is enabled and the remote server supplied * no checksum that dCache understands, or the server doesn't support RFC-3230, * then the transfer fails. * * * READ REQUESTS * * When the request is for reading data (an HTTP PUT request) then the mover * will copy the file to the remote server and try to verify that the file * arrived OK using the HTTP HEAD command. If the HEAD request fails or the * Content-Length value is wrong, or (if the remote server supports RFC-3230) * the supplied checksums indicate data corruption then the mover will fail * the transfer. * * If checksum-verification-required is enabled and the remote server does not * support RFC-3230 or none of the checksums provided by the remote server * were calculated using the same algorithm as a known checksum for this file * then the mover will fail the transfer. * * If checksum-verification-required is disabled then a lack of checksum * verification does not fail the transfer. * * If the PUT request fails, for whatever reason, then the mover will attempt * to clear up the transfer by deleting the remote copy via the HTTP DELETE * command. If the cleanup is successful then the error triggering the cleanup * is reported. If the cleanup is unsuccessful then an error is reported * containing both the error in removing the remote file and the error that * triggered the delete. */ public class RemoteHttpDataTransferProtocol implements MoverProtocol, ChecksumMover { private static final Logger _log = LoggerFactory.getLogger(RemoteHttpDataTransferProtocol.class); /** Maximum time to wait when establishing a connection. */ private static final int CONNECTION_TIMEOUT = (int) TimeUnit.MINUTES.toMillis(1); /** Maximum time to wait for next packet from remote server. */ private static final int SOCKET_TIMEOUT = (int) TimeUnit.MINUTES.toMillis(1); /** * Expected maximum delay all post-processing files will experience, * in milliseconds. */ private static final long POST_PROCESSING_OFFSET = 60_000; /** * Expected minimum (effective) internal IO bandwidth of the remote * server, in bytes per millisecond. This is used to estimate how long * any file post-processing (like checksum calculation) will take. */ private static final double POST_PROCESSING_BANDWIDTH = MiB.toBytes(100) / 1_000.0; /** Number of milliseconds between successive requests. */ private static final long DELAY_BETWEEN_REQUESTS = 5_000; /** * Maximum number of redirections to follow. * Note that, although RFC 2068 section 10.3 recommends a maximum of 5, * both firefox and webkit currently limit (by default) to 20 redirections. */ private static final int MAX_REDIRECTIONS = 20; private static final String AUTH_BEARER = "Bearer "; // REVISIT: we may wish to generate a value based on the algorithms dCache // supports private static final String WANT_DIGEST_VALUE = "adler32;q=1, md5;q=0.8"; protected static final String USER_AGENT = "dCache/" + Version.of(RemoteHttpDataTransferProtocol.class).getVersion(); // Pool-supplied factory for on-transfer checksums, null if disabled. private ChecksumFactory _onTransfer; // The RepositoryChannel to satisfy on-transfer checksum calculation. private ChecksumChannel _onTransferChecksumChannel; // The RepositoryChannel to verify data integrety when remote supplied // checksums that don't overlap with the on-transfer checksum. private ChecksumChannel _remoteSuppliedChecksumChannel; private volatile MoverChannel<RemoteHttpDataTransferProtocolInfo> _channel; private Checksum _remoteSuppliedChecksum; private CloseableHttpClient _client; public RemoteHttpDataTransferProtocol(CellEndpoint cell) { // constructor needed by Pool mover contract. } private static void checkThat(boolean isOk, String message) throws CacheException { if (!isOk) { throw new CacheException(message); } } @Override public void runIO(FileAttributes attributes, RepositoryChannel channel, ProtocolInfo genericInfo, Allocator allocator, IoMode access) throws CacheException, IOException, InterruptedException { _log.debug("info={}, attributes={}, access={}", genericInfo, attributes, access); RemoteHttpDataTransferProtocolInfo info = (RemoteHttpDataTransferProtocolInfo) genericInfo; _channel = new MoverChannel<>(access, attributes, info, channel, allocator, AllocatorMode.HARD); _client = createHttpClient(); try { switch (access) { case WRITE: receiveFile(info); break; case READ: checkThat(!info.isVerificationRequired() || attributes.isDefined(CHECKSUM), "checksum verification failed: file has no checksum"); sendAndCheckFile(info); break; } } finally { _client.close(); } } protected CloseableHttpClient createHttpClient() throws CacheException { return HttpClients.custom().setUserAgent(USER_AGENT).build(); } private void receiveFile(final RemoteHttpDataTransferProtocolInfo info) throws ThirdPartyTransferFailedCacheException { HttpGet get = buildGetRequest(info); try (CloseableHttpResponse response = _client.execute(get)) { StatusLine statusLine = response.getStatusLine(); if (statusLine.getStatusCode() >= 300) { throw new ThirdPartyTransferFailedCacheException("remote " + "server rejected GET: " + statusLine.getStatusCode() + " " + statusLine.getReasonPhrase()); } String rfc3230 = headerValue(response, "Digest"); Map<ChecksumType,Checksum> checksums = uniqueIndex(Checksums.decodeRfc3230(rfc3230), Checksum::getType); if (!checksums.isEmpty()) { if (_onTransfer != null && checksums.containsKey(_onTransfer.getType())) { _remoteSuppliedChecksum = checksums.get(_onTransfer.getType()); } else { _remoteSuppliedChecksum = Checksums.preferrredOrder().min(checksums.values()); } } if (_remoteSuppliedChecksum == null && info.isVerificationRequired()) { throw new ClientProtocolException("failed to verify transfer: " + "server sent no useful checksum: " + (rfc3230 == null ? "(none sent)" : rfc3230)); } // NB. we MUST NOT close RepositoryChannel as pool wants to do this RepositoryChannel to = decorateForChecksumCalculation(_channel); HttpEntity entity = response.getEntity(); if (entity == null) { throw new ClientProtocolException("Response contains no content"); } entity.writeTo(Channels.newOutputStream(to)); } catch (IOException e) { throw new ThirdPartyTransferFailedCacheException(e.toString(), e); } if (_remoteSuppliedChecksum != null) { Checksum transferChecksum = (_remoteSuppliedChecksumChannel != null) ? _remoteSuppliedChecksumChannel.getChecksum() : _onTransferChecksumChannel.getChecksum(); if (!_remoteSuppliedChecksum.equals(transferChecksum)) { throw new ThirdPartyTransferFailedCacheException( String.format("Received data does not match remote server's checksum (%s != %s)", transferChecksum, _remoteSuppliedChecksum)); } } } private HttpGet buildGetRequest(RemoteHttpDataTransferProtocolInfo info) { HttpGet get = new HttpGet(info.getUri()); get.addHeader("Want-Digest", WANT_DIGEST_VALUE); addHeadersToRequest(info, get); get.setConfig(RequestConfig.custom() .setConnectTimeout(CONNECTION_TIMEOUT) .setSocketTimeout(SOCKET_TIMEOUT) .build()); return get; } private RepositoryChannel decorateForChecksumCalculation(RepositoryChannel baseChannel) { RepositoryChannel channel = baseChannel; if (_onTransfer != null) { channel = _onTransferChecksumChannel = new ChecksumChannel(channel, _onTransfer); } if (_remoteSuppliedChecksum != null && (_onTransfer == null || _onTransfer.getType() != _remoteSuppliedChecksum.getType())) { try { channel = _remoteSuppliedChecksumChannel = new ChecksumChannel(channel, ChecksumFactory.getFactoryFor(_remoteSuppliedChecksum)); } catch (NoSuchAlgorithmException e) { throw new RuntimeException("cannot find algorithm: " + e.getMessage(), e); } } return channel; } private void sendAndCheckFile(RemoteHttpDataTransferProtocolInfo info) throws ThirdPartyTransferFailedCacheException { sendFile(info, _channel.getFileAttributes().getSize()); try { verifyRemoteFile(info); } catch (ThirdPartyTransferFailedCacheException e) { deleteRemoteFile(e.getMessage(), info); throw new ThirdPartyTransferFailedCacheException("verification " + "failed: " + e.getMessage()); } } private void sendFile(RemoteHttpDataTransferProtocolInfo info, long length) throws ThirdPartyTransferFailedCacheException { URI location = info.getUri(); for (int attempt = 0; attempt < MAX_REDIRECTIONS; attempt++) { HttpPut put = buildPutRequest(info, length); try (CloseableHttpResponse response = _client.execute(put)) { StatusLine status = response.getStatusLine(); switch (status.getStatusCode()) { case 200: /* OK (not actually a valid response from PUT) */ case 201: /* Created */ return; case 300: /* Multiple Choice */ case 301: /* Moved Permanently */ case 302: /* Found (REVISIT: should we treat this as an error?) */ case 307: /* Temporary Redirect */ case 308: /* Permanent Redirect */ String locationHeader = response.getFirstHeader("Location").getValue(); if (locationHeader == null) { throw new ThirdPartyTransferFailedCacheException("remote " + "server replied " + status.getStatusCode() + " (" + status.getReasonPhrase() + ") without a " + "Location header"); } try { location = URI.create(locationHeader); } catch (IllegalArgumentException e) { throw new ThirdPartyTransferFailedCacheException("remote " + "server redirected to invalid URL '" + locationHeader + "': " + e.getMessage()); } break; /* Treat all other responses as a failure. */ default: throw new ThirdPartyTransferFailedCacheException("remote " + "server rejected PUT: " + status.getStatusCode() + " " + status.getReasonPhrase()); } } catch (IOException e) { _log.error("problem connecting: {}", e.toString()); throw new ThirdPartyTransferFailedCacheException("failed to " + "connect to server: " + e.toString(), e); } } _log.error("too many redirects, last location was: {}", location); throw new ThirdPartyTransferFailedCacheException("exceeded maximum " + "number of redirections; last location was " + location); } private HttpPut buildPutRequest(RemoteHttpDataTransferProtocolInfo info, long length) { HttpPut put = new HttpPut(info.getUri()); put.setConfig(RequestConfig.custom() .setConnectTimeout(CONNECTION_TIMEOUT) .setExpectContinueEnabled(true) .setSocketTimeout(0) .build()); addHeadersToRequest(info, put); put.setEntity(new InputStreamEntity(Channels.newInputStream(_channel), length)); // FIXME add SO_KEEPALIVE setting return put; } private void verifyRemoteFile(RemoteHttpDataTransferProtocolInfo info) throws ThirdPartyTransferFailedCacheException { FileAttributes attributes = _channel.getFileAttributes(); boolean isFirstAttempt = true; /* * We estimate how long any post-processing will take based on a * linear model. The model is: * * t_max = alpha + S / beta * * where t_max is the maximum time post-processing is expected to take, * S is the file's size, alpha is the fixed time that all files require * and beta is the effective IO bandwidth within the remote server. */ long t_max = POST_PROCESSING_OFFSET + (long)(attributes.getSize() / POST_PROCESSING_BANDWIDTH); long deadline = System.currentTimeMillis() + t_max; try { while (System.currentTimeMillis() < deadline) { long sleepFor = Math.min(deadline - System.currentTimeMillis(), DELAY_BETWEEN_REQUESTS); if (!isFirstAttempt && sleepFor > 0) { Thread.sleep(sleepFor); } isFirstAttempt = false; HttpHead head = buildHeadRequest(info); try (CloseableHttpResponse response = _client.execute(head)) { StatusLine status = response.getStatusLine(); if (status.getStatusCode() >= 300) { throw new ThirdPartyTransferFailedCacheException("remote " + "server rejected HEAD: " + status.getStatusCode() + " " + status.getReasonPhrase()); } Long length = getContentLength(response); if (length == null || (attributes.getSize() != 0 && length == 0)) { continue; } if (attributes.getSize() != length) { throw new ThirdPartyTransferFailedCacheException( String.format("server reported wrong file size (%d != %d)", length, attributes.getSize())); } String rfc3230 = headerValue(response, "Digest"); checkChecksums(info, rfc3230, attributes.getChecksumsIfPresent()); return; } catch (IOException e) { throw new ThirdPartyTransferFailedCacheException("failed to " + "connect to server: " + e.toString(), e); } } } catch (InterruptedException e) { throw new ThirdPartyTransferFailedCacheException("pool is shutting down", e); } throw new ThirdPartyTransferFailedCacheException("remote server failed " + "to provide length after " + (t_max / 1_000) + "s"); } private HttpHead buildHeadRequest(RemoteHttpDataTransferProtocolInfo info) { HttpHead head = new HttpHead(info.getUri()); head.addHeader("Want-Digest", WANT_DIGEST_VALUE); head.setConfig(RequestConfig.custom() .setConnectTimeout(CONNECTION_TIMEOUT) .setSocketTimeout(SOCKET_TIMEOUT) .build()); addHeadersToRequest(info, head); return head; } private void checkChecksums(RemoteHttpDataTransferProtocolInfo info, String rfc3230, Optional<Set<Checksum>> knownChecksums) throws ThirdPartyTransferFailedCacheException { Map<ChecksumType,Checksum> checksums = uniqueIndex(Checksums.decodeRfc3230(rfc3230), Checksum::getType); boolean verified = false; if (knownChecksums.isPresent()) { for (Checksum ourChecksum : knownChecksums.get()) { ChecksumType type = ourChecksum.getType(); if (checksums.containsKey(type)) { checkChecksumEqual(ourChecksum, checksums.get(type)); verified = true; } } } if (info.isVerificationRequired() && !verified) { throw new ThirdPartyTransferFailedCacheException("server sent no useful checksum: " + (rfc3230 == null ? "(none sent)" : rfc3230)); } } private static String headerValue(HttpResponse response, String headerName) { Header header = response.getFirstHeader(headerName); return header != null ? header.getValue() : null; } private static Long getContentLength(HttpResponse response) throws ThirdPartyTransferFailedCacheException { Header header = response.getLastHeader("Content-Length"); if (header == null) { return null; } try { return Long.parseLong(header.getValue()); } catch (NumberFormatException e) { throw new ThirdPartyTransferFailedCacheException("server sent malformed Content-Length header", e); } } private static void checkChecksumEqual(Checksum expected, Checksum actual) throws ThirdPartyTransferFailedCacheException { if (expected.getType() != actual.getType()) { throw new RuntimeException("internal error: checksum comparison " + "between different types (" + expected.getType() + " != " + actual.getType()); } if (!expected.equals(actual)) { throw new ThirdPartyTransferFailedCacheException(expected.getType().getName() + " " + actual.getValue() + " != " + expected.getValue()); } } private void deleteRemoteFile(String why, RemoteHttpDataTransferProtocolInfo info) throws ThirdPartyTransferFailedCacheException { HttpDelete delete = buildDeleteRequest(info); try (CloseableHttpResponse response = _client.execute(delete)) { StatusLine status = response.getStatusLine(); if (status.getStatusCode() >= 300) { throw new ThirdPartyTransferFailedCacheException("remote " + "server rejected DELETE: " + status.getStatusCode() + " " + status.getReasonPhrase()); } } catch (CacheException e) { throw new ThirdPartyTransferFailedCacheException("delete of " + "remote file (triggered by " + why + ") failed: " + e.getMessage()); } catch (IOException e) { throw new ThirdPartyTransferFailedCacheException("delete of " + "remote file (triggered by " + why + ") failed: " + e.toString()); } } private HttpDelete buildDeleteRequest(RemoteHttpDataTransferProtocolInfo info) { HttpDelete delete = new HttpDelete(info.getUri()); delete.setConfig(RequestConfig.custom() .setConnectTimeout(CONNECTION_TIMEOUT) .setSocketTimeout(SOCKET_TIMEOUT) .build()); addHeadersToRequest(info, delete); return delete; } private void addHeadersToRequest(RemoteHttpDataTransferProtocolInfo info, HttpRequest request) { info.getHeaders().forEach(request::addHeader); if (info.hasTokenCredential()) { request.addHeader("Authorization", AUTH_BEARER + new OpenIdCredentialRefreshable(info.getTokenCredential(), _client).getBearerToken()); } } @Override public long getLastTransferred() { MoverChannel<RemoteHttpDataTransferProtocolInfo> channel = _channel; return channel == null ? System.currentTimeMillis() : channel.getLastTransferred(); } @Override public long getBytesTransferred() { MoverChannel<RemoteHttpDataTransferProtocolInfo> channel = _channel; return channel == null ? 0 : channel.getBytesTransferred(); } @Override public long getTransferTime() { MoverChannel<RemoteHttpDataTransferProtocolInfo> channel = _channel; return channel == null ? 0 : channel.getTransferTime(); } @Override public void enableTransferChecksum(ChecksumType suggestedAlgorithm) throws NoSuchAlgorithmException { _onTransfer = ChecksumFactory.getFactory(suggestedAlgorithm); } @Override public Checksum getActualChecksum() { return _onTransferChecksumChannel != null ? _onTransferChecksumChannel.getChecksum() : null; } @Override public Checksum getExpectedChecksum() { return _remoteSuppliedChecksum; } }