/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.server.remotetask;
import com.facebook.presto.execution.TaskId;
import com.facebook.presto.spi.PrestoException;
import com.facebook.presto.spi.PrestoTransportException;
import com.google.common.collect.ObjectArrays;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListenableFutureTask;
import io.airlift.event.client.ServiceUnavailableException;
import io.airlift.log.Logger;
import io.airlift.units.Duration;
import javax.annotation.concurrent.ThreadSafe;
import java.io.EOFException;
import java.net.SocketException;
import java.net.SocketTimeoutException;
import java.net.URI;
import java.util.Queue;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeoutException;
import static com.facebook.presto.spi.HostAddress.fromUri;
import static com.facebook.presto.spi.StandardErrorCode.REMOTE_TASK_ERROR;
import static com.facebook.presto.spi.StandardErrorCode.TOO_MANY_REQUESTS_FAILED;
import static com.facebook.presto.util.Failures.WORKER_NODE_ERROR;
import static java.lang.String.format;
import static java.util.concurrent.TimeUnit.NANOSECONDS;
import static java.util.concurrent.TimeUnit.SECONDS;
/**
 * Tracks failed requests made on behalf of a single remote task and decides,
 * with the help of a {@link Backoff}, when the failures should be escalated
 * into a {@link PrestoException}.
 *
 * <p>Typical usage, as far as this class is concerned: call
 * {@link #acquireRequestPermit()} and wait for the returned future, call
 * {@link #startRequest()} before issuing the request, and then report the
 * outcome through {@link #requestSucceeded()} or {@link #requestFailed(Throwable)}.
 */
@ThreadSafe
class RequestErrorTracker
{
    private static final Logger log = Logger.get(RequestErrorTracker.class);

    // Number of failures retained for attachment (as suppressed exceptions) to the final error.
    // The size check and the add are separate operations, so under concurrent failures
    // this is a best-effort bound rather than a hard limit.
    private static final int MAX_REMEMBERED_ERRORS = 10;

    private final TaskId taskId;
    private final URI taskUri;
    private final ScheduledExecutorService scheduledExecutor;
    private final String jobDescription;
    private final Backoff backoff;

    private final Queue<Throwable> errorsSinceLastSuccess = new ConcurrentLinkedQueue<>();

    /**
     * @param taskId id of the task, used in log messages
     * @param taskUri location of the remote task, used in log and error messages
     * @param minErrorDuration passed to the {@link Backoff} constructor
     * @param maxErrorDuration passed to the {@link Backoff} constructor
     * @param scheduledExecutor executor used to delay request permits
     * @param jobDescription human readable description of the request (inserted after
     *        the word "Error" in log messages)
     */
    public RequestErrorTracker(TaskId taskId, URI taskUri, Duration minErrorDuration, Duration maxErrorDuration, ScheduledExecutorService scheduledExecutor, String jobDescription)
    {
        this.taskId = taskId;
        this.taskUri = taskUri;
        this.scheduledExecutor = scheduledExecutor;
        this.backoff = new Backoff(minErrorDuration, maxErrorDuration);
        this.jobDescription = jobDescription;
    }

    /**
     * Returns a future that completes once the next request may be issued.
     *
     * @return an already completed future when the backoff reports a zero delay,
     *         otherwise a future that is completed by the scheduled executor after
     *         the backoff delay has elapsed
     */
    public ListenableFuture<?> acquireRequestPermit()
    {
        long delayNanos = backoff.getBackoffDelayNanos();

        if (delayNanos == 0) {
            return Futures.immediateFuture(null);
        }

        // the task itself does nothing; it only exists so that its completion signals "delay is over"
        ListenableFutureTask<Object> futureTask = ListenableFutureTask.create(() -> null);
        scheduledExecutor.schedule(futureTask, delayNanos, NANOSECONDS);
        return futureTask;
    }

    /**
     * Must be called before a request is sent.
     */
    public void startRequest()
    {
        // before scheduling a new request clear the error timer
        // we consider a request to be "new" if there are no current failures
        if (backoff.getFailureCount() == 0) {
            requestSucceeded();
        }
    }

    /**
     * Records a successful request: notifies the backoff and forgets all remembered errors.
     */
    public void requestSucceeded()
    {
        backoff.success();
        errorsSinceLastSuccess.clear();
    }

    /**
     * Records a failed request.
     *
     * <p>A {@link CancellationException} is ignored entirely. Any other failure is logged
     * (without a stack trace for expected network-style errors), remembered, and reported
     * to the backoff.
     *
     * @param reason the failure of the request
     * @throws PrestoException with {@code REMOTE_TASK_ERROR} if {@code reason} is a
     *         {@link RejectedExecutionException}, or a {@link PrestoTransportException} with
     *         {@code TOO_MANY_REQUESTS_FAILED} once the backoff reports that the task should
     *         be failed; the remembered errors are attached as suppressed exceptions
     */
    public void requestFailed(Throwable reason)
            throws PrestoException
    {
        // cancellation is not a failure
        if (reason instanceof CancellationException) {
            return;
        }

        if (reason instanceof RejectedExecutionException) {
            throw new PrestoException(REMOTE_TASK_ERROR, reason);
        }

        // log failure message
        // jobDescription is passed as an argument rather than concatenated into the format
        // string, so a '%' in the description cannot be misread as a format specifier
        if (isExpectedError(reason)) {
            // don't print a stack trace for known errors
            log.warn("Error %s %s: %s: %s", jobDescription, taskId, reason.getMessage(), taskUri);
        }
        else {
            log.warn(reason, "Error %s %s: %s", jobDescription, taskId, taskUri);
        }

        // remember the first few errors
        if (errorsSinceLastSuccess.size() < MAX_REMEMBERED_ERRORS) {
            errorsSinceLastSuccess.add(reason);
        }

        // fail the task, if we have more than X failures in a row and more than Y seconds have passed since the last request
        if (backoff.failure()) {
            // it is weird to mark the task failed locally and then cancel the remote task, but there is no way to tell a remote task that it is failed
            PrestoException exception = new PrestoTransportException(TOO_MANY_REQUESTS_FAILED,
                    fromUri(taskUri),
                    format("%s (%s %s - %s failures, time since last success %s)",
                            WORKER_NODE_ERROR,
                            jobDescription,
                            taskUri,
                            backoff.getFailureCount(),
                            backoff.getTimeSinceLastSuccess().convertTo(SECONDS)));
            errorsSinceLastSuccess.forEach(exception::addSuppressed);
            throw exception;
        }
    }

    /**
     * Logs {@code t} at error level.
     *
     * <p>For expected errors (see {@link #isExpectedError(Throwable)}) only the string form
     * of the throwable is appended to the message; otherwise the throwable is handed to the
     * logger together with the message.
     *
     * @param t the error to log
     * @param format format string for the message
     * @param args arguments referenced by {@code format}
     */
    static void logError(Throwable t, String format, Object... args)
    {
        if (isExpectedError(t)) {
            // append the throwable as the last format argument instead of logging its stack
            log.error(format + ": %s", ObjectArrays.concat(args, t));
        }
        else {
            log.error(t, format, args);
        }
    }

    /**
     * Returns true if {@code t} or any exception in its cause chain is one of the
     * socket, EOF, timeout or service-unavailable exception types checked below.
     */
    private static boolean isExpectedError(Throwable t)
    {
        while (t != null) {
            if ((t instanceof SocketException) ||
                    (t instanceof SocketTimeoutException) ||
                    (t instanceof EOFException) ||
                    (t instanceof TimeoutException) ||
                    (t instanceof ServiceUnavailableException)) {
                return true;
            }
            t = t.getCause();
        }
        return false;
    }
}