/** * This file is part of Graylog. * * Graylog is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Graylog is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Graylog. If not, see <http://www.gnu.org/licenses/>. */ package org.graylog2.streams; import com.google.common.collect.Maps; import org.graylog2.Configuration; import org.graylog2.notifications.Notification; import org.graylog2.notifications.NotificationService; import org.graylog2.plugin.database.ValidationException; import org.graylog2.plugin.streams.Stream; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import javax.inject.Inject; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.atomic.AtomicInteger; public class StreamFaultManager { private static final Logger LOG = LoggerFactory.getLogger(StreamFaultManager.class); private final StreamMetrics streamMetrics; private final NotificationService notificationService; private final StreamService streamService; private final int maxFaultCount; private final long streamProcessingTimeout; private final ConcurrentMap<String, AtomicInteger> faultCounter = Maps.newConcurrentMap(); @Inject public StreamFaultManager(final Configuration configuration, final StreamMetrics streamMetrics, final NotificationService notificationService, final StreamService streamService) { this.streamMetrics = streamMetrics; this.notificationService = notificationService; this.streamService = streamService; this.maxFaultCount = configuration.getStreamProcessingMaxFaults(); this.streamProcessingTimeout = configuration.getStreamProcessingTimeout(); } public long getStreamProcessingTimeout() { return streamProcessingTimeout; } public void registerFailure(final Stream stream) { final AtomicInteger faultCount = getFaultCount(stream); final int streamFaultCount = faultCount.incrementAndGet(); streamMetrics.markStreamRuleTimeout(stream.getId()); if (maxFaultCount > 0 && streamFaultCount >= maxFaultCount) { try { streamService.pause(stream); faultCount.set(0); streamMetrics.markStreamFaultsExceeded(stream.getId()); LOG.error("Processing of stream <{}> failed to return within {}ms for more than {} times. Disabling stream.", stream.getId(), streamProcessingTimeout, maxFaultCount); triggerNotification(stream, streamFaultCount); } catch (ValidationException ex) { LOG.error("Unable to pause stream: {}", ex); } } else { LOG.warn("Processing of stream <{}> failed to return within {}ms.", stream.getId(), streamProcessingTimeout); } } private void triggerNotification(final Stream stream, final int streamFaultCount) { final Notification notification = notificationService.buildNow() .addType(Notification.Type.STREAM_PROCESSING_DISABLED) .addSeverity(Notification.Severity.URGENT) .addDetail("stream_id", stream.getId()) .addDetail("fault_count", streamFaultCount); notificationService.publishIfFirst(notification); } private AtomicInteger getFaultCount(final Stream stream) { faultCounter.putIfAbsent(stream.getId(), new AtomicInteger()); return faultCounter.get(stream.getId()); } }