/* * Copyright 2013 Netflix, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.netflix.suro.input.thrift; import com.fasterxml.jackson.databind.ObjectMapper; import com.google.inject.Inject; import com.netflix.governator.guice.lazy.LazySingleton; import com.netflix.servo.annotations.DataSourceType; import com.netflix.servo.annotations.Monitor; import com.netflix.servo.monitor.DynamicCounter; import com.netflix.servo.monitor.MonitorConfig; import com.netflix.servo.monitor.Monitors; import com.netflix.suro.ClientConfig; import com.netflix.suro.TagKey; import com.netflix.suro.input.SuroInput; import com.netflix.suro.message.DefaultMessageContainer; import com.netflix.suro.message.Message; import com.netflix.suro.message.MessageSetBuilder; import com.netflix.suro.message.MessageSetReader; import com.netflix.suro.queue.Queue4Server; import com.netflix.suro.routing.MessageRouter; import com.netflix.suro.thrift.*; import org.apache.thrift.TException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; /** * The {@link TMessageSet} processor used by {@link com.netflix.suro.input.thrift.ThriftServer}. It takes incoming {@link TMessageSet} * sent by Suro client, validates each message set's CRC32 code, and then hands off validated message set to an internal queue. * A {@link MessageRouter} instance will asynchronously route the messages in the queue into configured sinks based on routing rules, * represented by {@link com.amazonaws.services.s3.model.RoutingRule}. * * Since this is the frontend of Thrift Server, it is implementing service status * and controlling to take the traffic or not. * * @author jbae */ @LazySingleton public class MessageSetProcessor implements SuroServer.Iface { private static final Logger log = LoggerFactory.getLogger(MessageSetProcessor.class); private SuroInput input; private boolean isTakingTraffic = true; public void stopTakingTraffic(){ this.isTakingTraffic = false; } public void startTakingTraffic(){ this.isTakingTraffic = true; } @Override public ServiceStatus getStatus() { if (isTakingTraffic){ return ServiceStatus.ALIVE; } else { return ServiceStatus.WARNING; } } private volatile boolean isRunning = false; private final Queue4Server queue; private final MessageRouter router; private final ServerConfig config; private ExecutorService executors; private final ObjectMapper jsonMapper; @Inject public MessageSetProcessor( Queue4Server queue, MessageRouter router, ServerConfig config, ObjectMapper jsonMapper) throws Exception { this.queue = queue; this.router = router; this.config = config; this.jsonMapper = jsonMapper; isRunning = true; Monitors.registerObject(this); } private static final String messageCountMetrics = "messageCount"; private static final String retryCountMetrics = "retryCount"; private static final String dataCorruptionCountMetrics = "corruptedMessageCount"; private static final String rejectedMessageCountMetrics = "rejectedMessageCount"; private static final String messageProcessErrorMetrics = "processErrorCount"; @Monitor(name ="QueueSize", type= DataSourceType.GAUGE) public int getQueueSize() { return queue.size(); } @Override public String getName() throws TException { return "Suro-MessageQueue"; } @Override public String getVersion() throws TException { return "V0.1.0"; } @Override public Result process(TMessageSet messageSet) throws TException { Result result = new Result(); try { // Stop adding chunks if it's no running if ( !isRunning) { DynamicCounter.increment(rejectedMessageCountMetrics, TagKey.APP, messageSet.getApp(), TagKey.REJECTED_REASON, "SURO_STOPPED"); log.warn("Message processor is not running. Message rejected"); result.setMessage("Suro server stopped"); result.setResultCode(ResultCode.STOPPED); return result; } if ( !isTakingTraffic ) { DynamicCounter.increment(rejectedMessageCountMetrics, TagKey.APP, messageSet.getApp(), TagKey.REJECTED_REASON, "SURO_THROTTLING"); log.warn("Suro is not taking traffic. Message rejected. "); result.setMessage("Suro server is not taking traffic"); result.setResultCode(ResultCode.OTHER_ERROR); return result; } MessageSetReader reader = new MessageSetReader(messageSet); if (!reader.checkCRC()) { DynamicCounter.increment(dataCorruptionCountMetrics, TagKey.APP, messageSet.getApp()); result.setMessage("data corrupted"); result.setResultCode(ResultCode.CRC_CORRUPTED); return result; } if (queue.offer(messageSet)) { DynamicCounter.increment( MonitorConfig.builder(messageCountMetrics) .withTag(TagKey.APP, messageSet.getApp()) .build(), messageSet.getNumMessages()); result.setMessage(Long.toString(messageSet.getCrc())); result.setResultCode(ResultCode.OK); } else { DynamicCounter.increment(retryCountMetrics, TagKey.APP, messageSet.getApp()); result.setMessage(Long.toString(messageSet.getCrc())); result.setResultCode(ResultCode.QUEUE_FULL); } return result; } catch (Exception e) { log.error("Exception when processing message set " + e.getMessage(), e); } return result; } public void start() { log.info("Starting processing message queue."); isRunning = true; executors = Executors.newFixedThreadPool(config.getMessageRouterThreads()); for (int i = 0; i < config.getMessageRouterThreads(); ++i) { executors.execute(new Runnable() { @Override public void run() { TMessageSet tMessageSet; long waitTime = config.messageRouterDefaultPollTimeout; while (isRunning) { try { tMessageSet = queue.poll(waitTime, TimeUnit.MILLISECONDS); if (tMessageSet == null) { if (waitTime < config.messageRouterMaxPollTimeout) { waitTime += config.messageRouterDefaultPollTimeout; } continue; } waitTime = config.messageRouterDefaultPollTimeout; processMessageSet(tMessageSet); } catch (Exception e) { log.error("Exception while handling TMessageSet: " + e.getMessage(), e); } } // drain remain when shutting down while ( !queue.isEmpty() ) { try { tMessageSet = queue.poll(0, TimeUnit.MILLISECONDS); processMessageSet(tMessageSet); } catch (Exception e) { log.error("Exception while processing drained message set: "+e.getMessage(), e); } } } }); } } @SuppressWarnings("unchecked") private void processMessageSet(TMessageSet tMessageSet) { MessageSetReader reader = new MessageSetReader(tMessageSet); for (final Message message : reader) { try { router.process(input, new DefaultMessageContainer(message, jsonMapper)); } catch (Exception e) { DynamicCounter.increment(messageProcessErrorMetrics, TagKey.APP, tMessageSet.getApp(), TagKey.DATA_SOURCE, message.getRoutingKey()); log.error(String.format("Failed to process message %s: %s", message, e.getMessage()), e); } } } @Override public long shutdown() throws TException { shutdown(config.messageRouterMaxPollTimeout * 2); return 0; } public void shutdown(long timeout) { log.info("MessageQueue is shutting down"); isRunning = false; try { executors.shutdown(); executors.awaitTermination(timeout, TimeUnit.MILLISECONDS); if ( !executors.isTerminated() ) { log.error("MessageDispatcher was not shut down gracefully"); } executors.shutdownNow(); } catch (InterruptedException e) { Thread.interrupted(); } } public TMessageSet poll(long timeout, TimeUnit unit) { try { return queue.poll(timeout, unit); } catch (InterruptedException e) { Thread.interrupted(); return new MessageSetBuilder(new ClientConfig()).build(); } } public void setInput(SuroInput input) { this.input = input; } }