/*
* Copyright © 2015-2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.data.tools.flow;
import co.cask.cdap.api.app.ApplicationSpecification;
import co.cask.cdap.api.dataset.lib.cube.AggregationFunction;
import co.cask.cdap.api.dataset.lib.cube.TimeValue;
import co.cask.cdap.api.flow.FlowSpecification;
import co.cask.cdap.api.flow.FlowletConnection;
import co.cask.cdap.api.metrics.MetricDataQuery;
import co.cask.cdap.api.metrics.MetricStore;
import co.cask.cdap.api.metrics.MetricTimeSeries;
import co.cask.cdap.api.metrics.MetricsCollectionService;
import co.cask.cdap.api.metrics.MetricsContext;
import co.cask.cdap.app.guice.AppFabricServiceRuntimeModule;
import co.cask.cdap.app.guice.AuthorizationModule;
import co.cask.cdap.app.guice.ProgramRunnerRuntimeModule;
import co.cask.cdap.app.guice.ServiceStoreModules;
import co.cask.cdap.app.queue.QueueSpecification;
import co.cask.cdap.app.queue.QueueSpecificationGenerator;
import co.cask.cdap.app.runtime.ProgramRuntimeService;
import co.cask.cdap.app.store.Store;
import co.cask.cdap.common.NotFoundException;
import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.common.conf.Constants;
import co.cask.cdap.common.guice.ConfigModule;
import co.cask.cdap.common.guice.DiscoveryRuntimeModule;
import co.cask.cdap.common.guice.IOModule;
import co.cask.cdap.common.guice.KafkaClientModule;
import co.cask.cdap.common.guice.LocationRuntimeModule;
import co.cask.cdap.common.guice.TwillModule;
import co.cask.cdap.common.guice.ZKClientModule;
import co.cask.cdap.common.namespace.NamespaceAdmin;
import co.cask.cdap.common.namespace.NamespacedLocationFactory;
import co.cask.cdap.common.queue.QueueName;
import co.cask.cdap.data.runtime.DataFabricDistributedModule;
import co.cask.cdap.data.runtime.DataSetsModules;
import co.cask.cdap.data.runtime.SystemDatasetRuntimeModule;
import co.cask.cdap.data.stream.StreamAdminModules;
import co.cask.cdap.data.tools.HBaseQueueDebugger;
import co.cask.cdap.data.view.ViewAdminModules;
import co.cask.cdap.data2.dataset2.DatasetFramework;
import co.cask.cdap.data2.queue.QueueClientFactory;
import co.cask.cdap.data2.transaction.queue.QueueAdmin;
import co.cask.cdap.data2.transaction.queue.hbase.HBaseQueueAdmin;
import co.cask.cdap.data2.transaction.queue.hbase.HBaseQueueClientFactory;
import co.cask.cdap.data2.util.hbase.HBaseTableUtil;
import co.cask.cdap.data2.util.hbase.HBaseTableUtilFactory;
import co.cask.cdap.explore.guice.ExploreClientModule;
import co.cask.cdap.internal.app.queue.SimpleQueueSpecificationGenerator;
import co.cask.cdap.internal.app.runtime.flow.FlowUtils;
import co.cask.cdap.internal.app.store.DefaultStore;
import co.cask.cdap.metrics.guice.MetricsClientRuntimeModule;
import co.cask.cdap.notifications.feeds.client.NotificationFeedClientModule;
import co.cask.cdap.notifications.guice.NotificationServiceRuntimeModule;
import co.cask.cdap.proto.Id;
import co.cask.cdap.proto.NamespaceMeta;
import co.cask.cdap.store.guice.NamespaceStoreModule;
import co.cask.tephra.TransactionExecutorFactory;
import co.cask.tephra.TransactionSystemClient;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Table;
import com.google.common.util.concurrent.AbstractIdleService;
import com.google.gson.Gson;
import com.google.inject.AbstractModule;
import com.google.inject.Guice;
import com.google.inject.Inject;
import com.google.inject.Injector;
import com.google.inject.Provides;
import com.google.inject.Singleton;
import com.google.inject.name.Named;
import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.twill.api.RunId;
import org.apache.twill.api.TwillRunnerService;
import org.apache.twill.filesystem.LocationFactory;
import org.apache.twill.kafka.client.KafkaClientService;
import org.apache.twill.zookeeper.ZKClientService;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* Corrects the "queue.pending" metric emitted in {@link co.cask.cdap.internal.app.runtime.flow.FlowletProgramRunner}.
*
* <p>
* Requires the kafka server (configured by cdap-site.xml) to be running, as it emits corrective metric values.
* </p>
*/
public class FlowQueuePendingCorrector extends AbstractIdleService {

  private static final Gson GSON = new Gson();

  private final MetricsCollectionService metricsCollectionService;
  private final MetricStore metricStore;
  private final KafkaClientService kafkaClientService;
  private final HBaseQueueDebugger queueDebugger;
  private final ZKClientService zkClientService;
  private final Store store;
  private final ProgramRuntimeService programRuntimeService;
  private final TwillRunnerService twillRunnerService;
  private final NamespaceAdmin namespaceAdmin;

  @Inject
  public FlowQueuePendingCorrector(HBaseQueueDebugger queueDebugger, ZKClientService zkClientService,
                                   MetricsCollectionService metricsCollectionService, MetricStore metricStore,
                                   KafkaClientService kafkaClientService, Store store,
                                   ProgramRuntimeService programRuntimeService,
                                   TwillRunnerService twillRunnerService, NamespaceAdmin namespaceAdmin) {
    this.queueDebugger = queueDebugger;
    this.zkClientService = zkClientService;
    this.metricsCollectionService = metricsCollectionService;
    this.metricStore = metricStore;
    this.kafkaClientService = kafkaClientService;
    this.store = store;
    this.programRuntimeService = programRuntimeService;
    this.twillRunnerService = twillRunnerService;
    this.namespaceAdmin = namespaceAdmin;
  }

  /**
   * Corrects the queue.pending metric for all flowlets in every namespace of the CDAP instance.
   *
   * @throws Exception if listing namespaces or correcting any flowlet fails
   */
  public void run() throws Exception {
    System.out.println("Running queue.pending correction");
    List<NamespaceMeta> namespaceMetas = namespaceAdmin.list();
    for (NamespaceMeta namespaceMeta : namespaceMetas) {
      run(Id.Namespace.from(namespaceMeta.getName()));
    }
  }

  /**
   * Corrects the queue.pending metric for all flowlets in a namespace.
   *
   * @param namespaceId the namespace whose applications should be corrected
   * @throws Exception if reading applications or correcting any flowlet fails
   */
  public void run(Id.Namespace namespaceId) throws Exception {
    System.out.println("Running queue.pending correction on namespace " + namespaceId);
    Collection<ApplicationSpecification> apps = store.getAllApplications(namespaceId);
    for (ApplicationSpecification app : apps) {
      Id.Application appId = Id.Application.from(namespaceId, app.getName());
      run(appId, app);
    }
  }

  /**
   * Corrects the queue.pending metric for all flowlets in an application.
   *
   * @param appId the application whose flows should be corrected
   * @throws IllegalArgumentException if the application does not exist
   * @throws Exception if correcting any flowlet fails
   */
  public void run(Id.Application appId) throws Exception {
    ApplicationSpecification app = store.getApplication(appId);
    // Guard against a missing application; without this, run(appId, app) would NPE
    // on appSpec.getName(). Mirrors the null check done in run(Id.Flow).
    Preconditions.checkArgument(app != null, "%s not found", appId);
    run(appId, app);
  }

  /**
   * Corrects the queue.pending metric for all flowlets in an application, using an
   * already-fetched {@link ApplicationSpecification}.
   *
   * @param appId the application id; its name must match {@code appSpec}
   * @param appSpec the specification describing the application's flows
   * @throws Exception if correcting any flow fails
   */
  public void run(Id.Application appId, ApplicationSpecification appSpec) throws Exception {
    System.out.println("Running queue.pending correction on app " + appId);
    // Lazy message template: the string is only formatted when the check fails.
    Preconditions.checkArgument(appSpec.getName().equals(appId.getId()),
                                "Expected appSpec name '%s' to be equal to appId name '%s'",
                                appSpec.getName(), appId.getId());
    for (FlowSpecification flow : appSpec.getFlows().values()) {
      run(Id.Flow.from(appId, flow.getName()));
    }
  }

  /**
   * Corrects the queue.pending metric for all flowlets in a flow.
   *
   * @param flowId the flow to correct
   * @throws IllegalArgumentException if the application or flow does not exist
   * @throws Exception if correcting any flowlet queue fails
   */
  public void run(final Id.Flow flowId) throws Exception {
    ApplicationSpecification app = store.getApplication(flowId.getApplication());
    Preconditions.checkArgument(app != null);
    Preconditions.checkArgument(app.getFlows().containsKey(flowId.getId()));
    FlowSpecification flow = app.getFlows().get(flowId.getId());
    run(flowId, flow);
  }

  /**
   * Corrects the queue.pending metric for every flowlet-to-flowlet queue of a flow.
   * Stream-sourced connections are skipped: only FLOWLET producers emit queue.pending.
   *
   * @param flowId the flow to correct
   * @param flow the flow's specification, used to derive its queue layout
   * @throws Exception if correcting any flowlet queue fails
   */
  public void run(final Id.Flow flowId, FlowSpecification flow) throws Exception {
    System.out.println("Running queue.pending correction on flow " + flowId);
    SimpleQueueSpecificationGenerator queueSpecGenerator =
      new SimpleQueueSpecificationGenerator(flowId.getApplication());
    // Table cell layout: row = producer node, column = consumer flowlet name,
    // value = the set of queues connecting them.
    Table<QueueSpecificationGenerator.Node, String, Set<QueueSpecification>> table = queueSpecGenerator.create(flow);
    for (Table.Cell<QueueSpecificationGenerator.Node, String, Set<QueueSpecification>> cell : table.cellSet()) {
      if (cell.getRowKey().getType() == FlowletConnection.Type.FLOWLET) {
        String producerFlowlet = cell.getRowKey().getName();
        String consumerFlowlet = cell.getColumnKey();
        for (QueueSpecification queue : cell.getValue()) {
          run(flowId, producerFlowlet, consumerFlowlet, queue.getQueueName().getSimpleName());
        }
      }
    }
  }

  /**
   * Corrects the queue.pending metric for a single flowlet queue.
   *
   * @param flowId the flow containing the queue
   * @param producerFlowlet the flowlet writing to the queue
   * @param consumerFlowlet the flowlet reading from the queue
   * @param flowletQueue the simple name of the queue
   * @throws IllegalArgumentException if the application or flow does not exist
   * @throws Exception if scanning the queue or emitting the metric fails
   */
  public void run(Id.Flow flowId, String producerFlowlet, String consumerFlowlet,
                  String flowletQueue) throws Exception {
    ApplicationSpecification app = store.getApplication(flowId.getApplication());
    Preconditions.checkArgument(app != null, "%s not found", flowId.getApplication());
    Preconditions.checkArgument(app.getFlows().containsKey(flowId.getId()), "%s not found", flowId);
    FlowSpecification flow = app.getFlows().get(flowId.getId());
    run(flowId, producerFlowlet, consumerFlowlet, flowletQueue, flow);
  }

  /**
   * Corrects the queue.pending metric for a single flowlet queue, using an
   * already-fetched {@link FlowSpecification}. The flow must not be running, since a
   * live flow would concurrently mutate the queue and the metric being corrected.
   *
   * @param flowId the flow containing the queue
   * @param producerFlowlet the flowlet writing to the queue
   * @param consumerFlowlet the flowlet reading from the queue
   * @param flowletQueue the simple name of the queue
   * @param flow the flow's specification, used to validate the producer/consumer/queue triple
   * @throws IllegalStateException if the flow is currently running
   * @throws IllegalArgumentException if the producer/consumer/queue triple is not part of the flow
   * @throws Exception if scanning the queue or emitting the metric fails
   */
  public void run(Id.Flow flowId, String producerFlowlet, String consumerFlowlet,
                  String flowletQueue, FlowSpecification flow) throws Exception {
    System.out.println("Running queue.pending correction on flow '" + flowId + "' producerFlowlet '" + producerFlowlet
                         + "' consumerFlowlet '" + consumerFlowlet + "' flowletQueue '" + flowletQueue + "'");
    Map<RunId, ProgramRuntimeService.RuntimeInfo> runtimeInfos = programRuntimeService.list(flowId);
    Preconditions.checkState(runtimeInfos.isEmpty(), "Cannot run tool when flow " + flowId + " is still running");

    // Validate that the requested producer -> consumer edge and queue actually exist in the flow.
    SimpleQueueSpecificationGenerator queueSpecGenerator =
      new SimpleQueueSpecificationGenerator(flowId.getApplication());
    Table<QueueSpecificationGenerator.Node, String, Set<QueueSpecification>> table = queueSpecGenerator.create(flow);
    Preconditions.checkArgument(
      table.contains(QueueSpecificationGenerator.Node.flowlet(producerFlowlet), consumerFlowlet),
      "Flowlet " + producerFlowlet + " is not emitting to " + consumerFlowlet);
    Set<QueueSpecification> queueSpecs =
      table.get(QueueSpecificationGenerator.Node.flowlet(producerFlowlet), consumerFlowlet);
    boolean validQueue = false;
    for (QueueSpecification queueSpec : queueSpecs) {
      if (queueSpec.getQueueName().getSimpleName().equals(flowletQueue)) {
        validQueue = true;
        break;
      }
    }
    Preconditions.checkArgument(validQueue, "Queue " + flowletQueue + " does not exist for the given flowlets");

    // The true pending count comes from scanning the HBase queue itself.
    QueueName queueName = QueueName.fromFlowlet(flowId, producerFlowlet, flowletQueue);
    long consumerGroupId = FlowUtils.generateConsumerGroupId(flowId, consumerFlowlet);
    long correctQueuePendingValue;
    try {
      HBaseQueueDebugger.QueueStatistics stats = queueDebugger.scanQueue(queueName, consumerGroupId);
      correctQueuePendingValue = stats.getUnprocessed() + stats.getProcessedAndNotVisible();
    } catch (NotFoundException e) {
      // OK since flowlet queue exists, but actual queue doesn't exist
      // (e.g. when running upgrade tool from 2.8 to 3.0)
      correctQueuePendingValue = 0;
    }

    // Read the currently-recorded metric value so we can report the before/after delta.
    Map<String, String> tags = ImmutableMap.<String, String>builder()
      .put(Constants.Metrics.Tag.NAMESPACE, flowId.getNamespaceId())
      .put(Constants.Metrics.Tag.APP, flowId.getApplicationId())
      .put(Constants.Metrics.Tag.FLOW, flowId.getId())
      .put(Constants.Metrics.Tag.CONSUMER, consumerFlowlet)
      .put(Constants.Metrics.Tag.PRODUCER, producerFlowlet)
      .put(Constants.Metrics.Tag.FLOWLET_QUEUE, flowletQueue)
      .build();
    MetricDataQuery query = new MetricDataQuery(
      0, 0, Integer.MAX_VALUE, 1, ImmutableMap.of("system.queue.pending", AggregationFunction.SUM),
      tags, ImmutableList.<String>of(), null);
    Collection<MetricTimeSeries> results = metricStore.query(query);
    long queuePending;
    if (results.isEmpty()) {
      // No metric recorded yet for this queue; treat the current value as zero.
      queuePending = 0;
    } else {
      System.out.println("Got results: " + GSON.toJson(results));
      Preconditions.checkState(results.size() == 1);
      List<TimeValue> timeValues = results.iterator().next().getTimeValues();
      Preconditions.checkState(timeValues.size() == 1);
      TimeValue timeValue = timeValues.get(0);
      queuePending = timeValue.getValue();
    }

    // NOTE(review): this starts and stops the shared MetricsCollectionService on every
    // invocation; Guava services are not restartable once stopped, so correcting more than
    // one queue relies on the bound implementation tolerating restart — confirm.
    metricsCollectionService.startAndWait();
    MetricsContext collector = metricsCollectionService.getContext(tags);
    collector.gauge("queue.pending", correctQueuePendingValue);
    System.out.printf("Adjusted system.queue.pending metric from %d to %d (tags %s)\n",
                      queuePending, correctQueuePendingValue, GSON.toJson(tags));
    // stop will flush the metrics
    metricsCollectionService.stopAndWait();
  }

  @Override
  protected void startUp() throws Exception {
    // Start dependent services in dependency order; shutDown() reverses this order.
    kafkaClientService.startAndWait();
    zkClientService.startAndWait();
    twillRunnerService.start();
    programRuntimeService.startAndWait();
    queueDebugger.startAndWait();
  }

  @Override
  protected void shutDown() throws Exception {
    queueDebugger.stopAndWait();
    programRuntimeService.stopAndWait();
    twillRunnerService.stop();
    zkClientService.stopAndWait();
    kafkaClientService.stopAndWait();
  }

  /**
   * Creates a {@link FlowQueuePendingCorrector} wired against the distributed (HBase-backed)
   * CDAP modules, using configuration from cdap-site.xml and hbase-site.xml on the classpath.
   */
  public static FlowQueuePendingCorrector createCorrector() {
    Injector injector = Guice.createInjector(
      new ConfigModule(CConfiguration.create(), HBaseConfiguration.create()),
      new IOModule(),
      new ZKClientModule(),
      new LocationRuntimeModule().getDistributedModules(),
      new DiscoveryRuntimeModule().getDistributedModules(),
      new ViewAdminModules().getDistributedModules(),
      new StreamAdminModules().getDistributedModules(),
      new NotificationFeedClientModule(),
      new TwillModule(),
      new ExploreClientModule(),
      new DataFabricDistributedModule(),
      new ServiceStoreModules().getDistributedModules(),
      new DataSetsModules().getDistributedModules(),
      new AppFabricServiceRuntimeModule().getDistributedModules(),
      new ProgramRunnerRuntimeModule().getDistributedModules(),
      new SystemDatasetRuntimeModule().getDistributedModules(),
      new NotificationServiceRuntimeModule().getDistributedModules(),
      new MetricsClientRuntimeModule().getDistributedModules(),
      new KafkaClientModule(),
      new NamespaceStoreModule().getDistributedModules(),
      new AuthorizationModule(),
      new AbstractModule() {
        @Override
        protected void configure() {
          // Queue access must go through HBase, since this tool scans HBase queues directly.
          bind(QueueClientFactory.class).to(HBaseQueueClientFactory.class).in(Singleton.class);
          bind(QueueAdmin.class).to(HBaseQueueAdmin.class).in(Singleton.class);
          bind(HBaseTableUtil.class).toProvider(HBaseTableUtilFactory.class);
        }

        @Provides
        @Singleton
        @Named("defaultStore")
        @SuppressWarnings("unused")
        public Store getStore(CConfiguration conf,
                              LocationFactory locationFactory,
                              NamespacedLocationFactory namespacedLocationFactory,
                              final TransactionExecutorFactory txExecutorFactory,
                              DatasetFramework framework,
                              TransactionSystemClient txClient) {
          return new DefaultStore(conf, locationFactory, namespacedLocationFactory,
                                  txExecutorFactory, framework, txClient);
        }

        // This is needed because the LocalApplicationManager
        // expects a dsframework injection named datasetMDS
        @Provides
        @Singleton
        @Named("datasetMDS")
        @SuppressWarnings("unused")
        public DatasetFramework getInDsFramework(DatasetFramework dsFramework) {
          return dsFramework;
        }
      });
    return injector.getInstance(FlowQueuePendingCorrector.class);
  }

  /**
   * Entry point. Narrows the correction scope according to the provided options:
   * no options corrects everything; each of namespace/app/flow narrows further; specifying
   * producer-flowlet and consumer-flowlet (optionally with queue) targets a single queue.
   */
  public static void main(String[] args) throws Exception {
    CommandLine cmd = parseArgs(args);

    FlowQueuePendingCorrector corrector = createCorrector();
    corrector.startAndWait();
    try {
      String namespace = cmd.getOptionValue("namespace");
      String app = cmd.getOptionValue("app");
      String flow = cmd.getOptionValue("flow");
      if (!cmd.hasOption("namespace")) {
        corrector.run();
      } else if (!cmd.hasOption("app")) {
        corrector.run(Id.Namespace.from(cmd.getOptionValue("namespace")));
      } else if (!cmd.hasOption("flow")) {
        Preconditions.checkArgument(cmd.hasOption("namespace"));
        corrector.run(Id.Application.from(cmd.getOptionValue("namespace"), cmd.getOptionValue("app")));
      } else if (!cmd.hasOption("producer-flowlet") && !cmd.hasOption("consumer-flowlet")) {
        corrector.run(Id.Flow.from(cmd.getOptionValue("namespace"), cmd.getOptionValue("app"),
                                   cmd.getOptionValue("flow")));
      } else {
        Preconditions.checkArgument(cmd.hasOption("producer-flowlet"), "Missing producer-flowlet option");
        Preconditions.checkArgument(cmd.hasOption("consumer-flowlet"), "Missing consumer-flowlet option");
        String producerFlowlet = cmd.getOptionValue("producer-flowlet");
        String consumerFlowlet = cmd.getOptionValue("consumer-flowlet");
        String queue = cmd.getOptionValue("queue", "queue");
        corrector.run(Id.Flow.from(namespace, app, flow), producerFlowlet, consumerFlowlet, queue);
      }
    } finally {
      // Always release ZK/Kafka/Twill/HBase resources, even if the correction failed.
      corrector.stopAndWait();
    }
  }

  /**
   * Parses command line options; prints usage and exits the process on invalid input.
   */
  private static CommandLine parseArgs(String[] args) {
    Options options = new Options();
    options.addOption(createOption("n", "namespace", true,
                                   "namespace (optional, leave empty to correct all flowlets)", false));
    options.addOption(createOption("a", "app", true,
                                   "app (optional, leave empty to correct all apps)", false));
    options.addOption(createOption("f", "flow", true,
                                   "flow (optional, leave empty to correct all flows)", false));
    options.addOption(createOption("p", "producer-flowlet", true,
                                   "producer flowlet (optional, leave empty to correct entire flow)", false));
    options.addOption(createOption("c", "consumer-flowlet", true,
                                   "consumer flowlet (optional, leave empty to correct entire flow)", false));
    options.addOption(createOption("q", "queue", true, "flowlet queue (optional, defaults to \"queue\")", false));

    CommandLineParser parser = new BasicParser();
    try {
      return parser.parse(options, args);
    } catch (ParseException e) {
      System.out.println(e.getMessage());
      HelpFormatter formatter = new HelpFormatter();
      String argsFormat =
        "[--namespace <namespace> " +
          "[--app <app> " +
          "[--flow <flow> " +
          "[[--producer-flowlet <flowlet> " +
          "--consumer-flowlet <flowlet> " +
          "[--queue <queue>]]]]]]";
      formatter.printHelp(argsFormat, options);
      // Exit non-zero: argument parsing failed, so callers (e.g. shell scripts)
      // must not observe a success status.
      System.exit(1);
      return null;
    }
  }

  /**
   * Creates a Commons CLI {@link Option}.
   *
   * @param opt short option name
   * @param longOpt long option name
   * @param hasOpt whether the option takes an argument
   * @param desc help text for the option
   * @param required whether the option must be supplied
   */
  private static Option createOption(String opt, String longOpt, boolean hasOpt,
                                     String desc, boolean required) {
    Option option = new Option(opt, longOpt, hasOpt, desc);
    option.setRequired(required);
    return option;
  }
}