/**
* Copyright 2015-2017 The OpenZipkin Authors
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
* in compliance with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License
* is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
* or implied. See the License for the specific language governing permissions and limitations under
* the License.
*/
package zipkin.collector.kafka10;
import com.github.charithe.kafka.EphemeralKafkaBroker;
import com.github.charithe.kafka.KafkaJunitRule;
import java.util.List;
import java.util.Properties;
import java.util.concurrent.CopyOnWriteArraySet;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.curator.test.InstanceSpec;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.KafkaException;
import org.junit.After;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import org.junit.rules.Timeout;
import zipkin.Codec;
import zipkin.Span;
import zipkin.collector.InMemoryCollectorMetrics;
import zipkin.collector.kafka10.KafkaCollector.Builder;
import zipkin.storage.AsyncSpanConsumer;
import zipkin.storage.AsyncSpanStore;
import zipkin.storage.SpanStore;
import zipkin.storage.StorageComponent;
import static org.assertj.core.api.Assertions.assertThat;
import static zipkin.TestObjects.TRACE;
public class KafkaCollectorTest {
private static final int RANDOM_PORT = -1;
private static final EphemeralKafkaBroker broker =
EphemeralKafkaBroker.create(RANDOM_PORT, RANDOM_PORT, buildBrokerConfig());
@ClassRule public static KafkaJunitRule kafka = new KafkaJunitRule(broker).waitForStartup();
@ClassRule public static Timeout globalTimeout = Timeout.seconds(20);
@Rule public ExpectedException thrown = ExpectedException.none();
InMemoryCollectorMetrics metrics = new InMemoryCollectorMetrics();
InMemoryCollectorMetrics kafkaMetrics = metrics.forTransport("kafka");
CopyOnWriteArraySet<Thread> threadsProvidingSpans = new CopyOnWriteArraySet<>();
LinkedBlockingQueue<List<Span>> receivedSpans = new LinkedBlockingQueue<>();
AsyncSpanConsumer consumer = (spans, callback) -> {
threadsProvidingSpans.add(Thread.currentThread());
receivedSpans.add(spans);
callback.onSuccess(null);
};
private KafkaProducer<byte[], byte[]> producer;
private static Properties buildBrokerConfig() {
final Properties config = new Properties();
config.setProperty("num.partitions", "2");
return config;
}
@Before
public void setup() {
producer = kafka.helper().createByteProducer();
}
@After
public void teardown() {
producer.close();
}
@Test
public void checkPasses() throws Exception {
try (KafkaCollector collector = builder("check_passes").build()) {
assertThat(collector.check().ok).isTrue();
}
}
@Test
public void start_failsOnInvalidBootstrapServers() throws Exception {
thrown.expect(KafkaException.class);
thrown.expectMessage("Failed to construct kafka consumer");
Builder builder = builder("fail_invalid_bootstrap_servers").bootstrapServers("1.1.1.1");
try (KafkaCollector collector = builder.build()) {
collector.start();
}
}
/**
* If the Kafka broker(s) specified in the connection string are not available, the Kafka
* consumer library will attempt to reconnect indefinitely. The Kafka consumer will not throw
* an exception and does not expose the status of its connection to the Kafka broker(s) in its
* API. The only control over this behavior provided is setting the delay between reconnection
* attempts. This is controlled through the consumer config property "reconnect.backoff.ms",
* which defaults to a value of 50.
*
* In this case, "unavailable" means that the Kafka consumer cannot establish a connection to
* at least one of the hostname/IP and port combinations provided in the bootstrap
* brokers list.
*
* There is an opportunity to improve visibility by having {@link KafkaCollector#check()}
* interrogate the metrics provided by the Kafka consumer (see
* {@link org.apache.kafka.clients.consumer.KafkaConsumer#metrics()}) to determine whether
* connectivity to Kafka appears to be up based on observed activity.
*/
@Test
public void reconnectsIndefinitelyAndReportsHealthyWhenKafkaUnavailable() throws Exception {
Builder builder = builder("fail_invalid_bootstrap_servers")
.bootstrapServers("localhost:" + InstanceSpec.getRandomPort());
try (KafkaCollector collector = builder.build()) {
collector.start();
Thread.sleep(TimeUnit.SECONDS.toMillis(1));
assertThat(collector.check().ok).isTrue();
}
}
/** Ensures legacy encoding works: a single TBinaryProtocol encoded span */
@Test
public void messageWithSingleThriftSpan() throws Exception {
Builder builder = builder("single_span");
byte[] bytes = Codec.THRIFT.writeSpan(TRACE.get(0));
produceSpans(bytes, builder.topic);
try (KafkaCollector collector = builder.build()) {
collector.start();
assertThat(receivedSpans.take()).containsExactly(TRACE.get(0));
}
assertThat(kafkaMetrics.messages()).isEqualTo(1);
assertThat(kafkaMetrics.bytes()).isEqualTo(bytes.length);
assertThat(kafkaMetrics.spans()).isEqualTo(1);
}
/** Ensures list encoding works: a TBinaryProtocol encoded list of spans */
@Test
public void messageWithMultipleSpans_thrift() throws Exception {
Builder builder = builder("multiple_spans_thrift");
byte[] bytes = Codec.THRIFT.writeSpans(TRACE);
produceSpans(bytes, builder.topic);
try (KafkaCollector collector = builder.build()) {
collector.start();
assertThat(receivedSpans.take()).containsExactlyElementsOf(TRACE);
}
assertThat(kafkaMetrics.messages()).isEqualTo(1);
assertThat(kafkaMetrics.bytes()).isEqualTo(bytes.length);
assertThat(kafkaMetrics.spans()).isEqualTo(TRACE.size());
}
/** Ensures list encoding works: a json encoded list of spans */
@Test
public void messageWithMultipleSpans_json() throws Exception {
Builder builder = builder("multiple_spans_json");
byte[] bytes = Codec.JSON.writeSpans(TRACE);
produceSpans(bytes, builder.topic);
try (KafkaCollector collector = builder.build()) {
collector.start();
assertThat(receivedSpans.take()).containsExactlyElementsOf(TRACE);
}
assertThat(kafkaMetrics.messages()).isEqualTo(1);
assertThat(kafkaMetrics.bytes()).isEqualTo(bytes.length);
assertThat(kafkaMetrics.spans()).isEqualTo(TRACE.size());
}
/** Ensures malformed spans don't hang the collector */
@Test
public void skipsMalformedData() throws Exception {
Builder builder = builder("decoder_exception");
produceSpans(Codec.THRIFT.writeSpans(TRACE), builder.topic);
produceSpans(new byte[0], builder.topic);
produceSpans("[\"='".getBytes(), builder.topic); // screwed up json
produceSpans("malformed".getBytes(), builder.topic);
produceSpans(Codec.THRIFT.writeSpans(TRACE), builder.topic);
try (KafkaCollector collector = builder.build()) {
collector.start();
assertThat(receivedSpans.take()).containsExactlyElementsOf(TRACE);
// the only way we could read this, is if the malformed spans were skipped.
assertThat(receivedSpans.take()).containsExactlyElementsOf(TRACE);
}
assertThat(kafkaMetrics.messagesDropped()).isEqualTo(3);
}
/** Guards against errors that leak from storage, such as InvalidQueryException */
@Test
public void skipsOnSpanConsumerException() throws Exception {
AtomicInteger counter = new AtomicInteger();
final StorageComponent storage = buildStorage((spans, callback) -> {
if (counter.getAndIncrement() == 1) {
callback.onError(new RuntimeException("storage fell over"));
} else {
receivedSpans.add(spans);
callback.onSuccess(null);
}
});
Builder builder = builder("consumer_exception").storage(storage);
produceSpans(Codec.THRIFT.writeSpans(TRACE), builder.topic);
produceSpans(Codec.THRIFT.writeSpans(TRACE), builder.topic); // tossed on error
produceSpans(Codec.THRIFT.writeSpans(TRACE), builder.topic);
try (KafkaCollector collector = builder.build()) {
collector.start();
assertThat(receivedSpans.take()).containsExactlyElementsOf(TRACE);
// the only way we could read this, is if the malformed span was skipped.
assertThat(receivedSpans.take()).containsExactlyElementsOf(TRACE);
}
assertThat(kafkaMetrics.spansDropped()).isEqualTo(TRACE.size());
}
@Test
public void messagesDistributedAcrossMultipleThreadsSuccessfully() throws Exception {
Builder builder = builder("multi_thread", 2);
warmUpTopic(builder.topic);
final byte[] traceBytes = Codec.THRIFT.writeSpans(TRACE);
try (KafkaCollector collector = builder.build()) {
collector.start();
waitForPartitionAssignments(collector);
produceSpans(traceBytes, builder.topic, 0);
assertThat(receivedSpans.take()).containsExactlyElementsOf(TRACE);
produceSpans(traceBytes, builder.topic, 1);
assertThat(receivedSpans.take()).containsExactlyElementsOf(TRACE);
}
assertThat(threadsProvidingSpans.size()).isEqualTo(2);
assertThat(kafkaMetrics.messages()).isEqualTo(3);
assertThat(kafkaMetrics.bytes()).isEqualTo(traceBytes.length * 2);
assertThat(kafkaMetrics.spans()).isEqualTo(TRACE.size() * 2);
}
/**
* Producing this empty message triggers auto-creation of the topic and gets things "warmed up"
* on the broker before the consumers subscribe. Without this, the topic is auto-created when
* the first consumer subscribes but there appears to be a race condition where the existence of
* the topic is not known to the partition assignor when the consumer group goes through its
* initial re-balance. As a result, no partitions are assigned, there are no further changes to
* group membership to trigger another re-balance, and no messages are consumed. This initial
* message is not necessary if the test broker is re-created for each test, but that increases
* execution time for the suite by a factor of 10x (2-3s to ~25s on my local machine).
*/
private void warmUpTopic(String topic) {
produceSpans(new byte[0], topic);
}
/**
* Wait until all kafka consumers created by the collector have at least one partition
* assigned.
*/
private void waitForPartitionAssignments(KafkaCollector collector) throws Exception {
long consumersWithAssignments = 0;
while (consumersWithAssignments < collector.kafkaWorkers.streams) {
Thread.sleep(10);
consumersWithAssignments = collector.kafkaWorkers.workers.stream()
.filter(w -> !w.assignedPartitions.get().isEmpty())
.count();
}
}
private void produceSpans(byte[] spans, String topic) {
produceSpans(spans, topic, 0);
}
private void produceSpans(byte[] spans, String topic, Integer partition) {
producer.send(new ProducerRecord<>(topic, partition, null, spans));
producer.flush();
}
Builder builder(String topic) {
return builder(topic, 1);
}
Builder builder(String topic, int streams) {
return new Builder()
.metrics(metrics)
.bootstrapServers(broker.getBrokerList().get())
.topic(topic)
.groupId(topic + "_group")
.streams(streams)
.storage(buildStorage(consumer));
}
private StorageComponent buildStorage(final AsyncSpanConsumer spanConsumer) {
return new StorageComponent() {
@Override public SpanStore spanStore() {
throw new AssertionError();
}
@Override public AsyncSpanStore asyncSpanStore() {
throw new AssertionError();
}
@Override public AsyncSpanConsumer asyncSpanConsumer() {
return spanConsumer;
}
@Override public CheckResult check() {
return CheckResult.OK;
}
@Override public void close() {
throw new AssertionError();
}
};
}
}