/*
# Licensed Materials - Property of IBM
# Copyright IBM Corp. 2015
*/
package kafka;

import java.io.File;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.Future;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import com.ibm.streamsx.topology.TStream;
import com.ibm.streamsx.topology.Topology;
import com.ibm.streamsx.topology.context.ContextProperties;
import com.ibm.streamsx.topology.context.StreamsContextFactory;
import com.ibm.streamsx.topology.function.Function;
import com.ibm.streamsx.topology.function.Supplier;
import com.ibm.streamsx.topology.function.UnaryOperator;
import com.ibm.streamsx.topology.logic.Value;
import com.ibm.streamsx.topology.messaging.kafka.KafkaConsumer;
import com.ibm.streamsx.topology.messaging.kafka.KafkaProducer;
import com.ibm.streamsx.topology.tuple.Message;
import com.ibm.streamsx.topology.tuple.SimpleMessage;

/**
 * Demonstrate integrating with the Apache Kafka messaging system
 * <a href="http://kafka.apache.org">http://kafka.apache.org</a>.
 * <p>
 * Connectors are used to create a bridge between topology streams
 * and a Kafka cluster:
 * <ul>
 * <li>{@link com.ibm.streamsx.topology.messaging.kafka.KafkaConsumer KafkaConsumer} - subscribe to Kafka topics and create streams of messages.</li>
 * <li>{@link com.ibm.streamsx.topology.messaging.kafka.KafkaProducer KafkaProducer} - publish streams of messages to Kafka topics.</li>
 * </ul>
 * <p>
 * The sample publishes some messages to a Kafka topic.  It also
 * subscribes to the topic and reports the messages received.
 * The messages received may include messages from prior runs of the sample.
 * <p>
 * The sample requires a running Kafka cluster with the following
 * characteristics:
 * <ul>
 * <li>the Kafka topic {@code kafkaSampleTopic} has been created, e.g.<br>
 * {@code ${KAFKA_HOME}/bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic kafkaSampleTopic}
 * </li>
 * <li>the Kafka cluster's zookeeper connection is {@code localhost:2181}</li>
 * <li>the Kafka cluster's broker address is {@code localhost:9092}</li>
 * </ul>
 * <p>
 * Required IBM Streams environment variables:
 * <ul>
 * <li>STREAMS_INSTALL - the Streams installation directory</li>
 * <li>STREAMS_DOMAIN_ID - the Streams domain to use for context {@code DISTRIBUTED}</li>
 * <li>STREAMS_INSTANCE_ID - the Streams instance to use for context {@code DISTRIBUTED}</li>
 * </ul>
 * <p>
 * See the Apache Kafka link above for information about setting up a Kafka
 * cluster and creating a topic.
 * <p>
 * This may be executed from the {@code samples/java/functional} directory as:
 * <UL>
 * <LI>{@code ant run.kafka.distributed} - Using Apache Ant, this will run in distributed mode.</LI>
 * <LI>{@code ant run.kafka} - Using Apache Ant, this will run in standalone mode.</LI>
 * <LI>
 * {@code java -cp functionalsamples.jar:../../../com.ibm.streamsx.topology/lib/com.ibm.streamsx.topology.jar:$STREAMS_INSTALL/lib/com.ibm.streams.operator.samples.jar
 *  kafka.KafkaSample CONTEXT_TYPE
 * } - Run directly from the command line.
 * </LI>
 * <i>CONTEXT_TYPE</i> is one of:
 * <UL>
 * <LI>{@code DISTRIBUTED} - Run as an IBM Streams distributed application.</LI>
 * <LI>{@code STANDALONE} - Run as an IBM Streams standalone application.</LI>
 * <LI>{@code BUNDLE} - Create an IBM Streams application bundle.</LI>
 * <LI>{@code TOOLKIT} - Create an IBM Streams application toolkit.</LI>
 * </UL>
 * <LI>
 * Run directly within your IDE once the class path is set to include the correct jars.</LI>
 * </UL>
 */
public class KafkaSample {
    private static final String ZOOKEEPER_CONNECT = "localhost:2181";
    private static final String KAFKA_BOOTSTRAP_SERVER_LIST = "localhost:9092";
    private static final String TOPIC = "kafkaSampleTopic";

    private static final int PUB_DELAY_MSEC = 5*1000;
    private static final String uniq = new SimpleDateFormat("HH:mm:ss.SSS").format(new Date());
    private boolean captureArtifacts = false;
    private boolean setAppTracingLevel = false;
    private java.util.logging.Level appTracingLevel = java.util.logging.Level.FINE;
    private Map<String,Object> config = new HashMap<>();
    private String streamsxMessagingVer;

    public static void main(String[] args) throws Exception {
        String contextType = "DISTRIBUTED";
        if (args.length > 0)
            contextType = args[0];
        System.out.println("\nREQUIRES:"
                + " Kafka topic " + TOPIC + " exists"
                + ", Kafka broker at " + KAFKA_BOOTSTRAP_SERVER_LIST
                + ", Kafka zookeeper at " + ZOOKEEPER_CONNECT
                + "\n");

        KafkaSample app = new KafkaSample();
        app.publishSubscribe(contextType);
    }

    /**
     * Publish some messages to a topic, subscribe to the topic and report
     * received messages.
     * @param contextType string value of a {@code StreamsContext.Type}
     * @throws Exception
     */
    public void publishSubscribe(String contextType) throws Exception {
        setupConfig();
        identifyStreamsxMessagingVer();
        Topology top = new Topology("kafkaSample");
        String groupId = newGroupId(top.getName());
        Supplier<String> topic = new Value<String>(TOPIC);

        KafkaProducer producer = new KafkaProducer(top, createProducerConfig());
        KafkaConsumer consumer = new KafkaConsumer(top, createConsumerConfig(groupId));

        TStream<Message> msgs = makeStreamToPublish(top);

        // for the sample, give the consumer a chance to become ready
        msgs = msgs.modify(initialDelayFunc(PUB_DELAY_MSEC));

        producer.publish(msgs, topic);

        TStream<Message> rcvdMsgs = consumer.subscribe(topic);

        rcvdMsgs.print();  // show what we received

        // Execute the topology, to send and receive the messages.
        Future<?> future = StreamsContextFactory.getStreamsContext(contextType)
                .submit(top, config);

        if (contextType.contains("DISTRIBUTED")) {
            System.out.println("\nSee the job's PE console logs for the topology output.\n");
        }
        else if (contextType.contains("STANDALONE")
                || contextType.contains("EMBEDDED")) {
            Thread.sleep(15000);
            future.cancel(true);
        }
    }

    private Map<String,Object> createConsumerConfig(String groupId) {
        Map<String,Object> props = new HashMap<>();
        props.put("zookeeper.connect", ZOOKEEPER_CONNECT);
        props.put("group.id", groupId);
        props.put("zookeeper.session.timeout.ms", "400");
        props.put("zookeeper.sync.time.ms", "200");
        props.put("auto.commit.interval.ms", "1000");
        return props;
    }

    private Map<String,Object> createProducerConfig() {
        Map<String,Object> props = new HashMap<>();
        if (streamsxMessagingVer.startsWith("2.0")) {
            props.put("metadata.broker.list", KAFKA_BOOTSTRAP_SERVER_LIST);
            props.put("serializer.class", "kafka.serializer.StringEncoder");
            props.put("request.required.acks", "1");
        }
        else {
            // starting with streamsx.messaging v3.0, the
            // kafka "new producer configs" are used.
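            // Note: the old producer keys used in the branch above map to the
            // new producer configs roughly as
            //   metadata.broker.list  -> bootstrap.servers
            //   request.required.acks -> acks
            // and the string serializer no longer needs to be set here.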
props.put("bootstrap.servers", KAFKA_BOOTSTRAP_SERVER_LIST); props.put("acks", "1"); } return props; } @SuppressWarnings("serial") private static TStream<Message> makeStreamToPublish(Topology top) { return top.strings("Hello", "Are you there?", "3 of 5", "4 of 5", "5 of 5" ).transform(new Function<String,Message>() { private String timestamp; @Override public Message apply(String v) { if (timestamp == null) timestamp = new SimpleDateFormat("HH:mm:ss.SSS ").format(new Date()); return new SimpleMessage(timestamp + v); } }); } private void setupConfig() { if (captureArtifacts) config.put(ContextProperties.KEEP_ARTIFACTS, true); if (setAppTracingLevel) config.put(ContextProperties.TRACING_LEVEL, appTracingLevel); } private String newGroupId(String name) { // be insensitive to old consumers for the topic/groupId hanging around String groupId = name + "_" + uniq.replaceAll(":", ""); System.out.println("Using Kafka consumer group.id " + groupId); return groupId; } @SuppressWarnings("serial") private static UnaryOperator<Message> initialDelayFunc(final int delayMsec) { return new UnaryOperator<Message>() { private int initialDelayMsec = delayMsec; @Override public Message apply(Message v) { if (initialDelayMsec != -1) { try { Thread.sleep(initialDelayMsec); } catch (InterruptedException e) { // done delaying } initialDelayMsec = -1; } return v; } }; } private void identifyStreamsxMessagingVer() throws Exception { String tkloc = System.getenv("STREAMS_INSTALL") + "/toolkits/com.ibm.streamsx.messaging"; File info = new File(tkloc, "info.xml"); // e.g., <info:version>2.0.1</info:version> DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); DocumentBuilder db = dbf.newDocumentBuilder(); Document d = db.parse(info); XPath xpath = XPathFactory.newInstance().newXPath(); NodeList nodes = (NodeList)xpath.evaluate("/toolkitInfoModel/identity/version", d.getDocumentElement(), XPathConstants.NODESET); Element e = (Element) nodes.item(0); Node n = e.getChildNodes().item(0); String ver = n.getNodeValue(); streamsxMessagingVer = ver; } }