/*
* Copyright © 2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.examples.sparkstreaming;
import co.cask.cdap.common.utils.Networks;
import co.cask.cdap.common.utils.Tasks;
import co.cask.cdap.test.ApplicationManager;
import co.cask.cdap.test.ServiceManager;
import co.cask.cdap.test.SparkManager;
import co.cask.cdap.test.StreamManager;
import co.cask.cdap.test.TestBase;
import co.cask.common.http.HttpRequest;
import co.cask.common.http.HttpRequests;
import co.cask.common.http.HttpResponse;
import com.google.common.base.Charsets;
import org.apache.twill.internal.kafka.EmbeddedKafkaServer;
import org.apache.twill.internal.kafka.client.ZKKafkaClientService;
import org.apache.twill.internal.zookeeper.InMemoryZKServer;
import org.apache.twill.kafka.client.Compression;
import org.apache.twill.kafka.client.KafkaClientService;
import org.apache.twill.kafka.client.KafkaPublisher;
import org.apache.twill.zookeeper.ZKClientService;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.Callable;
import java.util.concurrent.TimeUnit;
/**
 * Test for {@link SpamClassifier}.
 *
 * <p>The test starts an in-memory ZooKeeper server and an embedded Kafka broker once per class,
 * deploys the application, sends the training data to the application's stream, publishes two
 * messages to Kafka and then polls the application's service until both messages have been
 * classified as expected.
 */
public class SpamClassifierTest extends TestBase {

  private static final String KAFKA_TOPIC = "someTopic";
  private static final String KAFKA_BROKER_ID = "1";

  private static ZKClientService zkClient;
  private static KafkaClientService kafkaClient;
  private static InMemoryZKServer zkServer;
  private static EmbeddedKafkaServer kafkaServer;
  private static int kafkaPort;

  /**
   * Starts the ZooKeeper server, the Kafka broker and the clients used to publish messages.
   *
   * @throws Exception if any of the services cannot be created or started
   */
  @BeforeClass
  public static void init() throws Exception {
    zkServer = InMemoryZKServer.builder().setDataDir(TMP_FOLDER.newFolder()).build();
    zkServer.startAndWait();

    kafkaPort = Networks.getRandomPort();
    kafkaServer = new EmbeddedKafkaServer(generateKafkaConfig(zkServer.getConnectionStr(), kafkaPort,
                                                              TMP_FOLDER.newFolder()));
    kafkaServer.startAndWait();

    zkClient = ZKClientService.Builder.of(zkServer.getConnectionStr()).build();
    zkClient.startAndWait();

    kafkaClient = new ZKKafkaClientService(zkClient);
    kafkaClient.startAndWait();
  }

  /**
   * Stops the clients and servers in the reverse order of their start.
   *
   * <p>Each service is null-checked because {@link #init()} may have failed before creating it, and
   * the nested {@code finally} blocks make sure a failure while stopping one service does not
   * prevent the remaining ones from being stopped.
   */
  @AfterClass
  public static void cleanup() {
    try {
      if (kafkaClient != null) {
        kafkaClient.stopAndWait();
      }
    } finally {
      try {
        if (zkClient != null) {
          zkClient.stopAndWait();
        }
      } finally {
        try {
          if (kafkaServer != null) {
            kafkaServer.stopAndWait();
          }
        } finally {
          if (zkServer != null) {
            zkServer.stopAndWait();
          }
        }
      }
    }
  }

  /**
   * Runs the end-to-end scenario: ingest training data, publish Kafka messages, run the Spark
   * streaming program and verify the classifications exposed by the service.
   *
   * @throws Exception if any step fails or the expected classifications do not show up in time
   */
  @Test
  public void test() throws Exception {
    // Deploy the SpamClassifier application
    ApplicationManager appManager = deployApplication(SpamClassifier.class);
    try {
      ingestTrainingData();
      publishKafkaMessages();

      // start spark streaming program
      SparkManager sparkManager = appManager.getSparkManager(SpamClassifierProgram.class.getSimpleName());
      Map<String, String> runtimeArgs = new HashMap<>();
      runtimeArgs.put("kafka.brokers", "127.0.0.1:" + kafkaPort);
      runtimeArgs.put("kafka.topics", KAFKA_TOPIC);
      sparkManager.start(runtimeArgs);

      // Start and wait for service to start
      final ServiceManager serviceManager = appManager.getServiceManager(SpamClassifier.SERVICE_HANDLER).start();
      serviceManager.waitForStatus(true);

      // wait for spark streaming program to write to dataset
      Tasks.waitFor(true, new Callable<Boolean>() {
        @Override
        public Boolean call() throws Exception {
          return testClassification(serviceManager, "1", SpamClassifier.SpamClassifierServiceHandler.SPAM) &&
            testClassification(serviceManager, "2", SpamClassifier.SpamClassifierServiceHandler.HAM);
        }
      }, 60, TimeUnit.SECONDS);

      // stop spark program
      sparkManager.stop();
      sparkManager.waitForFinish(10, TimeUnit.SECONDS);
    } finally {
      // Always stop whatever is still running, even when an assertion or wait above failed,
      // so that no program is left behind.
      appManager.stopAll();
    }
  }

  /**
   * Queries the classification service for a single message.
   *
   * @param serviceManager manager of the running classification service
   * @param messageId id of the message whose classification is requested
   * @param expected the classification the service is expected to return
   * @return {@code true} if the service answered with HTTP 200 and a body equal to {@code expected}
   *         (ignoring case); {@code false} otherwise
   * @throws IOException if the HTTP request fails
   */
  private boolean testClassification(ServiceManager serviceManager, String messageId, String expected)
    throws IOException {
    URL url = new URL(serviceManager.getServiceURL(15, TimeUnit.SECONDS),
                      SpamClassifier.SpamClassifierServiceHandler.CLASSIFICATION_PATH + "/" + messageId);
    HttpResponse response = HttpRequests.execute(HttpRequest.get(url).build());
    return HttpURLConnection.HTTP_OK == response.getResponseCode()
      && expected.equalsIgnoreCase(response.getResponseBodyAsString());
  }

  /**
   * Publishes one spam and one ham message to {@link #KAFKA_TOPIC} and waits for the publish to
   * complete, so that a failed publish is reported here instead of surfacing later as a timeout.
   *
   * @throws Exception if the publish fails or does not complete within the wait time
   */
  private void publishKafkaMessages() throws Exception {
    KafkaPublisher publisher = kafkaClient.getPublisher(KafkaPublisher.Ack.ALL_RECEIVED, Compression.NONE);
    KafkaPublisher.Preparer preparer = publisher.prepare(KAFKA_TOPIC);
    preparer.add(Charsets.UTF_8.encode("1:REMINDER FROM O2: To get 2.50 pounds free call credit and details of great " +
                                         "offers pls reply 2 this text with your valid name, house no and postcode"),
                 "1"); // spam
    preparer.add(Charsets.UTF_8.encode("2:I will call you later"), "2"); // ham
    // send() reports the outcome through the returned future; block on it so errors are not lost
    preparer.send().get(30, TimeUnit.SECONDS);
  }

  /**
   * Sends every line of the {@code /trainingData.txt} classpath resource to the application's stream.
   *
   * @throws IOException if the resource is missing or cannot be read, or if sending to the stream fails
   */
  private void ingestTrainingData() throws IOException {
    URL trainingData = getClass().getResource("/trainingData.txt");
    if (trainingData == null) {
      throw new IOException("Training data resource /trainingData.txt not found on the classpath");
    }
    StreamManager streamManager = getStreamManager(SpamClassifier.STREAM);
    try (BufferedReader reader = new BufferedReader(
      new InputStreamReader(trainingData.openStream(), Charsets.UTF_8))) {
      for (String line = reader.readLine(); line != null; line = reader.readLine()) {
        streamManager.send(line);
      }
    }
  }

  /**
   * Builds the configuration for the embedded Kafka broker.
   *
   * @param zkConnectStr ZooKeeper connection string the broker connects to
   * @param port port the broker listens on
   * @param logDir directory the broker uses as its log directory
   * @return the broker properties
   */
  private static Properties generateKafkaConfig(String zkConnectStr, int port, File logDir) {
    Properties prop = new Properties();
    prop.setProperty("log.dir", logDir.getAbsolutePath());
    prop.setProperty("host.name", "127.0.0.1");
    prop.setProperty("port", Integer.toString(port));
    prop.setProperty("broker.id", KAFKA_BROKER_ID);
    prop.setProperty("socket.send.buffer.bytes", "1048576");
    prop.setProperty("socket.receive.buffer.bytes", "1048576");
    prop.setProperty("socket.request.max.bytes", "104857600");
    prop.setProperty("log.retention.hours", "24");
    prop.setProperty("log.flush.interval.messages", "10000");
    prop.setProperty("log.flush.interval.ms", "1000");
    prop.setProperty("log.segment.bytes", "536870912");
    prop.setProperty("zookeeper.connect", zkConnectStr);
    prop.setProperty("zookeeper.connection.timeout.ms", "1000000");
    prop.setProperty("default.replication.factor", "1");
    return prop;
  }
}