/*
* Copyright © 2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.examples.sparkstreaming;
import co.cask.cdap.api.annotation.UseDataSet;
import co.cask.cdap.api.app.AbstractApplication;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.data.schema.UnsupportedTypeException;
import co.cask.cdap.api.data.stream.Stream;
import co.cask.cdap.api.dataset.DatasetProperties;
import co.cask.cdap.api.dataset.lib.ObjectStore;
import co.cask.cdap.api.dataset.lib.ObjectStores;
import co.cask.cdap.api.service.Service;
import co.cask.cdap.api.service.http.AbstractHttpServiceHandler;
import co.cask.cdap.api.service.http.HttpServiceRequest;
import co.cask.cdap.api.service.http.HttpServiceResponder;
import com.google.common.base.Charsets;
import java.net.HttpURLConnection;
import javax.ws.rs.GET;
import javax.ws.rs.Path;
import javax.ws.rs.PathParam;
/**
 * Spark Streaming example for classifying Kafka messages through a Spark MLlib NaiveBayesModel.
 *
 * <p>The application registers a stream ({@link #STREAM}), the {@code SpamClassifierProgram} Spark
 * program, an HTTP service backed by {@link SpamClassifierServiceHandler}, and an
 * {@link ObjectStore} of {@link Double} values ({@link #DATASET}) that the service reads
 * classification results from.
 */
public class SpamClassifier extends AbstractApplication {

  /** Name under which the classification service is registered. */
  static final String SERVICE_HANDLER = "MessageClassification";

  /** Name of the stream added by this application. */
  public static final String STREAM = "trainingDataStream";

  /** Name of the dataset holding the classification value of each message. */
  public static final String DATASET = "messageClassificationStore";

  /**
   * Declares the application's name, description, stream, Spark program, service and dataset.
   *
   * @throws RuntimeException wrapping an {@link UnsupportedTypeException} if the object store
   *     cannot be created for {@link Double}
   */
  @Override
  public void configure() {
    setName("SpamClassifier");
    setDescription("A Spark Streaming Example for Kafka Message Classification");
    addStream(new Stream(STREAM));
    addSpark(new SpamClassifierProgram());
    addService(SERVICE_HANDLER, new SpamClassifierServiceHandler());

    // Store for message classification status
    try {
      ObjectStores.createObjectStore(
          getConfigurer(),
          DATASET,
          Double.class,
          DatasetProperties.builder()
              .setDescription("Kafka Message Spam " + "Classification")
              .build());
    } catch (UnsupportedTypeException e) {
      // This exception is thrown by ObjectStore if its parameter type cannot be
      // (de)serialized (for example, if it is an interface and not a class, then there is
      // no auto-magic way to deserialize an object). In this case that will not happen
      // because Double is an actual class.
      throw new RuntimeException(e);
    }
  }

  /**
   * A {@link Service} handler to get the classification of Kafka messages.
   */
  public static final class SpamClassifierServiceHandler extends AbstractHttpServiceHandler {

    /** First path segment of the classification endpoint. */
    static final String CLASSIFICATION_PATH = "classification";

    /** Response body for a message whose stored value is non-zero. */
    static final String SPAM = "Spam";

    /** Response body for a message whose stored value is {@code 0.0}. */
    static final String HAM = "Ham";

    @UseDataSet(DATASET)
    private ObjectStore<Double> messageClassificationStore;

    /**
     * Looks up the stored classification of a message and reports it as text.
     *
     * <p>Responds with {@code 200} and {@link #HAM} when the stored value is {@code 0.0},
     * {@code 200} and {@link #SPAM} for any other stored value, and {@code 404} with an
     * explanatory message when nothing is stored for the given id.
     *
     * @param request the incoming HTTP request (not inspected)
     * @param responder used to send the response
     * @param messageId id of the message, taken from the request path
     */
    @Path(CLASSIFICATION_PATH + "/{message-id}")
    @GET
    public void centers(HttpServiceRequest request, HttpServiceResponder responder,
                        @PathParam("message-id") String messageId) {
      Double value = messageClassificationStore.read(Bytes.toBytes(messageId));
      if (value == null) {
        // A 204 (No Content) response cannot carry a body, so the explanatory text would be
        // lost or mishandled; an unknown message id is reported as 404 (Not Found) instead.
        responder.sendString(HttpURLConnection.HTTP_NOT_FOUND,
            String.format("No message was found with message id: %s", messageId), Charsets.UTF_8);
      } else if (value == 0.0) { // ham
        responder.sendString(HttpURLConnection.HTTP_OK, HAM, Charsets.UTF_8);
      } else { // spam
        responder.sendString(HttpURLConnection.HTTP_OK, SPAM, Charsets.UTF_8);
      }
    }
  }
}