/*
* Copyright © 2014-2015 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package $package;
import co.cask.cdap.api.annotation.ProcessInput;
import co.cask.cdap.api.annotation.UseDataSet;
import co.cask.cdap.api.app.AbstractApplication;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.data.schema.UnsupportedTypeException;
import co.cask.cdap.api.data.stream.Stream;
import co.cask.cdap.api.dataset.DatasetProperties;
import co.cask.cdap.api.dataset.lib.ObjectStore;
import co.cask.cdap.api.dataset.lib.ObjectStores;
import co.cask.cdap.api.flow.AbstractFlow;
import co.cask.cdap.api.flow.flowlet.AbstractFlowlet;
import co.cask.cdap.api.flow.flowlet.StreamEvent;
import co.cask.cdap.api.service.AbstractService;
import co.cask.cdap.api.service.Service;
import co.cask.cdap.api.service.http.AbstractHttpServiceHandler;
import co.cask.cdap.api.service.http.HttpServiceRequest;
import co.cask.cdap.api.service.http.HttpServiceResponder;
import co.cask.cdap.api.spark.AbstractSpark;
import com.google.common.base.Charsets;
import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.HttpURLConnection;
import java.nio.ByteBuffer;
import java.util.UUID;
import javax.ws.rs.GET;
import javax.ws.rs.Path;
import javax.ws.rs.PathParam;
/**
* Application that demonstrates a KMeans clustering example.
*/
public class SparkKMeansApp extends AbstractApplication {
@Override
public void configure() {
  setName("SparkKMeans");
  setDescription("Spark KMeans app");

  // Stream through which point data enters the Application
  addStream(new Stream("pointsStream"));
  // Flow that consumes the Stream in real time
  addFlow(new PointsFlow());
  // Spark program that runs over the collected data
  addSpark(new SparkKMeansSpecification());
  // Service used to retrieve the processed data
  addService(new CentersService());

  // ObjectStore Datasets: "points" keeps the input, "centers" keeps the processed output
  try {
    DatasetProperties pointsProperties = DatasetProperties.builder()
      .setDescription("Store points data")
      .build();
    ObjectStores.createObjectStore(getConfigurer(), "points", Point.class, pointsProperties);

    DatasetProperties centersProperties = DatasetProperties.builder()
      .setDescription("Store centers data")
      .build();
    ObjectStores.createObjectStore(getConfigurer(), "centers", String.class, centersProperties);
  } catch (UnsupportedTypeException e) {
    // Raised when the type handed to an ObjectStore cannot be (de)serialized, for example
    // when it is an interface rather than a class, so there is no automatic way to
    // deserialize an object. Not expected here because Point and String are both
    // concrete classes.
    throw new RuntimeException(e);
  }
}
/**
 * Specification of the Spark program that runs the KMeans algorithm; it registers
 * {@code SparkKMeansProgram} as the main class.
 */
public static final class SparkKMeansSpecification extends AbstractSpark {
  private static final String PROGRAM_NAME = "SparkKMeansProgram";
  private static final String PROGRAM_DESCRIPTION = "Spark KMeans Program";

  @Override
  public void configure() {
    setName(PROGRAM_NAME);
    setDescription(PROGRAM_DESCRIPTION);
    setMainClass(SparkKMeansProgram.class);
  }
}
/**
 * This Flowlet reads events from a Stream, parses every event body as a three-dimensional
 * point and saves it to the "points" dataset under a freshly generated random UUID key.
 */
public static final class PointsReader extends AbstractFlowlet {
  private static final Logger LOG = LoggerFactory.getLogger(PointsReader.class);

  /** Number of whitespace-separated coordinates expected in every event body. */
  private static final int DIMENSIONS = 3;

  @UseDataSet("points")
  private ObjectStore<Point> pointsStore;

  /**
   * Parses the body of the incoming event into a {@link Point} and writes it to the dataset.
   *
   * @param event stream event whose body holds three whitespace-separated numbers
   * @throws IllegalArgumentException if the body is missing or does not hold exactly three
   *         coordinates ({@link NumberFormatException} when a coordinate is not a number)
   */
  @ProcessInput
  public void process(StreamEvent event) {
    // Decode the body once and reuse the text for both logging and parsing.
    String body = Bytes.toString(event.getBody());
    LOG.trace("Points info: {}", body);
    pointsStore.write(getIdAsByte(UUID.randomUUID()), parsePoint(body));
  }

  /**
   * Serializes a UUID into its 16-byte form: the most significant 8 bytes first, followed
   * by the least significant 8 bytes.
   */
  private byte[] getIdAsByte(UUID uuid) {
    ByteBuffer bb = ByteBuffer.wrap(new byte[16]);
    bb.putLong(uuid.getMostSignificantBits());
    bb.putLong(uuid.getLeastSignificantBits());
    return bb.array();
  }

  /**
   * Converts the textual event body into a {@link Point}.
   *
   * <p>Coordinates may be separated by any run of whitespace, and leading or trailing
   * whitespace is ignored, so every input that a single-space split accepted is still
   * accepted with the same result.
   */
  private Point parsePoint(String body) {
    Preconditions.checkArgument(body != null, "Stream event has no body");
    String[] parts = body.trim().split("\\s+");
    Preconditions.checkArgument(parts.length == DIMENSIONS,
                                "Expected %s coordinates but found %s in event body '%s'",
                                DIMENSIONS, parts.length, body);
    return new Point(Double.parseDouble(parts[0]),
                     Double.parseDouble(parts[1]),
                     Double.parseDouble(parts[2]));
  }
}
/**
 * A minimal Flow: a single {@link PointsReader} flowlet is connected to the points Stream
 * and stores whatever it consumes in a dataset.
 */
public static final class PointsFlow extends AbstractFlow {
  private static final String READER_FLOWLET = "reader";
  private static final String SOURCE_STREAM = "pointsStream";

  @Override
  protected void configure() {
    setName("PointsFlow");
    setDescription("Reads points information and stores in dataset");
    // Register the flowlet first, then feed it from the Stream.
    addFlowlet(READER_FLOWLET, new PointsReader());
    connectStream(SOURCE_STREAM, READER_FLOWLET);
  }
}
/**
 * A {@link Service} that answers requests for a calculated center, selected by an index
 * parameter. Request handling is delegated to {@link CentersServiceHandler}.
 */
public static final class CentersService extends AbstractService {
  public static final String SERVICE_NAME = "CentersService";

  @Override
  protected void configure() {
    setName(SERVICE_NAME);

    String description = "A service that responds with calculated center based on index parameter.";
    setDescription(description);

    CentersServiceHandler handler = new CentersServiceHandler();
    addHandler(handler);
  }
}
/**
 * Centers Service handler: looks up the calculated center stored in the "centers" dataset
 * under the requested index and returns it as a string.
 */
public static final class CentersServiceHandler extends AbstractHttpServiceHandler {
  // Log under the handler's own class so its messages are attributed to the handler and
  // not to the enclosing service definition.
  private static final Logger LOG = LoggerFactory.getLogger(CentersServiceHandler.class);

  @UseDataSet("centers")
  private ObjectStore<String> store;

  /**
   * Handles {@code GET centers/{index}}.
   *
   * @param request the incoming HTTP request (not inspected)
   * @param responder used to send the stored center, or a "no content" status if none exists
   * @param index index of the center to look up; its byte encoding is the dataset key
   */
  @Path("centers/{index}")
  @GET
  public void centers(HttpServiceRequest request, HttpServiceResponder responder,
                      @PathParam("index") int index) {
    LOG.debug("Try to get centers for index: {}", index);
    String centers = store.read(Bytes.toBytes(index));
    if (centers == null) {
      LOG.debug("No centers found");
      // NOTE(review): HTTP 204 is defined to carry no message body, so clients may never
      // see this text; 404 may be the better fit. The status is left unchanged here so
      // existing callers that check for 204 keep working.
      responder.sendString(HttpURLConnection.HTTP_NO_CONTENT,
                           String.format("No centers found for index: %s", index), Charsets.UTF_8);
    } else {
      LOG.debug("Retrieved centers: {}", centers);
      responder.sendString(centers);
    }
  }
}
}