/*
* Copyright © 2016 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.admin;

import co.cask.cdap.api.Admin;
import co.cask.cdap.api.RuntimeContext;
import co.cask.cdap.api.annotation.ProcessInput;
import co.cask.cdap.api.app.AbstractApplication;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.data.batch.Input;
import co.cask.cdap.api.data.batch.Output;
import co.cask.cdap.api.dataset.DatasetManagementException;
import co.cask.cdap.api.dataset.DatasetProperties;
import co.cask.cdap.api.dataset.InstanceConflictException;
import co.cask.cdap.api.dataset.InstanceNotFoundException;
import co.cask.cdap.api.dataset.lib.FileSet;
import co.cask.cdap.api.dataset.lib.KeyValueTable;
import co.cask.cdap.api.dataset.table.Table;
import co.cask.cdap.api.flow.AbstractFlow;
import co.cask.cdap.api.flow.flowlet.AbstractFlowlet;
import co.cask.cdap.api.flow.flowlet.OutputEmitter;
import co.cask.cdap.api.flow.flowlet.StreamEvent;
import co.cask.cdap.api.mapreduce.AbstractMapReduce;
import co.cask.cdap.api.mapreduce.MapReduceContext;
import co.cask.cdap.api.service.AbstractService;
import co.cask.cdap.api.service.http.AbstractHttpServiceHandler;
import co.cask.cdap.api.service.http.HttpServiceRequest;
import co.cask.cdap.api.service.http.HttpServiceResponder;
import co.cask.cdap.api.spark.AbstractSpark;
import co.cask.cdap.api.spark.JavaSparkExecutionContext;
import co.cask.cdap.api.spark.JavaSparkMain;
import co.cask.cdap.api.worker.AbstractWorker;
import co.cask.cdap.api.workflow.AbstractWorkflow;
import co.cask.cdap.api.workflow.AbstractWorkflowAction;
import co.cask.cdap.internal.guava.reflect.TypeToken;
import com.google.common.base.Throwables;
import com.google.common.collect.Iterables;
import com.google.gson.Gson;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.FlatMapFunction;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFunction;
import org.junit.Assert;
import scala.Tuple2;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import javax.ws.rs.DELETE;
import javax.ws.rs.GET;
import javax.ws.rs.POST;
import javax.ws.rs.PUT;
import javax.ws.rs.Path;
import javax.ws.rs.PathParam;
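
/**
 * An application that exercises the {@link Admin} interface from every program type: an HTTP service
 * for dataset administration, a worker, a workflow with a custom action, a flow, a MapReduce program,
 * and Java and Scala Spark programs.
 */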
public class AdminApp extends AbstractApplication {
public static final String FLOW_NAME = "AdminFlow";
public static final String MAPREDUCE_NAME = "AdminMapReduce";
public static final String SPARK_NAME = "AdminSpark";
public static final String SPARK_SCALA_NAME = "AdminScalaSpark";
public static final String SERVICE_NAME = "AdminService";
public static final String WORKER_NAME = "AdminWorker";
public static final String WORKFLOW_NAME = "AdminWorkflow";
@Override
public void configure() {
addStream("events");
addFlow(new AdminFlow());
addMapReduce(new AdminMapReduce());
addSpark(new AdminSpark());
addSpark(new AdminScalaSpark());
addWorker(new AdminWorker());
addWorkflow(new AdminWorkflow());
addService(new AbstractService() {
@Override
protected void configure() {
setName(SERVICE_NAME);
addHandler(new DatasetAdminHandler());
}
});
}
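/**
 * HTTP handler exposing dataset administration operations:
 * GET exists/{dataset}, GET type/{dataset}, GET props/{dataset}, PUT create/{dataset}/{type},
 * PUT update/{dataset}, POST truncate/{dataset}, and DELETE delete/{dataset}.
 * The PUT endpoints accept an optional JSON map of dataset properties as the request body.
 */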
public static class DatasetAdminHandler extends AbstractHttpServiceHandler {
private static final Gson GSON = new Gson();
@GET
@Path("exists/{dataset}")
public void exists(HttpServiceRequest request, HttpServiceResponder responder,
@PathParam("dataset") String dataset) throws DatasetManagementException {
Admin admin = getContext().getAdmin();
responder.sendString(Boolean.toString(admin.datasetExists(dataset)));
}
@GET
@Path("type/{dataset}")
public void type(HttpServiceRequest request, HttpServiceResponder responder,
@PathParam("dataset") String dataset) throws DatasetManagementException {
Admin admin = getContext().getAdmin();
if (!admin.datasetExists(dataset)) {
responder.sendStatus(404);
return;
}
responder.sendString(admin.getDatasetType(dataset));
}
@GET
@Path("props/{dataset}")
public void properties(HttpServiceRequest request, HttpServiceResponder responder,
@PathParam("dataset") String dataset) throws DatasetManagementException {
Admin admin = getContext().getAdmin();
if (!admin.datasetExists(dataset)) {
responder.sendStatus(404);
return;
}
responder.sendJson(200, admin.getDatasetProperties(dataset).getProperties());
}
@PUT
@Path("create/{dataset}/{type}")
public void create(HttpServiceRequest request, HttpServiceResponder responder,
@PathParam("dataset") String dataset, @PathParam("type") String type)
throws DatasetManagementException {
DatasetProperties datasetProps = parseBodyAsProps(request);
Admin admin = getContext().getAdmin();
try {
admin.createDataset(dataset, type, datasetProps);
responder.sendStatus(200);
} catch (InstanceConflictException e) {
responder.sendStatus(409);
}
}
@PUT
@Path("update/{dataset}")
public void update(HttpServiceRequest request, HttpServiceResponder responder,
@PathParam("dataset") String dataset)
throws DatasetManagementException {
DatasetProperties datasetProps = parseBodyAsProps(request);
Admin admin = getContext().getAdmin();
try {
admin.updateDataset(dataset, datasetProps);
responder.sendStatus(200);
} catch (InstanceNotFoundException e) {
responder.sendStatus(404);
}
}
@POST
@Path("truncate/{dataset}")
public void truncate(HttpServiceRequest request, HttpServiceResponder responder,
@PathParam("dataset") String dataset)
throws DatasetManagementException {
Admin admin = getContext().getAdmin();
try {
admin.truncateDataset(dataset);
responder.sendStatus(200);
} catch (InstanceNotFoundException e) {
responder.sendStatus(404);
}
}
@DELETE
@Path("delete/{dataset}")
public void delete(HttpServiceRequest request, HttpServiceResponder responder,
@PathParam("dataset") String dataset)
throws DatasetManagementException {
Admin admin = getContext().getAdmin();
try {
admin.dropDataset(dataset);
responder.sendStatus(200);
} catch (InstanceNotFoundException e) {
responder.sendStatus(404);
}
}
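/**
 * Parses the request body as a JSON map of dataset properties; returns empty properties if no body was sent.
 */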
private static DatasetProperties parseBodyAsProps(HttpServiceRequest request) {
String body = Bytes.toString(request.getContent());
if (body.isEmpty()) {
return DatasetProperties.EMPTY;
}
Map<String, String> props = GSON.fromJson(body, new TypeToken<Map<String, String>>() { }.getType());
return DatasetProperties.builder().addAll(props).build();
}
}
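/**
 * A worker that runs the shared dataset administration steps in {@link AdminApp#performAdmin(RuntimeContext)}.
 */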
public static class AdminWorker extends AbstractWorker {
@Override
protected void configure() {
super.configure();
setName(WORKER_NAME);
}
@Override
public void run() {
performAdmin(getContext());
}
}
// Called from the worker and also from the custom workflow action below.
static void performAdmin(RuntimeContext context) {
Admin admin = context.getAdmin();
Map<String, String> args = context.getRuntimeArguments();
try {
// if invoked with dropAll=true, clean up all datasets (a, b, c, d)
if ("true".equals(args.get("dropAll"))) {
for (String name : new String[]{"a", "b", "c", "d"}) {
if (admin.datasetExists(name)) {
admin.dropDataset(name);
}
}
} else {
// otherwise: create 'a', append "/extra" to the base path of 'b', truncate 'c', and drop 'd'
admin.createDataset("a", Table.class.getName(), DatasetProperties.EMPTY);
String type = admin.getDatasetType("b");
Assert.assertEquals(FileSet.class.getName(), type);
DatasetProperties bProps = admin.getDatasetProperties("b");
String base = bProps.getProperties().get("base.path");
Assert.assertNotNull(base);
String newBase = base + "/extra";
DatasetProperties newBProps = DatasetProperties.builder()
.addAll(bProps.getProperties()).add("base.path", newBase).build();
admin.updateDataset("b", newBProps);
admin.truncateDataset("c");
admin.dropDataset("d");
}
} catch (DatasetManagementException e) {
throw Throwables.propagate(e);
}
}
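/**
 * A flow that reads the 'events' stream, splits each event into lower-cased words, and counts the words
 * in datasets that are created and dropped at runtime.
 */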
public static class AdminFlow extends AbstractFlow {
@Override
protected void configure() {
setName(FLOW_NAME);
addFlowlet("splitter", new SplitterFlowlet());
addFlowlet("counter", new CounterFlowlet());
connectStream("events", "splitter");
connect("splitter", "counter");
}
public static class SplitterFlowlet extends AbstractFlowlet {
OutputEmitter<String> out;
@ProcessInput
public void process(StreamEvent event) {
for (String word : Bytes.toString(event.getBody()).split(" ")) {
out.emit(word.toLowerCase());
}
}
}
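/**
 * Counts words in one KeyValueTable per first letter, creating each table on demand through the
 * Admin API and dropping all of them when the flowlet is destroyed.
 */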
public static class CounterFlowlet extends AbstractFlowlet {
Map<Character, KeyValueTable> tables = new HashMap<>();
@ProcessInput
public void process(String word) throws DatasetManagementException {
Character c = word.charAt(0);
if (!tables.containsKey(c)) {
getContext().getAdmin().createDataset("counters_" + c, "keyValueTable", DatasetProperties.EMPTY);
tables.put(c, getContext().<KeyValueTable>getDataset("counters_" + c));
}
KeyValueTable counters = tables.get(c);
counters.increment(Bytes.toBytes(word), 1L);
}
@Override
public void destroy() {
for (Character c : tables.keySet()) {
try {
getContext().getAdmin().dropDataset("counters_" + c);
} catch (DatasetManagementException e) {
throw Throwables.propagate(e);
}
}
}
}
}
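/**
 * A workflow whose single custom action performs the same administration steps as the worker.
 */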
public static class AdminWorkflow extends AbstractWorkflow {
@Override
protected void configure() {
setName(WORKFLOW_NAME);
addAction(new AdminAction());
}
public static class AdminAction extends AbstractWorkflowAction {
@Override
public void run() {
performAdmin(getContext());
}
}
}
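/**
 * A word-count MapReduce that reads the 'lines' dataset and writes word frequencies to the 'counts'
 * dataset, truncating 'counts' through the Admin API before the job is submitted.
 */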
public static class AdminMapReduce extends AbstractMapReduce {
@Override
protected void configure() {
setName(MAPREDUCE_NAME);
}
@Override
public void beforeSubmit(MapReduceContext context) throws Exception {
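// configure the Hadoop job: tokenize in the mapper, count in a single reducer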
Job job = context.getHadoopJob();
job.setMapperClass(Tokenizer.class);
job.setReducerClass(Counter.class);
job.setNumReduceTasks(1);
context.addInput(Input.ofDataset("lines"));
context.addOutput(Output.ofDataset("counts"));
// truncate the output dataset through the Admin API before the job is submitted
context.getAdmin().truncateDataset("counts");
}
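/**
 * Splits each input line into words and emits each word with a count of one.
 */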
public static class Tokenizer extends Mapper<byte[], byte[], Text, LongWritable> {
static final LongWritable ONE = new LongWritable(1L);
@Override
protected void map(byte[] key, byte[] value, Context context) throws IOException, InterruptedException {
for (String word : Bytes.toString(value).split(" ")) {
context.write(new Text(word), ONE);
}
}
}
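/**
 * Sums the occurrences of each word and writes the word and its total as byte arrays.
 */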
public static class Counter extends Reducer<Text, LongWritable, byte[], byte[]> {
@Override
protected void reduce(Text key, Iterable<LongWritable> values, Context context)
throws IOException, InterruptedException {
context.write(Bytes.toBytes(key.toString()), Bytes.toBytes(Iterables.size(values)));
}
}
}
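/**
 * A Spark word count in Java that truncates the 'counts' dataset through the Admin API before saving
 * its results.
 */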
public static class AdminSpark extends AbstractSpark {
@Override
protected void configure() {
setName(SPARK_NAME);
setMainClass(WordCountSpark.class);
}
public static class WordCountSpark implements JavaSparkMain {
@Override
public void run(JavaSparkExecutionContext sec) throws Exception {
JavaSparkContext jsc = new JavaSparkContext(); // create the Spark context for this run; it is not referenced directly below
JavaPairRDD<byte[], byte[]> input = sec.fromDataset("lines");
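// split each line into individual words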
JavaRDD<String> words = input.values().flatMap(new FlatMapFunction<byte[], String>() {
public Iterable<String> call(byte[] line) {
return Arrays.asList(Bytes.toString(line).split(" "));
}
});
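// pair each word with a count of one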
JavaPairRDD<String, Integer> pairs = words.mapToPair(new PairFunction<String, String, Integer>() {
public Tuple2<String, Integer> call(String s) {
return new Tuple2<>(s, 1);
}
});
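// sum the counts for each word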
JavaPairRDD<String, Integer> counts = pairs.reduceByKey(new Function2<Integer, Integer, Integer>() {
public Integer call(Integer a, Integer b) {
return a + b;
}
});
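// convert words and counts to byte arrays for the 'counts' dataset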
JavaPairRDD<byte[], byte[]> result = counts.mapToPair(
new PairFunction<Tuple2<String, Integer>, byte[], byte[]>() {
@Override
public Tuple2<byte[], byte[]> call(Tuple2<String, Integer> input) throws Exception {
return new Tuple2<>(Bytes.toBytes(input._1()), Bytes.toBytes(input._2()));
}
});
sec.getAdmin().truncateDataset("counts");
sec.saveAsDataset(result, "counts");
}
}
}
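/**
 * The Scala counterpart of {@link AdminSpark}. Its main class, ScalaAdminSparkProgram, is not defined
 * in this file and is presumably implemented in Scala elsewhere in this package.
 */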
public static class AdminScalaSpark extends AbstractSpark {
@Override
protected void configure() {
setName(SPARK_SCALA_NAME);
setMainClass(ScalaAdminSparkProgram.class);
}
}
}