/*
* Copyright © 2014-2015 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap;
import co.cask.cdap.api.annotation.Output;
import co.cask.cdap.api.annotation.ProcessInput;
import co.cask.cdap.api.annotation.Property;
import co.cask.cdap.api.annotation.UseDataSet;
import co.cask.cdap.api.app.AbstractApplication;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.data.stream.Stream;
import co.cask.cdap.api.dataset.lib.KeyValueTable;
import co.cask.cdap.api.flow.AbstractFlow;
import co.cask.cdap.api.flow.flowlet.AbstractFlowlet;
import co.cask.cdap.api.flow.flowlet.Callback;
import co.cask.cdap.api.flow.flowlet.FailurePolicy;
import co.cask.cdap.api.flow.flowlet.FailureReason;
import co.cask.cdap.api.flow.flowlet.InputContext;
import co.cask.cdap.api.flow.flowlet.OutputEmitter;
import co.cask.cdap.api.flow.flowlet.StreamEvent;
import co.cask.cdap.api.mapreduce.AbstractMapReduce;
import co.cask.cdap.api.metrics.Metrics;
import co.cask.cdap.api.service.AbstractService;
import co.cask.cdap.api.service.http.AbstractHttpServiceHandler;
import co.cask.cdap.api.service.http.HttpServiceRequest;
import co.cask.cdap.api.service.http.HttpServiceResponder;
import com.google.common.base.Charsets;
import com.google.common.collect.ImmutableMap;
import com.google.common.primitives.Longs;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.util.Map;
import javax.annotation.Nullable;
import javax.ws.rs.GET;
import javax.ws.rs.Path;
import javax.ws.rs.PathParam;
/**
* This is a sample word count app that is used in testing in
* many places.
*/
public class WordCountApp extends AbstractApplication {
private static final Logger LOG = LoggerFactory.getLogger(WordCountApp.class);
/**
 * Declares the pieces this application consists of: its name and description,
 * the "text" stream, the "mydataset" key/value table, the word count flow,
 * the word frequency service and the MapReduce program.
 */
@Override
public void configure() {
  // Application identity.
  setName("WordCountApp");
  setDescription("Application for counting words");

  // Data: the stream feeding the flow and the table holding the counters.
  final Stream textStream = new Stream("text");
  addStream(textStream);
  createDataset("mydataset", KeyValueTable.class);

  // Programs, registered in the same order as before.
  final WordCountFlow countingFlow = new WordCountFlow();
  addFlow(countingFlow);

  final WordFrequencyService frequencyService = new WordFrequencyService();
  addService(frequencyService);

  final VoidMapReduceJob mapReduce = new VoidMapReduceJob();
  addMapReduce(mapReduce);
}
/**
 * Immutable value object carrying a title, a text and an "expired" flag.
 * Neither string is validated; {@code null} is stored and returned as given.
 */
public static final class MyRecord {

  private final String title;
  private final String text;
  private final boolean expired;

  /**
   * @param title   title of the record, may be {@code null}
   * @param text    text of the record, may be {@code null}
   * @param expired value later returned by {@link #isExpired()}
   */
  public MyRecord(String title, String text, boolean expired) {
    this.expired = expired;
    this.text = text;
    this.title = title;
  }

  /** @return the title passed to the constructor */
  public String getTitle() {
    return this.title;
  }

  /** @return the text passed to the constructor */
  public String getText() {
    return this.text;
  }

  /** @return the expired flag passed to the constructor */
  public boolean isExpired() {
    return this.expired;
  }
}
/**
 * MapReduce program that configures nothing but a description.
 * It is here for testing MDS.
 */
public static class VoidMapReduceJob extends AbstractMapReduce {

  /** Sets the description; nothing else is configured for this program. */
  @Override
  protected void configure() {
    final String description =
      "Mapreduce that does nothing (and actually doesn't run) - it is here for testing MDS";
    setDescription(description);
  }
}
/**
 * Flow that routes the "text" stream through three flowlets:
 * "StreamSource", then "Tokenizer", then "CountByField".
 */
public static class WordCountFlow extends AbstractFlow {

  /** Names the flow, registers its flowlets and wires them together. */
  @Override
  protected void configureFlow() {
    setName("WordCountFlow");
    setDescription("Flow for counting words");

    // Only the source flowlet is given an explicit name here; the other two
    // are referred to as "Tokenizer" and "CountByField" in the connections
    // below (presumably their default names - confirm against AbstractFlow).
    final String sourceName = "StreamSource";
    addFlowlet(sourceName, new StreamSucker());
    addFlowlet(new Tokenizer());
    addFlowlet(new CountByField("word", "field"));

    // stream -> source -> tokenizer -> counter
    connectStream("text", sourceName);
    connect(sourceName, "Tokenizer");
    connect("Tokenizer", "CountByField");
  }
}
/**
 * Source flowlet: converts each stream event whose origin is "text" into a
 * {@link MyRecord} and emits it. Events from any other origin are dropped.
 */
public static class StreamSucker extends AbstractFlowlet {
  private OutputEmitter<MyRecord> output;
  private Metrics metrics;

  /**
   * Emits one record per accepted event. The record's title is the "title"
   * header, its text is the UTF-8 decoded body (or {@code null} when the
   * event has no body), and its expired flag is always {@code false}.
   *
   * @param event   the stream event to convert
   * @param context input context, used to check where the event came from
   * @throws CharacterCodingException if the body cannot be decoded as UTF-8
   */
  @ProcessInput
  public void process(StreamEvent event, InputContext context) throws CharacterCodingException {
    final boolean fromTextStream = "text".equals(context.getOrigin());
    if (!fromTextStream) {
      return;
    }

    // Counted before decoding, so undecodable events are included as well.
    metrics.count("stream.event", 1);

    final ByteBuffer body = event.getBody();
    final String title = event.getHeaders().get("title");

    String text = null;
    if (body != null) {
      text = Charsets.UTF_8.newDecoder().decode(body).toString();
    }

    output.emit(new MyRecord(title, text, false));
  }
}
/**
 * Flowlet that splits the title and the text of every incoming
 * {@link MyRecord} into tokens and emits one map per token on the
 * "field" output.
 */
public static class Tokenizer extends AbstractFlowlet {
  @Output("field")
  private OutputEmitter<Map<String, String>> outputMap;

  /**
   * Tokenizes the title first, then the text of the given record.
   *
   * @param data the record to tokenize
   */
  @ProcessInput
  public void foo(MyRecord data) {
    tokenize(data.getTitle(), "title");
    tokenize(data.getText(), "text");
  }

  /**
   * Splits {@code str} on spaces, dots and dashes and emits a two-entry map
   * for each piece: "field" mapped to {@code field} and "word" mapped to the
   * piece. A {@code null} string emits nothing.
   *
   * @param str   the string to split, may be {@code null}
   * @param field the label emitted alongside every token
   */
  private void tokenize(String str, String field) {
    if (str != null) {
      // Adjacent delimiters produce empty pieces; they are emitted as-is.
      final String[] pieces = str.split("[ .-]");
      for (int i = 0; i < pieces.length; i++) {
        outputMap.emit(ImmutableMap.of("field", field, "word", pieces[i]));
      }
    }
  }
}
/**
 * Flowlet that counts tokens in the "mydataset" key/value table.
 *
 * Each input is a map; the entry under {@code wordKey} is the token to count
 * and the optional entry under {@code fieldKey} is prepended to it as
 * {@code field:token} to form the counter key.
 */
public static class CountByField extends AbstractFlowlet implements Callback {
  @Property
  private final String wordKey;
  @Property
  private final String fieldKey;
  @Property
  private final long increment = 1L;

  /**
   * @param wordKey  map key under which the token is looked up
   * @param fieldKey map key under which the optional field name is looked up
   */
  public CountByField(String wordKey, String fieldKey) {
    this.wordKey = wordKey;
    this.fieldKey = fieldKey;
  }

  /**
   * Increments the counter for the token described by {@code fieldToken} and
   * logs the counter value read back afterwards. Inputs without an entry for
   * {@code wordKey} are ignored.
   *
   * @param fieldToken map holding the token and, optionally, its field name
   */
  @ProcessInput("field")
  public void process(Map<String, String> fieldToken) {
    // Parameterized logging: the message is only formatted when INFO is enabled.
    LOG.info("process count by field: {}", fieldToken);
    String token = fieldToken.get(wordKey);
    if (token == null) {
      return;
    }
    String field = fieldToken.get(fieldKey);
    if (field != null) {
      token = field + ":" + token;
    }
    KeyValueTable counters = getContext().getDataset("mydataset");
    // Encode the key once and reuse it for both the increment and the read.
    byte[] key = token.getBytes(Charsets.UTF_8);
    counters.increment(key, increment);
    byte[] bytes = counters.read(key);
    LOG.info("{} {}", token, Longs.fromByteArray(bytes));
  }

  /** No-op: nothing needs to happen after an input was processed successfully. */
  @Override
  public void onSuccess(@Nullable Object input, @Nullable InputContext inputContext) {
  }

  /**
   * Always asks for the failed input to be retried.
   *
   * @return {@link FailurePolicy#RETRY}
   */
  @Override
  public FailurePolicy onFailure(@Nullable Object input, @Nullable InputContext inputContext, FailureReason reason) {
    return FailurePolicy.RETRY;
  }
}
/**
 * Service whose only handler is {@link WordFrequencyHandler}.
 */
public static class WordFrequencyService extends AbstractService {

  /** Registers the single HTTP handler of this service. */
  @Override
  protected void configure() {
    final WordFrequencyHandler handler = new WordFrequencyHandler();
    addHandler(handler);
  }
}
/**
 * HTTP handler that serves counters stored in the "mydataset" table.
 */
public static class WordFrequencyHandler extends AbstractHttpServiceHandler {
  @UseDataSet("mydataset")
  private KeyValueTable counters;

  /**
   * Looks up {@code word} in the table and responds with a JSON object
   * mapping the word to its count, or with status 404 when the table has no
   * value for it.
   *
   * The path parameter is used verbatim as the table key; CountByField in
   * this file writes keys of the form {@code field:word} whenever a field
   * name is present.
   *
   * @param request   the incoming request (not inspected)
   * @param responder used to send the response
   * @param word      the table key to look up
   */
  @GET
  @Path("wordfreq/{word}")
  public void wordFrequency(HttpServiceRequest request, HttpServiceResponder responder,
                            @PathParam("word") String word) {
    final byte[] stored = counters.read(word);
    if (stored != null) {
      final long count = Bytes.toLong(stored);
      responder.sendJson(ImmutableMap.of(word, count));
    } else {
      responder.sendStatus(404);
    }
  }
}
}