/*
* Copyright © 2014 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.mapreduce;
import co.cask.cdap.api.app.AbstractApplication;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.data.format.FormatSpecification;
import co.cask.cdap.api.data.format.Formats;
import co.cask.cdap.api.data.format.StructuredRecord;
import co.cask.cdap.api.data.schema.Schema;
import co.cask.cdap.api.data.stream.Stream;
import co.cask.cdap.api.data.stream.StreamBatchReadable;
import co.cask.cdap.api.dataset.lib.KeyValueTable;
import co.cask.cdap.api.mapreduce.AbstractMapReduce;
import co.cask.cdap.api.mapreduce.MapReduceContext;
import co.cask.cdap.api.stream.GenericStreamEventData;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
/**
 * App used to test whether M/R can read from streams.
 *
 * <p>Registers the stream {@code mrStream}, the {@link KeyValueTable} dataset {@code prices} and the
 * {@link BodyTracker} MapReduce, which reads Avro-formatted events from the stream and writes the
 * summed {@code num_traded * price} of each ticker to the dataset.
 */
public class AppWithMapReduceUsingStream extends AbstractApplication {

  /** Schema of the stream event body: {@code ticker} (string), {@code num_traded} (int), {@code price} (float). */
  static final Schema SCHEMA = Schema.recordOf(
    "event",
    Schema.Field.of("ticker", Schema.of(Schema.Type.STRING)),
    Schema.Field.of("num_traded", Schema.of(Schema.Type.INT)),
    Schema.Field.of("price", Schema.of(Schema.Type.FLOAT))
  );

  /**
   * Declares the application's name and description, its input stream, its output dataset and
   * the MapReduce program.
   */
  @Override
  public void configure() {
    setName("AppWithMapReduceUsingStream");
    setDescription("Application with MapReduce job using stream as input");
    addStream(new Stream("mrStream"));
    createDataset("prices", KeyValueTable.class);
    addMapReduce(new BodyTracker());
  }

  /**
   * MapReduce that uses the {@code mrStream} stream as input and the {@code prices} dataset as output,
   * with {@link TickerMapper} as mapper and {@link PriceCounter} as reducer.
   */
  public static final class BodyTracker extends AbstractMapReduce {

    /** Sets {@code prices} as the output dataset of this MapReduce. */
    @Override
    public void configure() {
      setOutputDataset("prices");
    }

    /**
     * Wires the mapper, the reducer and their key/value classes into the Hadoop job, then
     * configures the stream input.
     *
     * @param context context of the MapReduce run, used to obtain the Hadoop {@link Job}
     * @throws Exception declared by the overridden method
     */
    @Override
    public void beforeSubmit(MapReduceContext context) throws Exception {
      Job job = context.getHadoopJob();

      job.setMapperClass(TickerMapper.class);
      job.setMapOutputKeyClass(Text.class);
      job.setMapOutputValueClass(FloatWritable.class);

      job.setReducerClass(PriceCounter.class);
      job.setOutputKeyClass(byte[].class);
      job.setOutputValueClass(byte[].class);

      // Event bodies are decoded as Avro records that follow SCHEMA; no extra format settings are passed.
      FormatSpecification formatSpec = new FormatSpecification(
        Formats.AVRO, SCHEMA, Collections.<String, String>emptyMap());
      // NOTE(review): 0 and Long.MAX_VALUE are presumably the start/end time bounds, i.e. the
      // whole stream is read -- confirm against StreamBatchReadable.
      StreamBatchReadable.useStreamInput(context, "mrStream", 0, Long.MAX_VALUE, formatSpec);
    }
  }

  /**
   * Reads each stream event body as a {@link StructuredRecord} and emits the value of that trade,
   * {@code num_traded * price}, keyed by the event's ticker.
   */
  public static class TickerMapper extends
    Mapper<LongWritable, GenericStreamEventData<StructuredRecord>, Text, FloatWritable> {

    /**
     * Emits {@code (ticker, num_traded * price)} for a single stream event.
     *
     * @param key input key; not used by this mapper
     * @param eventData stream event whose body carries the {@code ticker}, {@code num_traded} and
     *                  {@code price} fields
     * @param context Hadoop context the output pair is written to
     */
    @Override
    public void map(LongWritable key, GenericStreamEventData<StructuredRecord> eventData, Context context)
      throws IOException, InterruptedException {
      StructuredRecord body = eventData.getBody();
      String ticker = body.get("ticker").toString();
      Integer numTraded = body.get("num_traded");
      Float price = body.get("price");
      context.write(new Text(ticker), new FloatWritable(numTraded * price));
    }
  }

  /**
   * Sums the values emitted for each ticker and writes the total, encoded with
   * {@link Bytes#toBytes(float)}, under the ticker's bytes.
   */
  public static class PriceCounter extends Reducer<Text, FloatWritable, byte[], byte[]> {

    /**
     * Adds up all values of one ticker and writes a single {@code (ticker bytes, total bytes)} pair.
     *
     * @param key the ticker
     * @param prices the values emitted by the mapper for this ticker
     * @param context Hadoop context the output pair is written to
     */
    @Override
    public void reduce(Text key, Iterable<FloatWritable> prices, Context context)
      throws IOException, InterruptedException {
      // Primitive accumulator: avoids boxing and unboxing a Float on every iteration.
      float totalPrice = 0f;
      for (FloatWritable price : prices) {
        totalPrice += price.get();
      }
      // Text.getBytes() exposes the backing array, of which only the first getLength() bytes are
      // valid. The key object may be reused for keys of different lengths, so copy exactly the
      // valid prefix instead of writing the raw array.
      byte[] ticker = Arrays.copyOf(key.getBytes(), key.getLength());
      context.write(ticker, Bytes.toBytes(totalPrice));
    }
  }
}