/* * Copyright © 2015 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.examples.streamconversion; import co.cask.cdap.api.app.AbstractApplication; import co.cask.cdap.api.data.schema.Schema; import co.cask.cdap.api.data.stream.Stream; import co.cask.cdap.api.dataset.lib.FileSetProperties; import co.cask.cdap.api.dataset.lib.TimePartitionedFileSet; import co.cask.cdap.api.schedule.Schedules; import org.apache.avro.mapreduce.AvroKeyInputFormat; import org.apache.avro.mapreduce.AvroKeyOutputFormat; /** * An application that illustrates the use of time-partitioned file sets by the example of * periodic stream conversion. */ public class StreamConversionApp extends AbstractApplication { static final String SCHEMA_STRING = Schema.recordOf( "streamEvent", Schema.Field.of("time", Schema.of(Schema.Type.LONG)), Schema.Field.of("body", Schema.of(Schema.Type.STRING))).toString(); @Override public void configure() { addStream(new Stream("events")); addMapReduce(new StreamConversionMapReduce()); addWorkflow(new StreamConversionWorkflow()); scheduleWorkflow(Schedules.builder("every5min") .setDescription("runs every 5 minutes") .createTimeSchedule("*/5 * * * *"), "StreamConversionWorkflow"); // create the time-partitioned file set, configure it to work with MapReduce and with Explore createDataset("converted", TimePartitionedFileSet.class, FileSetProperties.builder() // properties for file set .setBasePath("converted") .setInputFormat(AvroKeyInputFormat.class) .setOutputFormat(AvroKeyOutputFormat.class) .setOutputProperty("schema", SCHEMA_STRING) // properties for explore (to create a partitioned hive table) .setEnableExploreOnCreate(true) .setSerDe("org.apache.hadoop.hive.serde2.avro.AvroSerDe") .setExploreInputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerInputFormat") .setExploreOutputFormat("org.apache.hadoop.hive.ql.io.avro.AvroContainerOutputFormat") .setTableProperty("avro.schema.literal", SCHEMA_STRING) .setDescription("Converted stream events dataset") .build()); } }