/* * Copyright © 2015 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.examples.sportresults; import co.cask.cdap.api.app.AbstractApplication; import co.cask.cdap.api.dataset.lib.PartitionedFileSet; import co.cask.cdap.api.dataset.lib.PartitionedFileSetProperties; import co.cask.cdap.api.dataset.lib.Partitioning; import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; /** * An example that illustrates using partitioned file sets through an example of sport results analytics. */ public class SportResults extends AbstractApplication { @Override public void configure() { addService(new UploadService()); addMapReduce(new ScoreCounter()); // Create the "results" partitioned file set, configure it to work with MapReduce and with Explore createDataset("results", PartitionedFileSet.class, PartitionedFileSetProperties.builder() // Properties for partitioning .setPartitioning(Partitioning.builder().addStringField("league").addIntField("season").build()) // Properties for file set .setInputFormat(TextInputFormat.class) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, ",") // Properties for Explore (to create a partitioned Hive table) .setEnableExploreOnCreate(true) .setExploreFormat("csv") .setExploreSchema("date STRING, winner STRING, loser STRING, winnerpoints INT, loserpoints INT") .setDescription("FileSet dataset of game results for a sport league and season") .build()); // Create the aggregates partitioned file set, configure it to work with MapReduce and with Explore createDataset("totals", PartitionedFileSet.class, PartitionedFileSetProperties.builder() // Properties for partitioning .setPartitioning(Partitioning.builder().addStringField("league").build()) // Properties for file set .setInputFormat(TextInputFormat.class) .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, ",") // Properties for Explore (to create a partitioned Hive table) .setEnableExploreOnCreate(true) .setExploreFormat("csv") .setExploreSchema("team STRING, wins INT, ties INT, losses INT, scored INT, conceded INT") .setDescription("FileSet dataset of aggregated results for each sport league") .build()); } }