/*
 * Copyright © 2015 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.examples.wikipedia;

import co.cask.cdap.api.Resources;
import co.cask.cdap.api.spark.AbstractSpark;

import java.util.Locale;

/**
 * Spark program that executes in a workflow and analyzes wikipedia data.
 *
 * <p>The concrete clustering implementation (LDA or K-Means) is selected at configure time from
 * the {@code clusteringAlgorithm} value of the application config passed to the constructor.
 */
public class SparkWikipediaClustering extends AbstractSpark {

  /** Base program name; the upper-cased algorithm is appended to it in {@link #configure()}. */
  public static final String NAME = SparkWikipediaClustering.class.getSimpleName();

  private final WikipediaPipelineApp.WikipediaAppConfig appConfig;

  /**
   * Creates the program for the given application configuration.
   *
   * @param appConfig application config whose {@code clusteringAlgorithm} field decides which
   *                  Spark main class is used
   */
  public SparkWikipediaClustering(WikipediaPipelineApp.WikipediaAppConfig appConfig) {
    this.appConfig = appConfig;
  }

  /**
   * Sets the description, main class, name and resources of this program according to the
   * configured clustering algorithm.
   *
   * <p>The program name is {@link #NAME} followed by {@code "-"} and the upper-cased algorithm,
   * e.g. {@code SparkWikipediaClustering-LDA}.
   *
   * @throws IllegalArgumentException if the configured algorithm is neither {@code "lda"} nor
   *                                  {@code "kmeans"} (including when it is {@code null})
   */
  @Override
  protected void configure() {
    // Read the field once so validation and naming are guaranteed to use the same value.
    final String algorithm = appConfig.clusteringAlgorithm;

    // Constant-first equals() keeps a null algorithm from throwing a NullPointerException here;
    // it falls through to the explicit IllegalArgumentException below instead.
    if ("lda".equals(algorithm)) {
      setDescription("A Spark program that analyzes wikipedia data using Latent Dirichlet Allocation (LDA).");
      setMainClass(ScalaSparkLDA.class);
    } else if ("kmeans".equals(algorithm)) {
      setDescription("A Spark program that analyzes wikipedia data using K-Means.");
      setMainClass(ScalaSparkKMeans.class);
    } else {
      throw new IllegalArgumentException("Only 'lda' and 'kmeans' are supported as clustering algorithms. " +
                                           "Found " + algorithm);
    }

    // Locale.ROOT keeps the program name identical on every host: the no-arg toUpperCase() uses
    // the JVM default locale, which maps some letters differently (e.g. 'i' under a Turkish locale).
    setName(NAME + "-" + algorithm.toUpperCase(Locale.ROOT));

    // NOTE(review): 1024 is presumably the memory size in MB for the driver and each executor;
    // confirm against the Resources constructor documentation.
    setDriverResources(new Resources(1024));
    setExecutorResources(new Resources(1024));
  }
}