/* * Copyright © 2014-2016 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.examples.purchase; import co.cask.cdap.api.ProgramLifecycle; import co.cask.cdap.api.Resources; import co.cask.cdap.api.annotation.UseDataSet; import co.cask.cdap.api.data.batch.Input; import co.cask.cdap.api.data.batch.Output; import co.cask.cdap.api.dataset.lib.KeyValueTable; import co.cask.cdap.api.mapreduce.AbstractMapReduce; import co.cask.cdap.api.mapreduce.MapReduceContext; import co.cask.cdap.api.metrics.Metrics; import co.cask.common.http.HttpRequest; import co.cask.common.http.HttpRequests; import co.cask.common.http.HttpResponse; import com.google.gson.Gson; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.Mapper; import org.apache.hadoop.mapreduce.Reducer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.IOException; import java.net.HttpURLConnection; import java.net.URL; import java.util.Map; /** * MapReduce that reads purchases from the purchases DataSet and creates a purchase history for every user */ public class PurchaseHistoryBuilder extends AbstractMapReduce { public static final String MAPPER_MEMORY_MB = "mapper.memory.mb"; public static final String REDUCER_MEMORY_MB = "reducer.memory.mb"; @Override public void configure() { setDescription("Purchase History Builder"); setDriverResources(new Resources(1024)); setMapperResources(new Resources(1024)); setReducerResources(new Resources(1024)); } @Override public void beforeSubmit(MapReduceContext context) throws Exception { Job job = context.getHadoopJob(); job.setReducerClass(PerUserReducer.class); context.addInput(Input.ofDataset("purchases"), PurchaseMapper.class); context.addOutput(Output.ofDataset("history")); // override default memory usage if the corresponding runtime arguments are set. Map<String, String> runtimeArgs = context.getRuntimeArguments(); String mapperMemoryMBStr = runtimeArgs.get(MAPPER_MEMORY_MB); if (mapperMemoryMBStr != null) { context.setMapperResources(new Resources(Integer.parseInt(mapperMemoryMBStr))); } String reducerMemoryMBStr = runtimeArgs.get(REDUCER_MEMORY_MB); if (reducerMemoryMBStr != null) { context.setReducerResources(new Resources(Integer.parseInt(reducerMemoryMBStr))); } } /** * Mapper class to emit user and corresponding purchase information */ public static class PurchaseMapper extends Mapper<byte[], Purchase, Text, Purchase> { private Metrics mapMetrics; @Override public void map(byte[] key, Purchase purchase, Context context) throws IOException, InterruptedException { String user = purchase.getCustomer(); if (purchase.getPrice() > 100000) { mapMetrics.count("purchases.large", 1); } context.write(new Text(user), purchase); } } /** * Reducer class to aggregate all purchases per user */ public static class PerUserReducer extends Reducer<Text, Purchase, String, PurchaseHistory> implements ProgramLifecycle<MapReduceContext> { @UseDataSet("frequentCustomers") private KeyValueTable frequentCustomers; private Metrics reduceMetrics; private URL userProfileServiceURL; private static final int RARE_PURCHASE_COUNT = 1; private static final int FREQUENT_PURCHASE_COUNT = 10; private static final Logger LOG = LoggerFactory.getLogger(PerUserReducer.class); @Override public void initialize(MapReduceContext context) throws Exception { userProfileServiceURL = context.getServiceURL(UserProfileServiceHandler.SERVICE_NAME); } public void reduce(Text customer, Iterable<Purchase> values, Context context) throws IOException, InterruptedException { UserProfile userProfile = null; try { URL url = new URL(userProfileServiceURL, UserProfileServiceHandler.USER_ENDPOINT + "/" + customer.toString()); HttpRequest request = HttpRequest.get(url).build(); HttpResponse response = HttpRequests.execute(request); if (response.getResponseCode() != HttpURLConnection.HTTP_NO_CONTENT) { userProfile = new Gson().fromJson(response.getResponseBodyAsString(), UserProfile.class); } } catch (Exception e) { LOG.warn("Error accessing user profile: {}", e.getCause()); } PurchaseHistory purchases = new PurchaseHistory(customer.toString(), userProfile); int numPurchases = 0; for (Purchase val : values) { purchases.add(new Purchase(val)); numPurchases++; } if (numPurchases == RARE_PURCHASE_COUNT) { reduceMetrics.count("customers.rare", 1); } else if (numPurchases > FREQUENT_PURCHASE_COUNT) { reduceMetrics.count("customers.frequent", 1); frequentCustomers.write(customer.toString(), String.valueOf(numPurchases)); } context.write(customer.toString(), purchases); } @Override public void destroy() { // no-op } } }