/* * Copyright © 2015 Cask Data, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package co.cask.cdap.examples.loganalysis; import co.cask.cdap.api.annotation.UseDataSet; import co.cask.cdap.api.app.AbstractApplication; import co.cask.cdap.api.common.Bytes; import co.cask.cdap.api.data.stream.Stream; import co.cask.cdap.api.dataset.DatasetProperties; import co.cask.cdap.api.dataset.lib.FileSetProperties; import co.cask.cdap.api.dataset.lib.KeyValueTable; import co.cask.cdap.api.dataset.lib.TimePartitionDetail; import co.cask.cdap.api.dataset.lib.TimePartitionedFileSet; import co.cask.cdap.api.service.Service; import co.cask.cdap.api.service.http.AbstractHttpServiceHandler; import co.cask.cdap.api.service.http.HttpServiceRequest; import co.cask.cdap.api.service.http.HttpServiceResponder; import co.cask.cdap.api.spark.AbstractSpark; import co.cask.cdap.api.workflow.AbstractWorkflow; import co.cask.cdap.api.workflow.Workflow; import com.google.common.base.Charsets; import com.google.common.collect.Maps; import com.google.gson.Gson; import com.google.gson.JsonObject; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.twill.filesystem.Location; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.net.HttpURLConnection; import java.text.DateFormat; import java.text.ParseException; import java.util.Date; import java.util.Map; import java.util.Set; import java.util.SortedSet; import java.util.TreeSet; import javax.ws.rs.GET; import javax.ws.rs.POST; import javax.ws.rs.Path; import javax.ws.rs.PathParam; /** * Application that demonstrate running a Spark and MapReduce simultaneously in a {@link Workflow} */ public class LogAnalysisApp extends AbstractApplication { public static final String LOG_STREAM = "logStream"; public static final String HIT_COUNTER_SERVICE = "HitCounterService"; public static final String RESPONSE_COUNTER_SERVICE = "ResponseCounterService"; public static final String REQUEST_COUNTER_SERVICE = "RequestCounterService"; public static final String RESPONSE_COUNT_STORE = "responseCount"; public static final String HIT_COUNT_STORE = "hitCount"; public static final String REQ_COUNT_STORE = "reqCount"; @Override public void configure() { setDescription("CDAP Log Analysis App"); // A stream to ingest log data addStream(new Stream(LOG_STREAM)); // A Spark and MapReduce for processing log data addSpark(new ResponseCounterSpark()); addMapReduce(new HitCounterProgram()); addWorkflow(new LogAnalysisWorkflow()); // Services to query for result addService(HIT_COUNTER_SERVICE, new HitCounterServiceHandler()); addService(RESPONSE_COUNTER_SERVICE, new ResponseCounterHandler()); addService(REQUEST_COUNTER_SERVICE, new RequestCounterHandler()); // Datasets to store output after processing createDataset(RESPONSE_COUNT_STORE, KeyValueTable.class, DatasetProperties.builder().setDescription("Store response counts").build()); createDataset(HIT_COUNT_STORE, KeyValueTable.class, DatasetProperties.builder().setDescription("Store hit counts").build()); createDataset(REQ_COUNT_STORE, TimePartitionedFileSet.class, FileSetProperties.builder() .setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, ":") .setDescription("Store request counts").build()); } /** * A Workflow which ties spark and mapreduce program together for log analysis */ public static class LogAnalysisWorkflow extends AbstractWorkflow { @Override public void configure() { setDescription("Runs Spark and MapReduce log analysis programs simultaneously"); fork() .addMapReduce(HitCounterProgram.class.getSimpleName()) .also() .addSpark(ResponseCounterSpark.class.getSimpleName()) .join(); } } /** * Specification for the Spark program in this application */ public static final class ResponseCounterSpark extends AbstractSpark { @Override public void configure() { setDescription("Counts the total number of responses for every unique response code"); setMainClassName(ResponseCounterProgram.class.getName()); } } /** * A {@link Service} that responds with total number of hits for a given URL or path */ public static final class HitCounterServiceHandler extends AbstractHttpServiceHandler { private static final Gson GSON = new Gson(); private static final String URL_KEY = "url"; static final String HIT_COUNTER_SERVICE_PATH = "hitcount"; @UseDataSet(HIT_COUNT_STORE) private KeyValueTable hitCountStore; @Path(HIT_COUNTER_SERVICE_PATH) @POST public void getHitCount(HttpServiceRequest request, HttpServiceResponder responder) { String urlRequest = Charsets.UTF_8.decode(request.getContent()).toString(); String url = GSON.fromJson(urlRequest, JsonObject.class).get(URL_KEY).getAsString(); if (url == null) { responder.sendString(HttpURLConnection.HTTP_BAD_REQUEST, "A url or path must be specified with \"url\" as key in JSON.", Charsets.UTF_8); return; } // Get the total number of hits from the dataset for this path byte[] hitCount = hitCountStore.read(url.getBytes(Charsets.UTF_8)); if (hitCount == null) { responder.sendString(HttpURLConnection.HTTP_NO_CONTENT, String.format("No record found of %s", url), Charsets.UTF_8); } else { responder.sendString(String.valueOf(Bytes.toLong(hitCount))); } } } /** * A {@link Service} that responds with total number of responses for a given response code */ public static final class ResponseCounterHandler extends AbstractHttpServiceHandler { static final String RESPONSE_COUNT_PATH = "rescount"; @UseDataSet(RESPONSE_COUNT_STORE) private KeyValueTable responseCountstore; @Path(RESPONSE_COUNT_PATH + "/{rescode}") @GET public void centers(HttpServiceRequest request, HttpServiceResponder responder, @PathParam("rescode") Integer responseCode) { byte[] read = responseCountstore.read(Bytes.toBytes(responseCode)); if (read == null) { responder.sendString(HttpURLConnection.HTTP_NO_CONTENT, String.format("No record found for response code: %s", responseCode), Charsets.UTF_8); } else { responder.sendString(String.valueOf(Bytes.toLong(read))); } } } /** * A Service which serves the number of requests made by unique ip address from a {@link TimePartitionedFileSet} */ public static final class RequestCounterHandler extends AbstractHttpServiceHandler { private static final Gson GSON = new Gson(); static final String REQUEST_COUNTER_PARTITIONS_PATH = "reqcount"; static final String REQUEST_FILE_CONTENT_PATH = "reqfile"; static final String REQUEST_FILE_PATH_HANDLER_KEY = "time"; private static final DateFormat SHORT_DATE_FORMAT = DateFormat.getDateTimeInstance(DateFormat.SHORT, DateFormat.SHORT); @UseDataSet(REQ_COUNT_STORE) private TimePartitionedFileSet reqCountStore; /** * Handler which lists all the different time partitions available in the {@link LogAnalysisApp#REQ_COUNT_STORE} * {@link TimePartitionedFileSet} */ @Path(REQUEST_COUNTER_PARTITIONS_PATH) @GET public void getRequestFilesetPartitions(HttpServiceRequest request, HttpServiceResponder responder) { // get all the existing paritions Set<TimePartitionDetail> partitionsByTime = reqCountStore.getPartitionsByTime(0, Long.MAX_VALUE); SortedSet<String> formattedTimes = new TreeSet<>(); for (TimePartitionDetail timePartitionDetail : partitionsByTime) { String partitionTime = SHORT_DATE_FORMAT.format(new Date(timePartitionDetail.getTime())); formattedTimes.add(partitionTime); } responder.sendJson(HttpURLConnection.HTTP_OK, formattedTimes); } /** * Handler which reads all the parts files from a given partition in {@link LogAnalysisApp#REQ_COUNT_STORE} * {@link TimePartitionedFileSet} and send it as a string. * Note: We make an assumption here that the contents for partitions in the tpfs for this example is not very huge. * This method of serving contents is not ideal for large contents. */ @Path(REQUEST_FILE_CONTENT_PATH) @POST public void getRequestFilesetContents(HttpServiceRequest request, HttpServiceResponder responder) { String partition = GSON.fromJson(Charsets.UTF_8.decode(request.getContent()).toString(), JsonObject.class).get(REQUEST_FILE_PATH_HANDLER_KEY).getAsString(); long partitionKey = 0; try { partitionKey = SHORT_DATE_FORMAT.parse(partition).getTime(); } catch (ParseException e) { responder.sendError(HttpURLConnection.HTTP_BAD_REQUEST, "Failed to parse the given string to a timestamp"); return; } final Location location = reqCountStore.getPartitionByTime(partitionKey).getLocation(); if (location == null) { responder.sendError(HttpURLConnection.HTTP_NOT_FOUND, "No files for the given date time string"); return; } Map<String, Integer> requestCountsMap = Maps.newHashMap(); try { for (Location file : location.list()) { if (file.getName().startsWith("part")) { try (BufferedReader reader = new BufferedReader(new InputStreamReader(file.getInputStream(), Charsets.UTF_8))) { String line; while ((line = reader.readLine()) != null) { int idx = line.indexOf(":"); requestCountsMap.put(line.substring(0, idx), Integer.parseInt(line.substring(idx + 1))); } } } } } catch (IOException e) { responder.sendError(HttpURLConnection.HTTP_INTERNAL_ERROR, e.getMessage()); return; } responder.sendJson(HttpURLConnection.HTTP_OK, requestCountsMap); } } }