/*
 * Copyright © 2015 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.examples.wikipedia;

import co.cask.cdap.api.annotation.UseDataSet;
import co.cask.cdap.api.common.Bytes;
import co.cask.cdap.api.dataset.lib.CloseableIterator;
import co.cask.cdap.api.dataset.lib.KeyValue;
import co.cask.cdap.api.dataset.lib.KeyValueTable;
import co.cask.cdap.api.dataset.table.Row;
import co.cask.cdap.api.dataset.table.Scanner;
import co.cask.cdap.api.dataset.table.Table;
import co.cask.cdap.api.service.AbstractService;
import co.cask.cdap.api.service.http.AbstractHttpServiceHandler;
import co.cask.cdap.api.service.http.HttpServiceHandler;
import co.cask.cdap.api.service.http.HttpServiceRequest;
import co.cask.cdap.api.service.http.HttpServiceResponder;
import com.google.gson.JsonObject;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import javax.ws.rs.GET;
import javax.ws.rs.Path;
import javax.ws.rs.PathParam;

/**
 * Service to retrieve results of analyses of Wikipedia data.
 */
public class WikipediaService extends AbstractService {

  public static final String NAME = WikipediaService.class.getSimpleName();

  /**
   * Sets the service name and description and registers the single {@link WikipediaHandler}.
   */
  @Override
  protected void configure() {
    setName(NAME);
    setDescription("A service that allows users to query Wikipedia Data Analysis results.");
    addHandler(new WikipediaHandler());
  }

  /**
   * {@link HttpServiceHandler} that contains endpoints for serving results of analyses of Wikipedia data.
   */
  @Path("/v1/functions")
  public static final class WikipediaHandler extends AbstractHttpServiceHandler {

    @SuppressWarnings("unused")
    @UseDataSet(WikipediaPipelineApp.SPARK_CLUSTERING_OUTPUT_DATASET)
    private Table clusteringTable;

    @SuppressWarnings("unused")
    @UseDataSet(WikipediaPipelineApp.MAPREDUCE_TOPN_OUTPUT)
    private KeyValueTable topNKVTable;

    /**
     * The {@link SparkWikipediaClustering} program generates a list of topics for the input data. This API returns the
     * list of topics that were generated by the {@link SparkWikipediaClustering} program.
     *
     * <p>Every row key of the clustering table is decoded as an int topic id, and the ids are sent back as JSON.
     *
     * @param request the incoming HTTP request (not inspected)
     * @param responder used to send the JSON list of topic ids
     */
    @GET
    @Path("/lda/topics")
    public void getTopics(HttpServiceRequest request, HttpServiceResponder responder) {
      List<Integer> topics = new ArrayList<>();
      Scanner scanner = clusteringTable.scan(null, null);
      try {
        Row row;
        while ((row = scanner.next()) != null) {
          topics.add(Bytes.toInt(row.getRow()));
        }
      } finally {
        // Always release the scanner, even if decoding a row key fails part-way through the scan.
        scanner.close();
      }
      responder.sendJson(topics);
    }

    /**
     * Returns the details of a particular topic. Each topic contains a list of terms and their weight in the
     * specified topic.
     *
     * <p>Responds with a 404 error if the row for the given topic is empty.
     *
     * @param request the incoming HTTP request (not inspected)
     * @param responder used to send either the JSON list of terms or the 404 error
     * @param topic the topic to return details for
     */
    @GET
    @Path("/lda/topics/{topic}")
    public void getTopic(HttpServiceRequest request, HttpServiceResponder responder,
                         @PathParam("topic") Integer topic) {
      Row row = clusteringTable.get(Bytes.toBytes(topic));
      if (row.isEmpty()) {
        responder.sendError(404, String.format("Topic %s was not found.", topic));
        return;
      }
      // Each column is read as (term name -> weight): the key as a string, the value as a double.
      List<Term> terms = new ArrayList<>();
      Map<byte[], byte[]> columns = row.getColumns();
      for (Map.Entry<byte[], byte[]> next : columns.entrySet()) {
        terms.add(new Term(Bytes.toString(next.getKey()), Bytes.toDouble(next.getValue())));
      }
      responder.sendJson(terms);
    }

    /**
     * Returns the list of words emitted by the {@link TopNMapReduce} program.
     *
     * <p>Each entry of the key-value table becomes a single-property JSON object whose property name is the key
     * decoded as a string and whose value is the stored value decoded as an int.
     *
     * @param request the incoming HTTP request (not inspected)
     * @param responder used to send the JSON list of word objects
     */
    @GET
    @Path("/topn/words")
    public void getTopNWords(HttpServiceRequest request, HttpServiceResponder responder) {
      List<JsonObject> words = new ArrayList<>();
      CloseableIterator<KeyValue<byte[], byte[]>> scanner = topNKVTable.scan(null, null);
      try {
        while (scanner.hasNext()) {
          KeyValue<byte[], byte[]> next = scanner.next();
          JsonObject jsonObject = new JsonObject();
          jsonObject.addProperty(Bytes.toString(next.getKey()), Bytes.toInt(next.getValue()));
          words.add(jsonObject);
        }
      } finally {
        // Always release the iterator, even if decoding an entry fails part-way through the scan.
        scanner.close();
      }
      responder.sendJson(words);
    }
  }
}