/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package demo.jaxrs.server.simple; import java.io.InputStream; import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Queue; import java.util.concurrent.ArrayBlockingQueue; import java.util.concurrent.Executor; import java.util.concurrent.ThreadPoolExecutor; import java.util.concurrent.TimeUnit; import javax.ws.rs.Consumes; import javax.ws.rs.POST; import javax.ws.rs.Path; import javax.ws.rs.Produces; import javax.ws.rs.WebApplicationException; import javax.ws.rs.container.AsyncResponse; import javax.ws.rs.container.Suspended; import javax.ws.rs.core.MediaType; import org.apache.cxf.jaxrs.ext.Oneway; import org.apache.cxf.jaxrs.ext.multipart.Attachment; import org.apache.cxf.jaxrs.ext.multipart.Multipart; import org.apache.cxf.jaxrs.ext.search.tika.TikaContentExtractor; import org.apache.cxf.jaxrs.ext.search.tika.TikaContentExtractor.TikaContent; import org.apache.spark.SparkConf; import org.apache.spark.SparkException; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.function.VoidFunction; import org.apache.spark.streaming.Durations; import org.apache.spark.streaming.api.java.JavaDStream; import org.apache.spark.streaming.api.java.JavaPairDStream; import org.apache.spark.streaming.api.java.JavaStreamingContext; import demo.jaxrs.server.SparkUtils; @Path("/") public class StreamingService { private static final Map<String, MediaType> MEDIA_TYPE_TABLE; static { MEDIA_TYPE_TABLE = new HashMap<>(); MEDIA_TYPE_TABLE.put("pdf", MediaType.valueOf("application/pdf")); MEDIA_TYPE_TABLE.put("odt", MediaType.valueOf("application/vnd.oasis.opendocument.text")); MEDIA_TYPE_TABLE.put("odp", MediaType.valueOf("application/vnd.oasis.opendocument.presentation")); } private Executor executor = new ThreadPoolExecutor(5, 5, 0, TimeUnit.SECONDS, new ArrayBlockingQueue<Runnable>(10)); private String receiverType; public StreamingService(String receiverType) { this.receiverType = receiverType; } @POST @Path("/multipart") @Consumes("multipart/form-data") @Produces("text/plain") public void processMultipartStream(@Suspended AsyncResponse async, @Multipart("file") Attachment att) { MediaType mediaType = att.getContentType(); if (mediaType == null) { String fileName = att.getContentDisposition().getFilename(); if (fileName != null) { int extDot = fileName.lastIndexOf('.'); if (extDot > 0) { mediaType = MEDIA_TYPE_TABLE.get(fileName.substring(extDot + 1)); } } } TikaContentExtractor tika = new TikaContentExtractor(); TikaContent tikaContent = tika.extract(att.getObject(InputStream.class), mediaType); processStream(async, SparkUtils.getStringsFromString(tikaContent.getContent())); } @POST @Path("/stream") @Consumes("text/plain") @Produces("text/plain") public void processSimpleStream(@Suspended AsyncResponse async, InputStream is) { processStream(async, SparkUtils.getStringsFromInputStream(is)); } @POST @Path("/streamOneWay") @Consumes("text/plain") @Oneway public void processSimpleStreamOneWay(InputStream is) { processStreamOneWay(SparkUtils.getStringsFromInputStream(is)); } private void processStream(AsyncResponse async, List<String> inputStrings) { try { SparkConf sparkConf = new SparkConf().setMaster("local[*]") .setAppName("JAX-RS Spark Connect " + SparkUtils.getRandomId()); JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(1)); SparkStreamingOutput streamOut = new SparkStreamingOutput(jssc); SparkStreamingListener sparkListener = new SparkStreamingListener(streamOut); jssc.addStreamingListener(sparkListener); JavaDStream<String> receiverStream = null; if ("queue".equals(receiverType)) { Queue<JavaRDD<String>> rddQueue = new LinkedList<>(); for (int i = 0; i < 30; i++) { rddQueue.add(jssc.sparkContext().parallelize(inputStrings)); } receiverStream = jssc.queueStream(rddQueue); } else { receiverStream = jssc.receiverStream(new StringListReceiver(inputStrings)); } JavaPairDStream<String, Integer> wordCounts = SparkUtils.createOutputDStream(receiverStream, false); wordCounts.foreachRDD(new OutputFunction(streamOut)); jssc.start(); executor.execute(new SparkJob(async, sparkListener)); } catch (Exception ex) { // the compiler does not allow to catch SparkException directly if (ex instanceof SparkException) { async.cancel(60); } else { async.resume(new WebApplicationException(ex)); } } } private void processStreamOneWay(List<String> inputStrings) { try { SparkConf sparkConf = new SparkConf().setMaster("local[*]") .setAppName("JAX-RS Spark Connect OneWay " + SparkUtils.getRandomId()); JavaStreamingContext jssc = new JavaStreamingContext(sparkConf, Durations.seconds(1)); JavaDStream<String> receiverStream = null; if ("queue".equals(receiverType)) { Queue<JavaRDD<String>> rddQueue = new LinkedList<>(); for (int i = 0; i < 30; i++) { rddQueue.add(jssc.sparkContext().parallelize(inputStrings)); } receiverStream = jssc.queueStream(rddQueue); } else { receiverStream = jssc.receiverStream(new StringListReceiver(inputStrings)); } JavaPairDStream<String, Integer> wordCounts = SparkUtils.createOutputDStream(receiverStream, false); wordCounts.foreachRDD(new PrintOutputFunction(jssc)); jssc.start(); } catch (Exception ex) { // ignore } } private static class OutputFunction implements VoidFunction<JavaPairRDD<String, Integer>> { private static final long serialVersionUID = 1L; private SparkStreamingOutput streamOut; OutputFunction(SparkStreamingOutput streamOut) { this.streamOut = streamOut; } @Override public void call(JavaPairRDD<String, Integer> rdd) { for (Map.Entry<String, Integer> entry : rdd.collectAsMap().entrySet()) { String value = entry.getKey() + " : " + entry.getValue() + "\n"; streamOut.addResponseEntry(value); } } } private static class PrintOutputFunction implements VoidFunction<JavaPairRDD<String, Integer>> { private static final long serialVersionUID = 1L; private JavaStreamingContext jssc; PrintOutputFunction(JavaStreamingContext jssc) { this.jssc = jssc; } @Override public void call(JavaPairRDD<String, Integer> rdd) { if (!rdd.collectAsMap().isEmpty()) { for (Map.Entry<String, Integer> entry : rdd.collectAsMap().entrySet()) { String value = entry.getKey() + " : " + entry.getValue(); System.out.println(value); } jssc.stop(false); jssc.close(); } } } }