package io.dstream.tez; import static io.dstream.utils.Tuples.Tuple2.tuple2; import static io.dstream.utils.Tuples.Tuple4.tuple4; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import java.util.Collections; import java.util.List; import java.util.Map.Entry; import java.util.concurrent.Future; import java.util.stream.Collectors; import java.util.stream.Stream; import org.junit.After; import org.junit.Before; import org.junit.Test; import io.dstream.DStream; import io.dstream.utils.KVUtils; import io.dstream.utils.StringUtils; import io.dstream.utils.Tuples.Tuple2; import io.dstream.utils.Tuples.Tuple4; public class DStreamExecutionTests extends BaseTezTests { private static String EXECUTION_NAME = DStreamExecutionTests.class.getSimpleName(); @Before public void before(){ clean(EXECUTION_NAME); } @After public void after(){ clean(EXECUTION_NAME); } @Test public void noOperations() throws Exception { Future<Stream<Stream<String>>> resultFuture = DStream.ofType(String.class, "wc") .executeAs(EXECUTION_NAME); Stream<Stream<String>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<String>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); // spot check List<String> p1Result = resultPartitionsList.get(0).collect(Collectors.toList()); assertEquals("The ship drew on and had safely passed the strait, which some volcanic", p1Result.get(0)); assertEquals("shock has made between the Calasareigne and Jaros islands; had doubled", p1Result.get(1)); } @Test public void classifySource() throws Exception { Future<Stream<Stream<String>>> resultFuture = DStream.ofType(String.class, "wc") .classify(record -> 1) .executeAs(EXECUTION_NAME); Stream<Stream<String>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<String>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); // spot check List<String> p2Result = resultPartitionsList.get(1).collect(Collectors.toList()); assertEquals("shock has made between the Calasareigne and Jaros islands; had doubled", p2Result.get(1)); } @Test public void classifyWithPriorTransformation() throws Exception { Future<Stream<Stream<String>>> resultFuture = DStream.ofType(String.class, "wc") .map(s -> s.toLowerCase()) .classify(record -> record.substring(0, 1)) .executeAs(EXECUTION_NAME); Stream<Stream<String>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<String>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); // spot check List<String> p1Result = resultPartitionsList.get(0).collect(Collectors.toList()); assertEquals("happened on board.", p1Result.get(0)); assertEquals("the ship drew on and had safely passed the strait, which some volcanic", p1Result.get(2)); } @Test public void classifyWithPriorKVProducingTransformation() throws Exception { Future<Stream<Stream<Entry<String, Integer>>>> resultFuture = DStream.ofType(String.class, "wc") .map(s -> KVUtils.kv(s, 1)) .classify(entry -> entry.getKey().substring(0, 6)) .executeAs(EXECUTION_NAME); Stream<Stream<Entry<String, Integer>>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<Entry<String, Integer>>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); // spot check List<Entry<String, Integer>> p1Result = resultPartitionsList.get(0).collect(Collectors.toList()); assertEquals(KVUtils.kv("The ship drew on and had safely passed the strait, which some volcanic", 1), p1Result.get(0)); List<Entry<String, Integer>> p2Result = resultPartitionsList.get(1).collect(Collectors.toList()); assertEquals(KVUtils.kv("Pomegue, and approached the harbor under topsails, jib, and spanker, but", 1), p2Result.get(0)); } @Test public void classifyWithTransformationAfterClassification() throws Exception { Future<Stream<Stream<Entry<String, Integer>>>> resultFuture = DStream.ofType(String.class, "wc") .map(s -> KVUtils.kv(s, 1)) .classify(entry -> entry.getKey().substring(0, 5)) .map(entry -> KVUtils.kv(new StringBuilder(entry.getKey()).reverse().toString(), entry.getValue())) .executeAs(EXECUTION_NAME); Stream<Stream<Entry<String, Integer>>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<Entry<String, Integer>>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); // spot check List<Entry<String, Integer>> p2Result = resultPartitionsList.get(1).collect(Collectors.toList()); assertEquals(KVUtils.kv("evah dluoc enutrofsim tahw rehtona eno deksa ,live fo rennurerof eht", 1), p2Result.get(1)); } @Test public void classifyAfterShuffleOperation() throws Exception { Future<Stream<Stream<Entry<String, Integer>>>> resultFuture = DStream.ofType(String.class, "wc") .flatMap(line -> Stream.of(line.split("\\s+"))) .reduceValues(s -> s, s -> 1, Integer::sum) .classify(entry -> 1) .executeAs(EXECUTION_NAME); Stream<Stream<Entry<String, Integer>>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<Entry<String, Integer>>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); List<Entry<String, Integer>> p2Result = resultPartitionsList.get(1).collect(Collectors.toList()); assertEquals(KVUtils.kv("could", 1), p2Result.get(7)); } @Test public void classifyAfterShuffleAndShuffleAgain() throws Exception { Future<Stream<Stream<Entry<String, List<Entry<String, Integer>>>>>> resultFuture = DStream.ofType(String.class, "wc") .flatMap(line -> Stream.of(line.split("\\s+"))) .reduceValues(s -> s, s -> 1, Integer::sum) .classify(entry -> 1) .aggregateValues(e -> e.getKey().substring(0, 1), e -> e) .executeAs(EXECUTION_NAME); Stream<Stream<Entry<String, List<Entry<String, Integer>>>>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<Entry<String, List<Entry<String, Integer>>>>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); // spot check List<Entry<String, List<Entry<String, Integer>>>> p1Result = resultPartitionsList.get(0).collect(Collectors.toList()); assertEquals("b=[between=2, but=1, board.=1]", p1Result.get(3).toString()); assertEquals("j=[jib,=1]", p1Result.get(7).toString()); List<Entry<String, List<Entry<String, Integer>>>> p2Result = resultPartitionsList.get(1).collect(Collectors.toList()); assertEquals("s=[strait,=1, sedately=1, safely=1, ship=1, shock=1, slowly=1, so=1, some=1, spanker,=1]", p2Result.get(7).toString()); } @Test public void subsequentClassify() throws Exception { Future<Stream<Stream<Entry<String, Integer>>>> resultFuture = DStream.ofType(String.class, "wc") .flatMap(line -> Stream.of(line.split("\\s+"))) .reduceValues(s -> s, s -> 1, Integer::sum) .classify(s -> s.getKey().substring(0, 1)) .classify(s -> s.getKey().substring(0, 2)) .executeAs(EXECUTION_NAME); Stream<Stream<Entry<String, Integer>>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<Entry<String, Integer>>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); List<Entry<String, Integer>> p1Result = resultPartitionsList.get(0).collect(Collectors.toList()); assertEquals(KVUtils.kv("Calasareigne", 1), p1Result.get(0)); assertEquals(KVUtils.kv("asked", 1), p1Result.get(2)); assertEquals(KVUtils.kv("that", 2), p1Result.get(13)); } @Test public void transformationOnly() throws Exception { Future<Stream<Stream<String>>> resultFuture = DStream.ofType(String.class, "wc") .map(s -> s.toUpperCase()) .executeAs(EXECUTION_NAME); Stream<Stream<String>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<String>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); // spot check List<String> p1Result = resultPartitionsList.get(0).collect(Collectors.toList()); assertEquals("THE SHIP DREW ON AND HAD SAFELY PASSED THE STRAIT, WHICH SOME VOLCANIC", p1Result.get(0)); assertEquals("THE FORERUNNER OF EVIL, ASKED ONE ANOTHER WHAT MISFORTUNE COULD HAVE", p1Result.get(2)); } @Test public void computeOnly() throws Exception { Future<Stream<Stream<String>>> resultFuture = DStream.ofType(String.class, "wc") .<String>compute(stream -> stream.map(s -> s.toUpperCase())) .executeAs(EXECUTION_NAME); Stream<Stream<String>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<String>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); // spot check List<String> p2Result = resultPartitionsList.get(1).collect(Collectors.toList()); assertEquals("POMEGUE, AND APPROACHED THE HARBOR UNDER TOPSAILS, JIB, AND SPANKER, BUT", p2Result.get(0)); assertEquals("SO SLOWLY AND SEDATELY BETWEEN THAT THE IDLERS, WITH THAT INSTINCT WHICH IS", p2Result.get(1)); } @Test public void computeCompute() throws Exception { Future<Stream<Stream<String>>> resultFuture = DStream.ofType(String.class, "wc") .<String>compute(stream -> stream.map(s -> s.toUpperCase())) .<String>compute(stream -> stream.map(s -> new StringBuilder(s).reverse().toString())) .executeAs(EXECUTION_NAME); Stream<Stream<String>> resultPartitionsStream = resultFuture.get(); List<Stream<String>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); // spot check List<String> p1Result = resultPartitionsList.get(0).collect(Collectors.toList()); assertEquals("CINACLOV EMOS HCIHW ,TIARTS EHT DESSAP YLEFAS DAH DNA NO WERD PIHS EHT", p1Result.get(0)); } @Test public void computeClassifyCompute() throws Exception { Future<Stream<Stream<String>>> resultFuture = DStream.ofType(String.class, "wc") .<String>compute(stream -> stream.map(s -> s.toUpperCase())) .classify(s -> s.split(" ")[0].length()) .<String>compute(stream -> stream.map(s -> new StringBuilder(s).reverse().toString())) .executeAs(EXECUTION_NAME); Stream<Stream<String>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<String>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); // spot check List<String> p1Result = resultPartitionsList.get(0).collect(Collectors.toList()); assertEquals(".DRAOB NO DENEPPAH", p1Result.get(2)); } @Test public void computeShuffleCompute() throws Exception { Future<Stream<Stream<String>>> resultFuture = DStream.ofType(String.class, "wc") .<String>compute(stream -> stream.map(s -> s.toUpperCase())) .reduceValues(s -> s, s -> 1, Integer::sum) .<String>compute(stream -> stream.map(s -> new StringBuilder(s.getKey()).reverse().toString())) .executeAs(EXECUTION_NAME); Stream<Stream<String>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<String>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); // spot check List<String> p1Result = resultPartitionsList.get(0).collect(Collectors.toList()); assertEquals("CINACLOV EMOS HCIHW ,TIARTS EHT DESSAP YLEFAS DAH DNA NO WERD PIHS EHT", p1Result.get(3)); } @Test public void shuffleOnly() throws Exception { Future<Stream<Stream<Entry<String, Integer>>>> resultFuture = DStream.ofType(String.class, "wc") .reduceValues(s -> s, s -> 1, Integer::sum) .executeAs(EXECUTION_NAME); Stream<Stream<Entry<String, Integer>>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<Entry<String, Integer>>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); // spot check List<Entry<String, Integer>> p1Result = resultPartitionsList.get(0).collect(Collectors.toList()); assertEquals(KVUtils.kv("The ship drew on and had safely passed the strait, which some volcanic", 1), p1Result.get(0)); } @Test public void transformationAndShuffle() throws Exception { Future<Stream<Stream<Entry<String, Integer>>>> resultFuture = DStream.ofType(String.class, "wc") .flatMap(line -> Stream.of(line.split("\\s+"))) .filter(word -> word.length() > 5) .map(s -> s.toUpperCase()) .reduceValues(word -> word, word -> 1, Integer::sum) .executeAs(EXECUTION_NAME); Stream<Stream<Entry<String, Integer>>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<Entry<String, Integer>>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); // spot check List<Entry<String, Integer>> p1Result = resultPartitionsList.get(0).collect(Collectors.toList()); assertEquals(KVUtils.kv("BETWEEN", 2), p1Result.get(0)); assertEquals(KVUtils.kv("INSTINCT", 1), p1Result.get(4)); List<Entry<String, Integer>> p2Result = resultPartitionsList.get(1).collect(Collectors.toList()); assertEquals(KVUtils.kv("ANOTHER", 1), p2Result.get(0)); assertEquals(KVUtils.kv("SEDATELY", 1), p2Result.get(7)); } @Test public void transformationShuffleTransformationShuffle() throws Exception { Future<Stream<Stream<Entry<Integer, List<String>>>>> resultFuture = DStream.ofType(String.class, "wc") .flatMap(line -> Stream.of(line.split("\\s+"))) .filter(word -> word.length() > 5) .map(s -> s.toUpperCase()) .reduceValues(word -> word, word -> 1, Integer::sum) .filter(entry -> entry.getKey().length() > 6) .aggregateValues(entry -> entry.getValue(), entry -> entry.getKey()) .map(s -> {Collections.sort(s.getValue()); return s;}) .executeAs(EXECUTION_NAME); Stream<Stream<Entry<Integer, List<String>>>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<Entry<Integer, List<String>>>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); // spot check List<Entry<Integer, List<String>>> p1Result = resultPartitionsList.get(0).collect(Collectors.toList()); assertEquals("2=[BETWEEN]", p1Result.get(0).toString()); List<Entry<Integer, List<String>>> p2Result = resultPartitionsList.get(1).collect(Collectors.toList()); assertEquals("1=[ANOTHER, APPROACHED, CALASAREIGNE, DOUBLED, FORERUNNER, HAPPENED, IDLERS,, INSTINCT, ISLANDS;, MISFORTUNE, POMEGUE,, SEDATELY, SPANKER,, STRAIT,, TOPSAILS,, VOLCANIC]", p2Result.get(0).toString()); } @Test public void reduceSource() throws Exception { Future<Stream<Stream<String>>> resultFuture = DStream.ofType(String.class, "wc") .reduce(String::concat) .executeAs(EXECUTION_NAME); Stream<Stream<String>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<String>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); List<String> p1Result = resultPartitionsList.get(0).collect(Collectors.toList()); assertEquals(1, p1Result.size()); assertTrue(p1Result.get(0).startsWith("The ship drew on and had safely passed the strait, which some volcanicshock has made between")); } @Test public void reduceAfterTransformation() throws Exception { Future<Stream<Stream<String>>> resultFuture = DStream.ofType(String.class, "wc") .flatMap(s -> Stream.of(s.split("\\s+"))) .reduce(String::concat) .executeAs(EXECUTION_NAME); Stream<Stream<String>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<String>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); List<String> p1Result = resultPartitionsList.get(0).collect(Collectors.toList()); assertEquals(1, p1Result.size()); assertTrue(p1Result.get(0).startsWith("Theshipdrewonandhadsafelypassedthestrait,whichsomevolcanicshockhas")); } @Test public void reduceAfterShuffle() throws Exception { Future<Stream<Stream<String>>> resultFuture = DStream.ofType(String.class, "wc") .flatMap(s -> Stream.of(s.split("\\s+"))) .reduceValues(s -> s, s -> 1, Integer::sum) .map(s -> s.toString()) .reduce(String::concat) .executeAs(EXECUTION_NAME); Stream<Stream<String>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<String>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); List<String> p1Result = resultPartitionsList.get(0).collect(Collectors.toList()); assertEquals(1, p1Result.size()); assertTrue(p1Result.get(0).startsWith("Pomegue,=1asked=1between=2board.=1drew=1evil,=1forerunner=1harbor")); } @Test public void countSource() throws Exception { Future<Stream<Stream<Long>>> resultFuture = DStream.ofType(String.class, "wc") .count() .executeAs(EXECUTION_NAME); Stream<Stream<Long>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<Long>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); List<Long> p1Result = resultPartitionsList.get(0).collect(Collectors.toList()); assertEquals(1, p1Result.size()); assertEquals(6L, (long)p1Result.get(0)); } @Test public void countAfterTransformation() throws Exception { Future<Stream<Stream<Long>>> resultFuture = DStream.ofType(String.class, "wc") .flatMap(s -> Stream.of(s.split("\\s+"))) .count() .executeAs(EXECUTION_NAME); Stream<Stream<Long>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<Long>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); List<Long> p1Result = resultPartitionsList.get(0).collect(Collectors.toList()); assertEquals(1, p1Result.size()); assertEquals(62L, (long)p1Result.get(0)); } @Test public void countAfterShuffle() throws Exception { Future<Stream<Stream<Long>>> resultFuture = DStream.ofType(String.class, "wc") .flatMap(s -> Stream.of(s.split("\\s+"))) .reduceValues(s -> s, s -> 1, Integer::sum) .count() .executeAs(EXECUTION_NAME); Stream<Stream<Long>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<Long>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); List<Long> p1Result = resultPartitionsList.get(0).collect(Collectors.toList()); assertEquals(1, p1Result.size()); assertEquals(49L, (long)p1Result.get(0)); } @Test public void min() throws Exception { Future<Stream<Stream<String>>> resultFuture = DStream.ofType(String.class, "wc") .flatMap(s -> Stream.of(s.split("\\s+"))) .classify(word -> word) .min(StringUtils::compareLength) .executeAs(EXECUTION_NAME); Stream<Stream<String>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<String>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); // spot check List<String> p1Result = resultPartitionsList.get(0).collect(Collectors.toList()); assertEquals(1, p1Result.size()); assertEquals("is", p1Result.get(0)); List<String> p2Result = resultPartitionsList.get(1).collect(Collectors.toList()); assertEquals(1, p2Result.size()); assertEquals("of", p2Result.get(0)); } @Test public void distinctSingleStage() throws Exception { Future<Stream<Stream<String>>> resultFuture = DStream.ofType(String.class, "wc") .flatMap(s -> Stream.of(s.split("\\s+"))) .distinct() .executeAs(EXECUTION_NAME); Stream<Stream<String>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<String>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); List<String> p1Result = resultPartitionsList.get(0).collect(Collectors.toList()); p1Result.stream().collect(Collectors.toMap(s -> s, s -> 1, Integer::sum)).values().forEach(count -> { if (count > 1){ throw new IllegalStateException("value occures more then once"); } }); List<String> p2Result = resultPartitionsList.get(1).collect(Collectors.toList()); p2Result.stream().collect(Collectors.toMap(s -> s, s -> 1, Integer::sum)).values().forEach(count -> { if (count > 1){ throw new IllegalStateException("value occures more then once"); } }); } @Test public void distinctAfterShuffle() throws Exception { Future<Stream<Stream<String>>> resultFuture = DStream.ofType(String.class, "wc") .flatMap(s -> Stream.of(s.split("\\s+"))) .classify(word -> word.length()) .distinct() .executeAs(EXECUTION_NAME); Stream<Stream<String>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<String>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); // spot check List<String> p1Result = resultPartitionsList.get(0).collect(Collectors.toList()); p1Result.stream().collect(Collectors.toMap(s -> s, s -> 1, Integer::sum)).values().forEach(count -> { if (count > 1){ throw new IllegalStateException("value occures more then once"); } }); List<String> p2Result = resultPartitionsList.get(1).collect(Collectors.toList()); p2Result.stream().collect(Collectors.toMap(s -> s, s -> 1, Integer::sum)).values().forEach(count -> { if (count > 1){ throw new IllegalStateException("value occures more then once"); } }); } @Test public void minMaxSingleStage() throws Exception { Future<Stream<Stream<String>>> resultFuture = DStream.ofType(String.class, "wc") .flatMap(line -> Stream.of(line.split("\\s+"))) .max(StringUtils::compareLength) .executeAs(EXECUTION_NAME); Stream<Stream<String>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<String>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); List<String> p1Result = resultPartitionsList.get(0).collect(Collectors.toList()); assertEquals(0, p1Result.size()); List<String> p2Result = resultPartitionsList.get(1).collect(Collectors.toList()); assertEquals(1, p2Result.size()); assertEquals("Calasareigne", p2Result.get(0)); } @Test public void minMaxAfterShuffle() throws Exception { Future<Stream<Stream<String>>> resultFuture = DStream.ofType(String.class, "wc") .flatMap(line -> Stream.of(line.split("\\s+"))) .classify(s -> s) .max(StringUtils::compareLength) .executeAs(EXECUTION_NAME); Stream<Stream<String>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<String>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); List<String> p1Result = resultPartitionsList.get(0).collect(Collectors.toList()); assertEquals(1, p1Result.size()); assertEquals("forerunner", p1Result.get(0)); List<String> p2Result = resultPartitionsList.get(1).collect(Collectors.toList()); assertEquals(1, p2Result.size()); assertEquals("Calasareigne", p2Result.get(0)); } @Test(expected=IllegalStateException.class) public void failCrossJoinOnMultiplePartitions() throws Exception { DStream<String> one = DStream.ofType(String.class, "one"); DStream<String> two = DStream.ofType(String.class, "two"); Future<Stream<Stream<Tuple2<String, String>>>> resultFuture = one .join(two) .executeAs(EXECUTION_NAME); resultFuture.get(); } @Test(expected=IllegalStateException.class) public void failJoinMultiPartitionWithTransformationsAndPredicate() throws Exception { DStream<String> one = DStream.ofType(String.class, "one"); DStream<String> two = DStream.ofType(String.class, "two"); Future<Stream<Stream<Tuple2<String, String>>>> resultFuture = one .map(s -> s.toLowerCase()) .join(two.map(s -> s.toUpperCase())).on(t2 -> t2._1().split("\\s+")[0].equals(t2._2().split("\\s+")[2])) .executeAs(EXECUTION_NAME); resultFuture.get(); } @Test public void twoWayJoinWithPredicateAndClassifier() throws Exception { DStream<String> one = DStream.ofType(String.class, "one").classify(a -> a.split("\\s+")[0]); DStream<String> two = DStream.ofType(String.class, "two").classify(a -> a.split("\\s+")[2]); Future<Stream<Stream<Tuple2<String, String>>>> resultFuture = one .join(two).on(tuple2 -> tuple2._1().substring(0, 1).equals(tuple2._2().substring(tuple2._2().length()-1))) .executeAs(EXECUTION_NAME); Stream<Stream<Tuple2<String, String>>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<Tuple2<String, String>>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); // spot check List<Tuple2<String, String>> p1Result = resultPartitionsList.get(0).collect(Collectors.toList()); assertEquals("[2 Amazon, Jeff Bezos 2]", p1Result.get(0).toString()); List<Tuple2<String, String>> p2Result = resultPartitionsList.get(1).collect(Collectors.toList()); assertEquals("[3 Hortonworks, Arun Murthy 3]", p2Result.get(6).toString()); } @Test public void fourWayJoinWithIntermediateTransformations() throws Exception { DStream<String> one = DStream.ofType(String.class, "one").classify(a -> a.split("\\s+")[0]); DStream<String> two = DStream.ofType(String.class, "two").classify(a -> a.split("\\s+")[2]); DStream<String> three = DStream.ofType(String.class, "three").classify(a -> a.split("\\s+")[0]); DStream<String> four = DStream.ofType(String.class, "four").classify(a -> a.split("\\s+")[0]); Future<Stream<Stream<Tuple4<String, String, String, String>>>> resultFuture = one .join(two) .filter(t2 -> t2._1().contains("Hortonworks")) .map(t2 -> tuple2(t2._1().toUpperCase(), t2._2().toUpperCase())) .join(three) .join(four).on(t3 -> { String v1 = t3._1()._1().split("\\s+")[0]; String v2 = t3._1()._2().split("\\s+")[2]; String v3 = t3._2().split("\\s+")[0]; String v4 = t3._3().split("\\s+")[0]; return v1.equals(v2) && v1.equals(v3) && v1.equals(v4); }) .map(t3 -> tuple4(t3._1()._1(), t3._1()._2(), t3._2(), t3._3())) .executeAs(EXECUTION_NAME); Stream<Stream<Tuple4<String, String, String, String>>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<Tuple4<String, String, String, String>>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); // spot check List<Tuple4<String, String, String, String>> p2Result = resultPartitionsList.get(1).collect(Collectors.toList()); assertEquals("[3 HORTONWORKS, ROB BEARDEN 3, 3 $1B, 3 5470 Great America Parkway Santa Clara, CA 95054]", p2Result.get(0).toString()); assertTrue(resultPartitionsList.get(0).collect(Collectors.toList()).isEmpty()); } @Test public void simpleTwoWayUnionWithClassifier() throws Exception { DStream<String> one = DStream.ofType(String.class, "one").classify(a -> a.split("\\s+")[0]); DStream<String> two = DStream.ofType(String.class, "two").classify(a -> a.split("\\s+")[2]); Future<Stream<Stream<String>>> resultFuture = one .union(two) .executeAs(EXECUTION_NAME); Stream<Stream<String>> resultPartitionsStream = resultFuture.get(); // ExecutionResultUtils.printResults(resultPartitionsStream, true); List<Stream<String>> resultPartitionsList = resultPartitionsStream.collect(Collectors.toList()); assertEquals(2, resultPartitionsList.size()); // spot check List<String> p1Result = resultPartitionsList.get(0).collect(Collectors.toList()); assertEquals("2 Amazon", p1Result.get(0)); assertEquals("Jeff Bezos 2", p1Result.get(1)); assertEquals("Jeffrey Blackburn 2", p1Result.get(2)); } }